Fix up run-time partition pruning's use of relcache's partition data.

The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling.  Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.

While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.

While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test.  There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them.  However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch.  A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.

David Rowley and Tom Lane

Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2018-06-13 12:03:19 -04:00
parent e146e4d02d
commit e23bae82cf
6 changed files with 132 additions and 73 deletions

View File

@ -1357,11 +1357,14 @@ adjust_partition_tlist(List *tlist, TupleConversionMap *map)
*
* Functions:
*
* ExecSetupPartitionPruneState:
* ExecCreatePartitionPruneState:
* Creates the PartitionPruneState required by each of the two pruning
* functions. Details stored include how to map the partition index
* returned by the partition pruning code into subplan indexes.
*
* ExecDestroyPartitionPruneState:
* Deletes a PartitionPruneState. Must be called during executor shutdown.
*
* ExecFindInitialMatchingSubPlans:
* Returns indexes of matching subplans. Partition pruning is attempted
* without any evaluation of expressions containing PARAM_EXEC Params.
@ -1382,8 +1385,8 @@ adjust_partition_tlist(List *tlist, TupleConversionMap *map)
*/
/*
* ExecSetupPartitionPruneState
* Set up the data structure required for calling
* ExecCreatePartitionPruneState
* Build the data structure required for calling
* ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans.
*
* 'planstate' is the parent plan node's execution state.
@ -1395,7 +1398,7 @@ adjust_partition_tlist(List *tlist, TupleConversionMap *map)
* in each PartitionPruneInfo.
*/
PartitionPruneState *
ExecSetupPartitionPruneState(PlanState *planstate, List *partitionpruneinfo)
ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo)
{
PartitionPruneState *prunestate;
PartitionPruningData *prunedata;
@ -1435,11 +1438,10 @@ ExecSetupPartitionPruneState(PlanState *planstate, List *partitionpruneinfo)
PartitionPruningData *pprune = &prunedata[i];
PartitionPruneContext *context = &pprune->context;
PartitionDesc partdesc;
Relation rel;
PartitionKey partkey;
ListCell *lc2;
int partnatts;
int n_steps;
ListCell *lc2;
/*
* We must copy the subplan_map rather than pointing directly to the
@ -1456,26 +1458,33 @@ ExecSetupPartitionPruneState(PlanState *planstate, List *partitionpruneinfo)
pprune->present_parts = bms_copy(pinfo->present_parts);
/*
* Grab some info from the table's relcache; lock was already obtained
* by ExecLockNonLeafAppendTables.
* We need to hold a pin on the partitioned table's relcache entry so
* that we can rely on its copies of the table's partition key and
* partition descriptor. We need not get a lock though; one should
* have been acquired already by InitPlan or
* ExecLockNonLeafAppendTables.
*/
rel = relation_open(pinfo->reloid, NoLock);
context->partrel = relation_open(pinfo->reloid, NoLock);
partkey = RelationGetPartitionKey(rel);
partdesc = RelationGetPartitionDesc(rel);
partkey = RelationGetPartitionKey(context->partrel);
partdesc = RelationGetPartitionDesc(context->partrel);
n_steps = list_length(pinfo->pruning_steps);
context->strategy = partkey->strategy;
context->partnatts = partnatts = partkey->partnatts;
context->partopfamily = partkey->partopfamily;
context->partopcintype = partkey->partopcintype;
context->nparts = pinfo->nparts;
context->boundinfo = partdesc->boundinfo;
context->partcollation = partkey->partcollation;
context->partsupfunc = partkey->partsupfunc;
context->nparts = pinfo->nparts;
context->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey);
/* We'll look up type-specific support functions as needed */
context->stepcmpfuncs = (FmgrInfo *)
palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
context->ppccontext = CurrentMemoryContext;
context->planstate = planstate;
/* Initialize expression state for each expression we need */
n_steps = list_length(pinfo->pruning_steps);
context->exprstates = (ExprState **)
palloc0(sizeof(ExprState *) * n_steps * partnatts);
foreach(lc2, pinfo->pruning_steps)
@ -1527,14 +1536,32 @@ ExecSetupPartitionPruneState(PlanState *planstate, List *partitionpruneinfo)
prunestate->execparamids = bms_add_members(prunestate->execparamids,
pinfo->execparamids);
relation_close(rel, NoLock);
i++;
}
return prunestate;
}
/*
* ExecDestroyPartitionPruneState
* Release resources at plan shutdown.
*
* We don't bother to free any memory here, since the whole executor context
* will be going away shortly. We do need to release our relcache pins.
*/
void
ExecDestroyPartitionPruneState(PartitionPruneState *prunestate)
{
int i;
for (i = 0; i < prunestate->num_partprunedata; i++)
{
PartitionPruningData *pprune = &prunestate->partprunedata[i];
relation_close(pprune->context.partrel, NoLock);
}
}
/*
* ExecFindInitialMatchingSubPlans
* Identify the set of subplans that cannot be eliminated by initial

View File

@ -136,8 +136,10 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
/* We may need an expression context to evaluate partition exprs */
ExecAssignExprContext(estate, &appendstate->ps);
prunestate = ExecSetupPartitionPruneState(&appendstate->ps,
node->part_prune_infos);
/* Create the working data structure for pruning. */
prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
node->part_prune_infos);
appendstate->as_prune_state = prunestate;
/* Perform an initial partition prune, if required. */
if (prunestate->do_initial_prune)
@ -178,8 +180,6 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
*/
if (!prunestate->do_exec_prune)
appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
appendstate->as_prune_state = prunestate;
}
else
{
@ -330,6 +330,12 @@ ExecEndAppend(AppendState *node)
*/
for (i = 0; i < nplans; i++)
ExecEndNode(appendplans[i]);
/*
* release any resources associated with run-time pruning
*/
if (node->as_prune_state)
ExecDestroyPartitionPruneState(node->as_prune_state);
}
void

View File

@ -436,16 +436,20 @@ prune_append_rel_partitions(RelOptInfo *rel)
if (contradictory)
return NULL;
/* Set up PartitionPruneContext */
context.strategy = rel->part_scheme->strategy;
context.partnatts = rel->part_scheme->partnatts;
context.partopfamily = rel->part_scheme->partopfamily;
context.partopcintype = rel->part_scheme->partopcintype;
context.partcollation = rel->part_scheme->partcollation;
context.partsupfunc = rel->part_scheme->partsupfunc;
context.nparts = rel->nparts;
context.boundinfo = rel->boundinfo;
context.partcollation = rel->part_scheme->partcollation;
context.partsupfunc = rel->part_scheme->partsupfunc;
context.stepcmpfuncs = (FmgrInfo *) palloc0(sizeof(FmgrInfo) *
context.partnatts *
list_length(pruning_steps));
context.ppccontext = CurrentMemoryContext;
/* These are not valid when being called from the planner */
context.partrel = NULL;
context.planstate = NULL;
context.exprstates = NULL;
context.exprhasexecparam = NULL;
@ -2809,7 +2813,8 @@ perform_pruning_base_step(PartitionPruneContext *context,
int keyno,
nvalues;
Datum values[PARTITION_MAX_KEYS];
FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
FmgrInfo *partsupfunc;
int stateidx;
/*
* There better be the same number of expressions and compare functions.
@ -2844,7 +2849,6 @@ perform_pruning_base_step(PartitionPruneContext *context,
if (lc1 != NULL)
{
Expr *expr;
int stateidx;
Datum datum;
bool isnull;
@ -2873,19 +2877,25 @@ perform_pruning_base_step(PartitionPruneContext *context,
return result;
}
/*
* If we're going to need a different comparison function than
* the one cached in the PartitionKey, we'll need to look up
* the FmgrInfo.
*/
/* Set up the stepcmpfuncs entry, unless we already did */
cmpfn = lfirst_oid(lc2);
Assert(OidIsValid(cmpfn));
if (cmpfn != context->partsupfunc[keyno].fn_oid)
fmgr_info(cmpfn, &partsupfunc[keyno]);
else
fmgr_info_copy(&partsupfunc[keyno],
&context->partsupfunc[keyno],
CurrentMemoryContext);
if (cmpfn != context->stepcmpfuncs[stateidx].fn_oid)
{
/*
* If the needed support function is the same one cached
* in the relation's partition key, copy the cached
* FmgrInfo. Otherwise (i.e., when we have a cross-type
* comparison), an actual lookup is required.
*/
if (cmpfn == context->partsupfunc[keyno].fn_oid)
fmgr_info_copy(&context->stepcmpfuncs[stateidx],
&context->partsupfunc[keyno],
context->ppccontext);
else
fmgr_info_cxt(cmpfn, &context->stepcmpfuncs[stateidx],
context->ppccontext);
}
values[keyno] = datum;
nvalues++;
@ -2896,6 +2906,13 @@ perform_pruning_base_step(PartitionPruneContext *context,
}
}
/*
* Point partsupfunc to the entry for the 0th key of this step; the
* additional support functions, if any, follow consecutively.
*/
stateidx = PruneCxtStateIdx(context->partnatts, opstep->step.step_id, 0);
partsupfunc = &context->stepcmpfuncs[stateidx];
switch (context->strategy)
{
case PARTITION_STRATEGY_HASH:

View File

@ -2471,6 +2471,7 @@ RelationClearRelation(Relation relation, bool rebuild)
keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
/* partkey is immutable once set up, so we can always keep it */
keep_partkey = (relation->rd_partkey != NULL);
keep_partdesc = equalPartitionDescs(relation->rd_partkey,
relation->rd_partdesc,
@ -2515,7 +2516,7 @@ RelationClearRelation(Relation relation, bool rebuild)
SWAPFIELD(Form_pg_class, rd_rel);
/* ... but actually, we don't have to update newrel->rd_rel */
memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
/* preserve old tupledesc and rules if no logical change */
/* preserve old tupledesc, rules, policies if no logical change */
if (keep_tupdesc)
SWAPFIELD(TupleDesc, rd_att);
if (keep_rules)
@ -2529,13 +2530,12 @@ RelationClearRelation(Relation relation, bool rebuild)
SWAPFIELD(Oid, rd_toastoid);
/* pgstat_info must be preserved */
SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
/* partition key must be preserved, if we have one */
/* preserve old partitioning info if no logical change */
if (keep_partkey)
{
SWAPFIELD(PartitionKey, rd_partkey);
SWAPFIELD(MemoryContext, rd_partkeycxt);
}
/* preserve old partdesc if no logical change */
if (keep_partdesc)
{
SWAPFIELD(PartitionDesc, rd_partdesc);

View File

@ -208,8 +208,9 @@ extern HeapTuple ConvertPartitionTupleSlot(TupleConversionMap *map,
TupleTableSlot **p_my_slot);
extern void ExecCleanupTupleRouting(ModifyTableState *mtstate,
PartitionTupleRouting *proute);
extern PartitionPruneState *ExecSetupPartitionPruneState(PlanState *planstate,
List *partitionpruneinfo);
extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate,
List *partitionpruneinfo);
extern void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate);
extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate);
extern Bitmapset *ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate,
int nsubplans);

View File

@ -20,49 +20,57 @@
/*
* PartitionPruneContext
* Stores information needed at runtime for pruning computations
* related to a single partitioned table.
*
* Information about a partitioned table needed to perform partition pruning.
* partrel Relcache pointer for the partitioned table,
* if we have it open (else NULL).
* strategy Partition strategy, e.g. LIST, RANGE, HASH.
* partnatts Number of columns in the partition key.
* nparts Number of partitions in this partitioned table.
* boundinfo Partition boundary info for the partitioned table.
* partcollation Array of partnatts elements, storing the collations of the
* partition key columns.
* partsupfunc Array of FmgrInfos for the comparison or hashing functions
* associated with the partition keys (partnatts elements).
* (This points into the partrel's partition key, typically.)
* stepcmpfuncs Array of FmgrInfos for the comparison or hashing function
* for each pruning step and partition key.
* ppccontext Memory context holding this PartitionPruneContext's
* subsidiary data, such as the FmgrInfos.
* planstate Points to the parent plan node's PlanState when called
* during execution; NULL when called from the planner.
* exprstates Array of ExprStates, indexed as per PruneCtxStateIdx; one
* for each partition key in each pruning step. Allocated if
* planstate is non-NULL, otherwise NULL.
* exprhasexecparam Array of bools, each true if corresponding 'exprstate'
* expression contains any PARAM_EXEC Params. (Can be NULL
* if planstate is NULL.)
* evalexecparams True if it's safe to evaluate PARAM_EXEC Params.
*/
typedef struct PartitionPruneContext
{
/* Partition key information */
Relation partrel;
char strategy;
int partnatts;
Oid *partopfamily;
Oid *partopcintype;
int nparts;
PartitionBoundInfo boundinfo;
Oid *partcollation;
FmgrInfo *partsupfunc;
/* Number of partitions */
int nparts;
/* Partition boundary info */
PartitionBoundInfo boundinfo;
/*
* This will be set when the context is used from the executor, to allow
* Params to be evaluated.
*/
FmgrInfo *stepcmpfuncs;
MemoryContext ppccontext;
PlanState *planstate;
/*
* Array of ExprStates, indexed as per PruneCtxStateIdx; one for each
* partkey in each pruning step. Allocated if planstate is non-NULL,
* otherwise NULL.
*/
ExprState **exprstates;
/*
* Similar array of flags, each true if corresponding 'exprstate'
* expression contains any PARAM_EXEC Params. (Can be NULL if planstate
* is NULL.)
*/
bool *exprhasexecparam;
/* true if it's safe to evaluate PARAM_EXEC Params */
bool evalexecparams;
} PartitionPruneContext;
/*
* PruneCxtStateIdx() computes the correct index into the stepcmpfuncs[],
* exprstates[] and exprhasexecparam[] arrays for step step_id and
* partition key column keyno. (Note: there is code that assumes the
* entries for a given step are sequential, so this is not chosen freely.)
*/
#define PruneCxtStateIdx(partnatts, step_id, keyno) \
((partnatts) * (step_id) + (keyno))