diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 11b7e67e4d..d13be4145f 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -48,8 +48,8 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, bool *isnull, int maxfieldlen); static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map); -static void find_matching_subplans_recurse(PartitionPruneState *prunestate, - PartitionPruningData *pprune, +static void find_matching_subplans_recurse(PartitionPruningData *prunedata, + PartitionedRelPruningData *pprune, bool initial_prune, Bitmapset **validsubplans); @@ -1417,34 +1417,42 @@ adjust_partition_tlist(List *tlist, TupleConversionMap *map) * * 'planstate' is the parent plan node's execution state. * - * 'partitionpruneinfo' is a List of PartitionPruneInfos as generated by + * 'partitionpruneinfo' is a PartitionPruneInfo as generated by * make_partition_pruneinfo. Here we build a PartitionPruneState containing a - * PartitionPruningData for each item in that List. This data can be re-used - * each time we re-evaluate which partitions match the pruning steps provided - * in each PartitionPruneInfo. + * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of + * partitionpruneinfo->prune_infos), each of which contains a + * PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in + * that sublist. This two-level system is needed to keep from confusing the + * different hierarchies when a UNION ALL contains multiple partitioned tables + * as children. The data stored in each PartitionedRelPruningData can be + * re-used each time we re-evaluate which partitions match the pruning steps + * provided in each PartitionedRelPruneInfo. 
*/ PartitionPruneState * -ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) +ExecCreatePartitionPruneState(PlanState *planstate, + PartitionPruneInfo *partitionpruneinfo) { PartitionPruneState *prunestate; - PartitionPruningData *prunedata; + int n_part_hierarchies; ListCell *lc; int i; - Assert(partitionpruneinfo != NIL); + n_part_hierarchies = list_length(partitionpruneinfo->prune_infos); + Assert(n_part_hierarchies > 0); /* * Allocate the data structure */ - prunestate = (PartitionPruneState *) palloc(sizeof(PartitionPruneState)); - prunedata = (PartitionPruningData *) - palloc(sizeof(PartitionPruningData) * list_length(partitionpruneinfo)); + prunestate = (PartitionPruneState *) + palloc(offsetof(PartitionPruneState, partprunedata) + + sizeof(PartitionPruningData *) * n_part_hierarchies); - prunestate->partprunedata = prunedata; - prunestate->num_partprunedata = list_length(partitionpruneinfo); + prunestate->execparamids = NULL; + /* other_subplans can change at runtime, so we need our own copy */ + prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans); prunestate->do_initial_prune = false; /* may be set below */ prunestate->do_exec_prune = false; /* may be set below */ - prunestate->execparamids = NULL; + prunestate->num_partprunedata = n_part_hierarchies; /* * Create a short-term memory context which we'll use when making calls to @@ -1458,110 +1466,128 @@ ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) ALLOCSET_DEFAULT_SIZES); i = 0; - foreach(lc, partitionpruneinfo) + foreach(lc, partitionpruneinfo->prune_infos) { - PartitionPruneInfo *pinfo = castNode(PartitionPruneInfo, lfirst(lc)); - PartitionPruningData *pprune = &prunedata[i]; - PartitionPruneContext *context = &pprune->context; - PartitionDesc partdesc; - PartitionKey partkey; - int partnatts; - int n_steps; + List *partrelpruneinfos = lfirst_node(List, lc); + int npartrelpruneinfos = list_length(partrelpruneinfos); + 
PartitionPruningData *prunedata; ListCell *lc2; + int j; - /* - * We must copy the subplan_map rather than pointing directly to the - * plan's version, as we may end up making modifications to it later. - */ - pprune->subplan_map = palloc(sizeof(int) * pinfo->nparts); - memcpy(pprune->subplan_map, pinfo->subplan_map, - sizeof(int) * pinfo->nparts); + prunedata = (PartitionPruningData *) + palloc(offsetof(PartitionPruningData, partrelprunedata) + + npartrelpruneinfos * sizeof(PartitionedRelPruningData)); + prunestate->partprunedata[i] = prunedata; + prunedata->num_partrelprunedata = npartrelpruneinfos; - /* We can use the subpart_map verbatim, since we never modify it */ - pprune->subpart_map = pinfo->subpart_map; - - /* present_parts is also subject to later modification */ - pprune->present_parts = bms_copy(pinfo->present_parts); - - /* - * We need to hold a pin on the partitioned table's relcache entry so - * that we can rely on its copies of the table's partition key and - * partition descriptor. We need not get a lock though; one should - * have been acquired already by InitPlan or - * ExecLockNonLeafAppendTables. 
- */ - context->partrel = relation_open(pinfo->reloid, NoLock); - - partkey = RelationGetPartitionKey(context->partrel); - partdesc = RelationGetPartitionDesc(context->partrel); - n_steps = list_length(pinfo->pruning_steps); - - context->strategy = partkey->strategy; - context->partnatts = partnatts = partkey->partnatts; - context->nparts = pinfo->nparts; - context->boundinfo = partdesc->boundinfo; - context->partcollation = partkey->partcollation; - context->partsupfunc = partkey->partsupfunc; - - /* We'll look up type-specific support functions as needed */ - context->stepcmpfuncs = (FmgrInfo *) - palloc0(sizeof(FmgrInfo) * n_steps * partnatts); - - context->ppccontext = CurrentMemoryContext; - context->planstate = planstate; - - /* Initialize expression state for each expression we need */ - context->exprstates = (ExprState **) - palloc0(sizeof(ExprState *) * n_steps * partnatts); - foreach(lc2, pinfo->pruning_steps) + j = 0; + foreach(lc2, partrelpruneinfos) { - PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc2); + PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2); + PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j]; + PartitionPruneContext *context = &pprune->context; + PartitionDesc partdesc; + PartitionKey partkey; + int partnatts; + int n_steps; ListCell *lc3; - int keyno; - /* not needed for other step kinds */ - if (!IsA(step, PartitionPruneStepOp)) - continue; + /* + * We must copy the subplan_map rather than pointing directly to + * the plan's version, as we may end up making modifications to it + * later. 
+ */ + pprune->subplan_map = palloc(sizeof(int) * pinfo->nparts); + memcpy(pprune->subplan_map, pinfo->subplan_map, + sizeof(int) * pinfo->nparts); - Assert(list_length(step->exprs) <= partnatts); + /* We can use the subpart_map verbatim, since we never modify it */ + pprune->subpart_map = pinfo->subpart_map; - keyno = 0; - foreach(lc3, step->exprs) + /* present_parts is also subject to later modification */ + pprune->present_parts = bms_copy(pinfo->present_parts); + + /* + * We need to hold a pin on the partitioned table's relcache entry + * so that we can rely on its copies of the table's partition key + * and partition descriptor. We need not get a lock though; one + * should have been acquired already by InitPlan or + * ExecLockNonLeafAppendTables. + */ + context->partrel = relation_open(pinfo->reloid, NoLock); + + partkey = RelationGetPartitionKey(context->partrel); + partdesc = RelationGetPartitionDesc(context->partrel); + n_steps = list_length(pinfo->pruning_steps); + + context->strategy = partkey->strategy; + context->partnatts = partnatts = partkey->partnatts; + context->nparts = pinfo->nparts; + context->boundinfo = partdesc->boundinfo; + context->partcollation = partkey->partcollation; + context->partsupfunc = partkey->partsupfunc; + + /* We'll look up type-specific support functions as needed */ + context->stepcmpfuncs = (FmgrInfo *) + palloc0(sizeof(FmgrInfo) * n_steps * partnatts); + + context->ppccontext = CurrentMemoryContext; + context->planstate = planstate; + + /* Initialize expression state for each expression we need */ + context->exprstates = (ExprState **) + palloc0(sizeof(ExprState *) * n_steps * partnatts); + foreach(lc3, pinfo->pruning_steps) { - Expr *expr = (Expr *) lfirst(lc3); + PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc3); + ListCell *lc4; + int keyno; - /* not needed for Consts */ - if (!IsA(expr, Const)) + /* not needed for other step kinds */ + if (!IsA(step, PartitionPruneStepOp)) + continue; + + 
Assert(list_length(step->exprs) <= partnatts); + + keyno = 0; + foreach(lc4, step->exprs) { - int stateidx = PruneCxtStateIdx(partnatts, - step->step.step_id, - keyno); + Expr *expr = (Expr *) lfirst(lc4); - context->exprstates[stateidx] = - ExecInitExpr(expr, context->planstate); + /* not needed for Consts */ + if (!IsA(expr, Const)) + { + int stateidx = PruneCxtStateIdx(partnatts, + step->step.step_id, + keyno); + + context->exprstates[stateidx] = + ExecInitExpr(expr, context->planstate); + } + keyno++; } - keyno++; } + + /* Array is not modified at runtime, so just point to plan's copy */ + context->exprhasexecparam = pinfo->hasexecparam; + + pprune->pruning_steps = pinfo->pruning_steps; + pprune->do_initial_prune = pinfo->do_initial_prune; + pprune->do_exec_prune = pinfo->do_exec_prune; + + /* Record if pruning would be useful at any level */ + prunestate->do_initial_prune |= pinfo->do_initial_prune; + prunestate->do_exec_prune |= pinfo->do_exec_prune; + + /* + * Accumulate the IDs of all PARAM_EXEC Params affecting the + * partitioning decisions at this plan node. + */ + prunestate->execparamids = bms_add_members(prunestate->execparamids, + pinfo->execparamids); + + j++; } - - /* Array is not modified at runtime, so just point to plan's copy */ - context->exprhasexecparam = pinfo->hasexecparam; - - pprune->pruning_steps = pinfo->pruning_steps; - pprune->do_initial_prune = pinfo->do_initial_prune; - pprune->do_exec_prune = pinfo->do_exec_prune; - - /* Record if pruning would be useful at any level */ - prunestate->do_initial_prune |= pinfo->do_initial_prune; - prunestate->do_exec_prune |= pinfo->do_exec_prune; - - /* - * Accumulate the IDs of all PARAM_EXEC Params affecting the - * partitioning decisions at this plan node. 
- */ prunestate->execparamids = bms_add_members(prunestate->execparamids, - pinfo->execparamids); - i++; } @@ -1578,13 +1604,17 @@ ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate) { + PartitionPruningData **partprunedata = prunestate->partprunedata; int i; for (i = 0; i < prunestate->num_partprunedata; i++) { - PartitionPruningData *pprune = &prunestate->partprunedata[i]; + PartitionPruningData *prunedata = partprunedata[i]; + PartitionedRelPruningData *pprune = prunedata->partrelprunedata; + int j; - relation_close(pprune->context.partrel, NoLock); + for (j = 0; j < prunedata->num_partrelprunedata; j++) + relation_close(pprune[j].context.partrel, NoLock); } } @@ -1604,31 +1634,46 @@ ExecDestroyPartitionPruneState(PartitionPruneState *prunestate) Bitmapset * ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) { - PartitionPruningData *pprune; - MemoryContext oldcontext; Bitmapset *result = NULL; + MemoryContext oldcontext; + int i; Assert(prunestate->do_initial_prune); - pprune = prunestate->partprunedata; - /* * Switch to a temp context to avoid leaking memory in the executor's * memory context. */ oldcontext = MemoryContextSwitchTo(prunestate->prune_context); - /* Perform pruning without using PARAM_EXEC Params */ - find_matching_subplans_recurse(prunestate, pprune, true, &result); + /* + * For each hierarchy, do the pruning tests, and add nondeletable subplans' + * indexes to "result". 
+ */ + for (i = 0; i < prunestate->num_partprunedata; i++) + { + PartitionPruningData *prunedata; + PartitionedRelPruningData *pprune; + + prunedata = prunestate->partprunedata[i]; + pprune = &prunedata->partrelprunedata[0]; + + /* Perform pruning without using PARAM_EXEC Params */ + find_matching_subplans_recurse(prunedata, pprune, true, &result); + + /* Expression eval may have used space in node's ps_ExprContext too */ + ResetExprContext(pprune->context.planstate->ps_ExprContext); + } MemoryContextSwitchTo(oldcontext); /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + /* Add in any subplans that partition pruning didn't account for */ + result = bms_add_members(result, prunestate->other_subplans); + MemoryContextReset(prunestate->prune_context); - /* Expression eval may have used space in node's ps_ExprContext too */ - ResetExprContext(pprune->context.planstate->ps_ExprContext); /* * If any subplans were pruned, we must re-sequence the subplan indexes so @@ -1638,14 +1683,17 @@ ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) if (bms_num_members(result) < nsubplans) { int *new_subplan_indexes; + Bitmapset *new_other_subplans; int i; int newidx; /* * First we must build a temporary array which maps old subplan - * indexes to new ones. + * indexes to new ones. While we're at it, also recompute the + * other_subplans set, since indexes in it may change. 
*/ new_subplan_indexes = (int *) palloc(sizeof(int) * nsubplans); + new_other_subplans = NULL; newidx = 0; for (i = 0; i < nsubplans; i++) { @@ -1653,58 +1701,74 @@ ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) new_subplan_indexes[i] = newidx++; else new_subplan_indexes[i] = -1; /* Newly pruned */ + + if (bms_is_member(i, prunestate->other_subplans)) + new_other_subplans = bms_add_member(new_other_subplans, + new_subplan_indexes[i]); } + bms_free(prunestate->other_subplans); + prunestate->other_subplans = new_other_subplans; /* - * Now we can update each PartitionPruneInfo's subplan_map with new - * subplan indexes. We must also recompute its present_parts bitmap. - * We perform this loop in back-to-front order so that we determine - * present_parts for the lowest-level partitioned tables first. This - * way we can tell whether a sub-partitioned table's partitions were - * entirely pruned so we can exclude that from 'present_parts'. + * Now we can update each PartitionedRelPruneInfo's subplan_map with + * new subplan indexes. We must also recompute its present_parts + * bitmap. */ - for (i = prunestate->num_partprunedata - 1; i >= 0; i--) + for (i = 0; i < prunestate->num_partprunedata; i++) { - int nparts; + PartitionPruningData *prunedata = prunestate->partprunedata[i]; int j; - pprune = &prunestate->partprunedata[i]; - nparts = pprune->context.nparts; - /* We just rebuild present_parts from scratch */ - bms_free(pprune->present_parts); - pprune->present_parts = NULL; - - for (j = 0; j < nparts; j++) + /* + * Within each hierarchy, we perform this loop in back-to-front + * order so that we determine present_parts for the lowest-level + * partitioned tables first. This way we can tell whether a + * sub-partitioned table's partitions were entirely pruned so we + * can exclude that from 'present_parts'. 
+ */ + for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--) { - int oldidx = pprune->subplan_map[j]; - int subidx; + PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j]; + int nparts = pprune->context.nparts; + int k; - /* - * If this partition existed as a subplan then change the old - * subplan index to the new subplan index. The new index may - * become -1 if the partition was pruned above, or it may just - * come earlier in the subplan list due to some subplans being - * removed earlier in the list. If it's a subpartition, add - * it to present_parts unless it's entirely pruned. - */ - if (oldidx >= 0) + /* We just rebuild present_parts from scratch */ + bms_free(pprune->present_parts); + pprune->present_parts = NULL; + + for (k = 0; k < nparts; k++) { - Assert(oldidx < nsubplans); - pprune->subplan_map[j] = new_subplan_indexes[oldidx]; + int oldidx = pprune->subplan_map[k]; + int subidx; - if (new_subplan_indexes[oldidx] >= 0) - pprune->present_parts = - bms_add_member(pprune->present_parts, j); - } - else if ((subidx = pprune->subpart_map[j]) >= 0) - { - PartitionPruningData *subprune; + /* + * If this partition existed as a subplan then change the + * old subplan index to the new subplan index. The new + * index may become -1 if the partition was pruned above, + * or it may just come earlier in the subplan list due to + * some subplans being removed earlier in the list. If + * it's a subpartition, add it to present_parts unless + * it's entirely pruned. 
+ */ + if (oldidx >= 0) + { + Assert(oldidx < nsubplans); + pprune->subplan_map[k] = new_subplan_indexes[oldidx]; - subprune = &prunestate->partprunedata[subidx]; + if (new_subplan_indexes[oldidx] >= 0) + pprune->present_parts = + bms_add_member(pprune->present_parts, k); + } + else if ((subidx = pprune->subpart_map[k]) >= 0) + { + PartitionedRelPruningData *subprune; - if (!bms_is_empty(subprune->present_parts)) - pprune->present_parts = - bms_add_member(pprune->present_parts, j); + subprune = &prunedata->partrelprunedata[subidx]; + + if (!bms_is_empty(subprune->present_parts)) + pprune->present_parts = + bms_add_member(pprune->present_parts, k); + } } } } @@ -1725,11 +1789,9 @@ ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate) { - PartitionPruningData *pprune; - MemoryContext oldcontext; Bitmapset *result = NULL; - - pprune = prunestate->partprunedata; + MemoryContext oldcontext; + int i; /* * Switch to a temp context to avoid leaking memory in the executor's @@ -1737,16 +1799,33 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate) */ oldcontext = MemoryContextSwitchTo(prunestate->prune_context); - find_matching_subplans_recurse(prunestate, pprune, false, &result); + /* + * For each hierarchy, do the pruning tests, and add nondeletable subplans' + * indexes to "result". 
+ */ + for (i = 0; i < prunestate->num_partprunedata; i++) + { + PartitionPruningData *prunedata; + PartitionedRelPruningData *pprune; + + prunedata = prunestate->partprunedata[i]; + pprune = &prunedata->partrelprunedata[0]; + + find_matching_subplans_recurse(prunedata, pprune, false, &result); + + /* Expression eval may have used space in node's ps_ExprContext too */ + ResetExprContext(pprune->context.planstate->ps_ExprContext); + } MemoryContextSwitchTo(oldcontext); /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + /* Add in any subplans that partition pruning didn't account for */ + result = bms_add_members(result, prunestate->other_subplans); + MemoryContextReset(prunestate->prune_context); - /* Expression eval may have used space in node's ps_ExprContext too */ - ResetExprContext(pprune->context.planstate->ps_ExprContext); return result; } @@ -1759,8 +1838,8 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate) * Adds valid (non-prunable) subplan IDs to *validsubplans */ static void -find_matching_subplans_recurse(PartitionPruneState *prunestate, - PartitionPruningData *pprune, +find_matching_subplans_recurse(PartitionPruningData *prunedata, + PartitionedRelPruningData *pprune, bool initial_prune, Bitmapset **validsubplans) { @@ -1802,8 +1881,8 @@ find_matching_subplans_recurse(PartitionPruneState *prunestate, int partidx = pprune->subpart_map[i]; if (partidx >= 0) - find_matching_subplans_recurse(prunestate, - &prunestate->partprunedata[partidx], + find_matching_subplans_recurse(prunedata, + &prunedata->partrelprunedata[partidx], initial_prune, validsubplans); else { diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index 86a68d3020..f08dfcbcf0 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -129,7 +129,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) appendstate->as_whichplan = INVALID_SUBPLAN_INDEX; /* If run-time partition 
pruning is enabled, then set that up now */ - if (node->part_prune_infos != NIL) + if (node->part_prune_info != NULL) { PartitionPruneState *prunestate; @@ -138,7 +138,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) /* Create the working data structure for pruning. */ prunestate = ExecCreatePartitionPruneState(&appendstate->ps, - node->part_prune_infos); + node->part_prune_info); appendstate->as_prune_state = prunestate; /* Perform an initial partition prune, if required. */ diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index be43014cb8..9a72d3a0ac 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -90,7 +90,7 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) mergestate->ms_noopscan = false; /* If run-time partition pruning is enabled, then set that up now */ - if (node->part_prune_infos != NIL) + if (node->part_prune_info != NULL) { PartitionPruneState *prunestate; @@ -98,7 +98,7 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) ExecAssignExprContext(estate, &mergestate->ps); prunestate = ExecCreatePartitionPruneState(&mergestate->ps, - node->part_prune_infos); + node->part_prune_info); mergestate->ms_prune_state = prunestate; /* Perform an initial partition prune, if required. 
*/ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 17b650b8cb..7c8220cf65 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -245,7 +245,7 @@ _copyAppend(const Append *from) COPY_NODE_FIELD(appendplans); COPY_SCALAR_FIELD(first_partial_plan); COPY_NODE_FIELD(partitioned_rels); - COPY_NODE_FIELD(part_prune_infos); + COPY_NODE_FIELD(part_prune_info); return newnode; } @@ -273,7 +273,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); - COPY_NODE_FIELD(part_prune_infos); + COPY_NODE_FIELD(part_prune_info); return newnode; } @@ -1182,6 +1182,17 @@ _copyPartitionPruneInfo(const PartitionPruneInfo *from) { PartitionPruneInfo *newnode = makeNode(PartitionPruneInfo); + COPY_NODE_FIELD(prune_infos); + COPY_BITMAPSET_FIELD(other_subplans); + + return newnode; +} + +static PartitionedRelPruneInfo * +_copyPartitionedRelPruneInfo(const PartitionedRelPruneInfo *from) +{ + PartitionedRelPruneInfo *newnode = makeNode(PartitionedRelPruneInfo); + COPY_SCALAR_FIELD(reloid); COPY_NODE_FIELD(pruning_steps); COPY_BITMAPSET_FIELD(present_parts); @@ -4908,6 +4919,9 @@ copyObjectImpl(const void *from) case T_PartitionPruneInfo: retval = _copyPartitionPruneInfo(from); break; + case T_PartitionedRelPruneInfo: + retval = _copyPartitionedRelPruneInfo(from); + break; case T_PartitionPruneStepOp: retval = _copyPartitionPruneStepOp(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index a6454ce28b..6269f474d2 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -402,7 +402,7 @@ _outAppend(StringInfo str, const Append *node) WRITE_NODE_FIELD(appendplans); WRITE_INT_FIELD(first_partial_plan); WRITE_NODE_FIELD(partitioned_rels); - WRITE_NODE_FIELD(part_prune_infos); + 
WRITE_NODE_FIELD(part_prune_info); } static void @@ -435,7 +435,7 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %s", booltostr(node->nullsFirst[i])); - WRITE_NODE_FIELD(part_prune_infos); + WRITE_NODE_FIELD(part_prune_info); } static void @@ -1014,10 +1014,19 @@ _outPlanRowMark(StringInfo str, const PlanRowMark *node) static void _outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node) +{ + WRITE_NODE_TYPE("PARTITIONPRUNEINFO"); + + WRITE_NODE_FIELD(prune_infos); + WRITE_BITMAPSET_FIELD(other_subplans); +} + +static void +_outPartitionedRelPruneInfo(StringInfo str, const PartitionedRelPruneInfo *node) { int i; - WRITE_NODE_TYPE("PARTITIONPRUNEINFO"); + WRITE_NODE_TYPE("PARTITIONEDRELPRUNEINFO"); WRITE_OID_FIELD(reloid); WRITE_NODE_FIELD(pruning_steps); @@ -3831,6 +3840,9 @@ outNode(StringInfo str, const void *obj) case T_PartitionPruneInfo: _outPartitionPruneInfo(str, obj); break; + case T_PartitionedRelPruneInfo: + _outPartitionedRelPruneInfo(str, obj); + break; case T_PartitionPruneStepOp: _outPartitionPruneStepOp(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 9a01eb6b63..3254524223 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1612,7 +1612,7 @@ _readAppend(void) READ_NODE_FIELD(appendplans); READ_INT_FIELD(first_partial_plan); READ_NODE_FIELD(partitioned_rels); - READ_NODE_FIELD(part_prune_infos); + READ_NODE_FIELD(part_prune_info); READ_DONE(); } @@ -1634,7 +1634,7 @@ _readMergeAppend(void) READ_OID_ARRAY(sortOperators, local_node->numCols); READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); - READ_NODE_FIELD(part_prune_infos); + READ_NODE_FIELD(part_prune_info); READ_DONE(); } @@ -2329,6 +2329,17 @@ _readPartitionPruneInfo(void) { READ_LOCALS(PartitionPruneInfo); + READ_NODE_FIELD(prune_infos); + READ_BITMAPSET_FIELD(other_subplans); + + 
READ_DONE(); +} + +static PartitionedRelPruneInfo * +_readPartitionedRelPruneInfo(void) +{ + READ_LOCALS(PartitionedRelPruneInfo); + READ_OID_FIELD(reloid); READ_NODE_FIELD(pruning_steps); READ_BITMAPSET_FIELD(present_parts); @@ -2726,6 +2737,8 @@ parseNodeString(void) return_value = _readPlanRowMark(); else if (MATCH("PARTITIONPRUNEINFO", 18)) return_value = _readPartitionPruneInfo(); + else if (MATCH("PARTITIONEDRELPRUNEINFO", 23)) + return_value = _readPartitionedRelPruneInfo(); else if (MATCH("PARTITIONPRUNESTEPOP", 20)) return_value = _readPartitionPruneStepOp(); else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25)) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index f04c30af45..0e80aeb65c 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1388,7 +1388,6 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *all_child_outers = NIL; ListCell *l; List *partitioned_rels = NIL; - bool build_partitioned_rels = false; double partial_rows = -1; /* If appropriate, consider parallel append */ @@ -1413,10 +1412,11 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, if (rel->part_scheme != NULL) { if (IS_SIMPLE_REL(rel)) - partitioned_rels = rel->partitioned_child_rels; + partitioned_rels = list_make1(rel->partitioned_child_rels); else if (IS_JOIN_REL(rel)) { int relid = -1; + List *partrels = NIL; /* * For a partitioned joinrel, concatenate the component rels' @@ -1430,16 +1430,16 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, component = root->simple_rel_array[relid]; Assert(component->part_scheme != NULL); Assert(list_length(component->partitioned_child_rels) >= 1); - partitioned_rels = - list_concat(partitioned_rels, + partrels = + list_concat(partrels, list_copy(component->partitioned_child_rels)); } + + partitioned_rels = list_make1(partrels); } Assert(list_length(partitioned_rels) >= 1); } - else if (rel->rtekind == RTE_SUBQUERY) - 
build_partitioned_rels = true; /* * For every non-dummy child, remember the cheapest path. Also, identify @@ -1453,17 +1453,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, Path *cheapest_partial_path = NULL; /* - * If we need to build partitioned_rels, accumulate the partitioned - * rels for this child. We must ensure that parents are always listed - * before their child partitioned tables. + * For UNION ALLs with non-empty partitioned_child_rels, accumulate + * the Lists of child relations. */ - if (build_partitioned_rels) - { - List *cprels = childrel->partitioned_child_rels; - - partitioned_rels = list_concat(partitioned_rels, - list_copy(cprels)); - } + if (rel->rtekind == RTE_SUBQUERY && childrel->partitioned_child_rels != NIL) + partitioned_rels = lappend(partitioned_rels, + childrel->partitioned_child_rels); /* * If child has an unparameterized cheapest-total path, add that to diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 0a0bec3bfc..ae41c9efa0 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -124,6 +124,7 @@ static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root, static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, List **qual, List **indexqual, List **indexECs); static void bitmap_subplan_mark_shared(Plan *plan); +static List *flatten_partitioned_rels(List *partitioned_rels); static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path, List *tlist, List *scan_clauses); static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root, @@ -202,7 +203,8 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, Index scanrelid, int wtParam); static Append *make_append(List *appendplans, int first_partial_plan, - List *tlist, List *partitioned_rels, List *partpruneinfos); + List *tlist, List 
*partitioned_rels, + PartitionPruneInfo *partpruneinfo); static RecursiveUnion *make_recursive_union(List *tlist, Plan *lefttree, Plan *righttree, @@ -1030,7 +1032,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) List *subplans = NIL; ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; - List *partpruneinfos = NIL; + PartitionPruneInfo *partpruneinfo = NULL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1070,8 +1072,8 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) /* * If any quals exist, they may be useful to perform further partition - * pruning during execution. Gather information needed by the executor - * to do partition pruning. + * pruning during execution. Gather information needed by the executor to + * do partition pruning. */ if (enable_partition_pruning && rel->reloptkind == RELOPT_BASEREL && @@ -1093,10 +1095,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) } if (prunequal != NIL) - partpruneinfos = - make_partition_pruneinfo(root, + partpruneinfo = + make_partition_pruneinfo(root, rel, + best_path->subpaths, best_path->partitioned_rels, - best_path->subpaths, prunequal); + prunequal); } /* @@ -1108,7 +1111,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) plan = make_append(subplans, best_path->first_partial_path, tlist, best_path->partitioned_rels, - partpruneinfos); + partpruneinfo); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1132,7 +1135,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) List *subplans = NIL; ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; - List *partpruneinfos = NIL; + PartitionPruneInfo *partpruneinfo = NULL; /* * We don't have the actual creation of the MergeAppend node split out @@ -1220,8 +1223,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) /* * If any quals exist, they may be useful to perform further partition - * pruning during 
execution. Gather information needed by the executor - * to do partition pruning. + * pruning during execution. Gather information needed by the executor to + * do partition pruning. */ if (enable_partition_pruning && rel->reloptkind == RELOPT_BASEREL && @@ -1244,14 +1247,16 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) } if (prunequal != NIL) - partpruneinfos = make_partition_pruneinfo(root, - best_path->partitioned_rels, - best_path->subpaths, prunequal); + partpruneinfo = make_partition_pruneinfo(root, rel, + best_path->subpaths, + best_path->partitioned_rels, + prunequal); } - node->partitioned_rels = best_path->partitioned_rels; + node->partitioned_rels = + flatten_partitioned_rels(best_path->partitioned_rels); node->mergeplans = subplans; - node->part_prune_infos = partpruneinfos; + node->part_prune_info = partpruneinfo; return (Plan *) node; } @@ -5000,6 +5005,27 @@ bitmap_subplan_mark_shared(Plan *plan) elog(ERROR, "unrecognized node type: %d", nodeTag(plan)); } +/* + * flatten_partitioned_rels + * Convert List of Lists into a single List with all elements from the + * sub-lists. 
+ */ +static List * +flatten_partitioned_rels(List *partitioned_rels) +{ + List *newlist = NIL; + ListCell *lc; + + foreach(lc, partitioned_rels) + { + List *sublist = lfirst(lc); + + newlist = list_concat(newlist, list_copy(sublist)); + } + + return newlist; +} + /***************************************************************************** * * PLAN NODE BUILDING ROUTINES @@ -5343,7 +5369,7 @@ make_foreignscan(List *qptlist, static Append * make_append(List *appendplans, int first_partial_plan, List *tlist, List *partitioned_rels, - List *partpruneinfos) + PartitionPruneInfo *partpruneinfo) { Append *node = makeNode(Append); Plan *plan = &node->plan; @@ -5354,8 +5380,8 @@ make_append(List *appendplans, int first_partial_plan, plan->righttree = NULL; node->appendplans = appendplans; node->first_partial_plan = first_partial_plan; - node->partitioned_rels = partitioned_rels; - node->part_prune_infos = partpruneinfos; + node->partitioned_rels = flatten_partitioned_rels(partitioned_rels); + node->part_prune_info = partpruneinfo; return node; } @@ -6512,7 +6538,7 @@ make_modifytable(PlannerInfo *root, node->operation = operation; node->canSetTag = canSetTag; node->nominalRelation = nominalRelation; - node->partitioned_rels = partitioned_rels; + node->partitioned_rels = flatten_partitioned_rels(partitioned_rels); node->partColsUpdated = partColsUpdated; node->resultRelations = resultRelations; node->resultRelIndex = -1; /* will be set correctly in setrefs.c */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index df4ec448cb..fd06da98b9 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1616,6 +1616,7 @@ inheritance_planner(PlannerInfo *root) * contain at least one member, that is, the root parent's index. 
*/ Assert(list_length(partitioned_rels) >= 1); + partitioned_rels = list_make1(partitioned_rels); } /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */ diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index bfacc2ce29..752810d0e4 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -112,6 +112,11 @@ typedef struct PruneStepResult } PruneStepResult; +static List *make_partitionedrel_pruneinfo(PlannerInfo *root, + RelOptInfo *parentrel, + int *relid_subplan_map, + List *partitioned_rels, List *prunequal, + Bitmapset **matchedsubplans); static List *gen_partprune_steps(RelOptInfo *rel, List *clauses, bool *contradictory); static List *gen_partprune_steps_internal(GeneratePruningStepsContext *context, @@ -160,7 +165,7 @@ static PruneStepResult *get_matching_range_bounds(PartitionPruneContext *context FmgrInfo *partsupfunc, Bitmapset *nullkeys); static Bitmapset *pull_exec_paramids(Expr *expr); static bool pull_exec_paramids_walker(Node *node, Bitmapset **context); -static bool analyze_partkey_exprs(PartitionPruneInfo *pinfo, List *steps, +static bool analyze_partkey_exprs(PartitionedRelPruneInfo *pinfo, List *steps, int partnatts); static PruneStepResult *perform_pruning_base_step(PartitionPruneContext *context, PartitionPruneStepOp *opstep); @@ -176,38 +181,43 @@ static bool partkey_datum_from_expr(PartitionPruneContext *context, /* * make_partition_pruneinfo - * Build List of PartitionPruneInfos, one for each partitioned rel. - * These can be used in the executor to allow additional partition - * pruning to take place. + * Builds a PartitionPruneInfo which can be used in the executor to allow + * additional partition pruning to take place. Returns NULL when + * partition pruning would be useless. * - * Here we generate partition pruning steps for 'prunequal' and also build a - * data structure which allows mapping of partition indexes into 'subpaths' - * indexes. 
+ * 'parentrel' is the RelOptInfo for an appendrel, and 'subpaths' is the list + * of scan paths for its child rels. * - * If no non-Const expressions are being compared to the partition key in any - * of the 'partitioned_rels', then we return NIL to indicate no run-time - * pruning should be performed. Run-time pruning would be useless, since the - * pruning done during planning will have pruned everything that can be. + * 'partitioned_rels' is a List containing Lists of relids of partitioned + * tables (a/k/a non-leaf partitions) that are parents of some of the child + * rels. Here we attempt to populate the PartitionPruneInfo by adding a + * 'prune_infos' item for each sublist in the 'partitioned_rels' list. + * However, some of the sets of partitioned relations may not require any + * run-time pruning. In these cases we'll simply not include a 'prune_infos' + * item for that set and instead we'll add all the subplans which belong to + * that set into the PartitionPruneInfo's 'other_subplans' field. Callers + * will likely never want to prune subplans which are mentioned in this field. + * + * 'prunequal' is a list of potential pruning quals. */ -List * -make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, - List *subpaths, List *prunequal) +PartitionPruneInfo * +make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, + List *subpaths, List *partitioned_rels, + List *prunequal) { - RelOptInfo *targetpart = NULL; - List *pinfolist = NIL; - bool doruntimeprune = false; + PartitionPruneInfo *pruneinfo; + Bitmapset *allmatchedsubplans = NULL; int *relid_subplan_map; - int *relid_subpart_map; ListCell *lc; + List *prunerelinfos; int i; /* - * Construct two temporary arrays to map from planner relids to subplan - * and sub-partition indexes. For convenience, we use 1-based indexes - * here, so that zero can represent an un-filled array entry. + * Construct a temporary array to map from planner relids to subplan + * indexes. 
For convenience, we use 1-based indexes here, so that zero + * can represent an un-filled array entry. */ relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size); - relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size); /* * relid_subplan_map maps relid of a leaf partition to the index in @@ -227,10 +237,107 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, relid_subplan_map[pathrel->relid] = i++; } + /* We now build a PartitionedRelPruneInfo for each partitioned rel. */ + prunerelinfos = NIL; + foreach(lc, partitioned_rels) + { + List *rels = (List *) lfirst(lc); + List *pinfolist; + Bitmapset *matchedsubplans = NULL; + + pinfolist = make_partitionedrel_pruneinfo(root, parentrel, + relid_subplan_map, + rels, prunequal, + &matchedsubplans); + + /* When pruning is possible, record the matched subplans */ + if (pinfolist != NIL) + { + prunerelinfos = lappend(prunerelinfos, pinfolist); + allmatchedsubplans = bms_join(matchedsubplans, + allmatchedsubplans); + } + } + + pfree(relid_subplan_map); + + /* + * If none of the partition hierarchies had any useful run-time pruning + * quals, then we can just not bother with run-time pruning. + */ + if (prunerelinfos == NIL) + return NULL; + + /* Else build the result data structure */ + pruneinfo = makeNode(PartitionPruneInfo); + pruneinfo->prune_infos = prunerelinfos; + + /* + * Some subplans may not belong to any of the listed partitioned rels. + * This can happen for UNION ALL queries which include a non-partitioned + * table, or when some of the hierarchies aren't run-time prunable. Build + * a bitmapset of the indexes of all such subplans, so that the executor + * can identify which subplans should never be pruned. 
+ */ + if (bms_num_members(allmatchedsubplans) < list_length(subpaths)) + { + Bitmapset *other_subplans; + + /* Create the complement of allmatchedsubplans */ + other_subplans = bms_add_range(NULL, 0, list_length(subpaths) - 1); + other_subplans = bms_del_members(other_subplans, allmatchedsubplans); + + pruneinfo->other_subplans = other_subplans; + } + else + pruneinfo->other_subplans = NULL; + + return pruneinfo; +} + +/* + * make_partitionedrel_pruneinfo + * Build a List of PartitionedRelPruneInfos, one for each partitioned + * rel. These can be used in the executor to allow additional partition + * pruning to take place. + * + * Here we generate partition pruning steps for 'prunequal' and also build a + * data structure which allows mapping of partition indexes into 'subpaths' + * indexes. + * + * If no non-Const expressions are being compared to the partition key in any + * of the 'partitioned_rels', then we return NIL to indicate no run-time + * pruning should be performed. Run-time pruning would be useless since the + * pruning done during planning will have pruned everything that can be. + * + * On non-NIL return, 'matchedsubplans' is set to the subplan indexes which + * were matched to this partition hierarchy. + */ +static List * +make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, + int *relid_subplan_map, + List *partitioned_rels, List *prunequal, + Bitmapset **matchedsubplans) +{ + RelOptInfo *targetpart = NULL; + List *pinfolist = NIL; + bool doruntimeprune = false; + int *relid_subpart_map; + Bitmapset *subplansfound = NULL; + ListCell *lc; + int i; + + /* + * Construct a temporary array to map from planner relids to index of the + * partitioned_rel. For convenience, we use 1-based indexes here, so that + * zero can represent an un-filled array entry. 
+ */ + relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size); + /* * relid_subpart_map maps relid of a non-leaf partition to the index in * 'partitioned_rels' of that rel (which will also be the index in the - * returned PartitionPruneInfo list of the info for that partition). + * returned PartitionedRelPruneInfo list of the info for that partition). */ i = 1; foreach(lc, partitioned_rels) @@ -246,12 +353,12 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, relid_subpart_map[rti] = i++; } - /* We now build a PartitionPruneInfo for each partitioned rel */ + /* We now build a PartitionedRelPruneInfo for each partitioned rel */ foreach(lc, partitioned_rels) { Index rti = lfirst_int(lc); RelOptInfo *subpart = find_base_rel(root, rti); - PartitionPruneInfo *pinfo; + PartitionedRelPruneInfo *pinfo; RangeTblEntry *rte; Bitmapset *present_parts; int nparts = subpart->nparts; @@ -263,12 +370,35 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, bool contradictory; /* - * The first item in the list is the target partitioned relation. The - * quals belong to this relation, so require no translation. + * The first item in the list is the target partitioned relation. */ if (!targetpart) { targetpart = subpart; + + /* + * The prunequal is presented to us as a qual for 'parentrel'. + * Frequently this rel is the same as targetpart, so we can skip + * an adjust_appendrel_attrs step. But it might not be, and then + * we have to translate. We update the prunequal parameter here, + * because in later iterations of the loop for child partitions, + * we want to translate from parent to child variables. 
+ */ + if (parentrel != subpart) + { + int nappinfos; + AppendRelInfo **appinfos = find_appinfos_by_relids(root, + subpart->relids, + &nappinfos); + + prunequal = (List *) adjust_appendrel_attrs(root, (Node *) + prunequal, + nappinfos, + appinfos); + + pfree(appinfos); + } + partprunequal = prunequal; } else @@ -320,13 +450,20 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, subplan_map[i] = subplanidx; subpart_map[i] = subpartidx; - if (subplanidx >= 0 || subpartidx >= 0) + if (subplanidx >= 0) + { + present_parts = bms_add_member(present_parts, i); + + /* Record finding this subplan */ + subplansfound = bms_add_member(subplansfound, subplanidx); + } + else if (subpartidx >= 0) present_parts = bms_add_member(present_parts, i); } rte = root->simple_rte_array[subpart->relid]; - pinfo = makeNode(PartitionPruneInfo); + pinfo = makeNode(PartitionedRelPruneInfo); pinfo->reloid = rte->relid; pinfo->pruning_steps = pruning_steps; pinfo->present_parts = present_parts; @@ -341,14 +478,17 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, pinfolist = lappend(pinfolist, pinfo); } - pfree(relid_subplan_map); pfree(relid_subpart_map); - if (doruntimeprune) - return pinfolist; + if (!doruntimeprune) + { + /* No run-time pruning required. */ + return NIL; + } - /* No run-time pruning required. */ - return NIL; + *matchedsubplans = subplansfound; + + return pinfolist; } /* @@ -2772,7 +2912,8 @@ pull_exec_paramids_walker(Node *node, Bitmapset **context) * level. Also fills fields of *pinfo to record how to process each step. 
*/ static bool -analyze_partkey_exprs(PartitionPruneInfo *pinfo, List *steps, int partnatts) +analyze_partkey_exprs(PartitionedRelPruneInfo *pinfo, List *steps, + int partnatts) { bool doruntimeprune = false; ListCell *lc; diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index e9e6d380ec..f6cd842cc9 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -112,15 +112,14 @@ typedef struct PartitionTupleRouting TupleTableSlot *root_tuple_slot; } PartitionTupleRouting; -/*----------------------- - * PartitionPruningData - Per-partitioned-table data for run-time pruning +/* + * PartitionedRelPruningData - Per-partitioned-table data for run-time pruning * of partitions. For a multilevel partitioned table, we have one of these - * for the topmost partition plus one for each non-leaf child partition, - * ordered such that parents appear before their children. + * for the topmost partition plus one for each non-leaf child partition. * * subplan_map[] and subpart_map[] have the same definitions as in - * PartitionPruneInfo (see plannodes.h); though note that here, - * subpart_map contains indexes into PartitionPruneState.partprunedata[]. + * PartitionedRelPruneInfo (see plannodes.h); though note that here, + * subpart_map contains indexes into PartitionPruningData.partrelprunedata[]. * * subplan_map Subplan index by partition index, or -1. * subpart_map Subpart index by partition index, or -1. @@ -134,9 +133,8 @@ typedef struct PartitionTupleRouting * executor startup (for this partitioning level). * do_exec_prune true if pruning should be performed during * executor run (for this partitioning level). 
- *----------------------- */ -typedef struct PartitionPruningData +typedef struct PartitionedRelPruningData { int *subplan_map; int *subpart_map; @@ -145,43 +143,59 @@ typedef struct PartitionPruningData List *pruning_steps; bool do_initial_prune; bool do_exec_prune; +} PartitionedRelPruningData; + +/* + * PartitionPruningData - Holds all the run-time pruning information for + * a single partitioning hierarchy containing one or more partitions. + * partrelprunedata[] is an array ordered such that parents appear before + * their children; in particular, the first entry is the topmost partition, + * which was actually named in the SQL query. + */ +typedef struct PartitionPruningData +{ + int num_partrelprunedata; /* number of array entries */ + PartitionedRelPruningData partrelprunedata[FLEXIBLE_ARRAY_MEMBER]; } PartitionPruningData; -/*----------------------- +/* * PartitionPruneState - State object required for plan nodes to perform * run-time partition pruning. * * This struct can be attached to plan types which support arbitrary Lists of - * subplans containing partitions to allow subplans to be eliminated due to + * subplans containing partitions, to allow subplans to be eliminated due to * the clauses being unable to match to any tuple that the subplan could - * possibly produce. Note that we currently support only one partitioned - * table per parent plan node, hence partprunedata[] need describe only one - * partitioning hierarchy. + * possibly produce. * - * partprunedata Array of PartitionPruningData for the plan's - * partitioned relation, ordered such that parent tables - * appear before children (hence, topmost table is first). - * num_partprunedata Number of items in 'partprunedata' array. - * do_initial_prune true if pruning should be performed during executor - * startup (at any hierarchy level). - * do_exec_prune true if pruning should be performed during - * executor run (at any hierarchy level). 
* execparamids Contains paramids of PARAM_EXEC Params found within * any of the partprunedata structs. Pruning must be * done again each time the value of one of these * parameters changes. + * other_subplans Contains indexes of subplans that don't belong to any + * "partprunedata", e.g. UNION ALL children that are not + * partitioned tables, or a partitioned table that the + * planner deemed run-time pruning to be useless for. + * These must not be pruned. * prune_context A short-lived memory context in which to execute the * partition pruning functions. - *----------------------- + * do_initial_prune true if pruning should be performed during executor + * startup (at any hierarchy level). + * do_exec_prune true if pruning should be performed during + * executor run (at any hierarchy level). + * num_partprunedata Number of items in "partprunedata" array. + * partprunedata Array of PartitionPruningData pointers for the plan's + * partitioned relation(s), one for each partitioning + * hierarchy that requires run-time pruning.
*/ typedef struct PartitionPruneState { - PartitionPruningData *partprunedata; - int num_partprunedata; + Bitmapset *execparamids; + Bitmapset *other_subplans; + MemoryContext prune_context; bool do_initial_prune; bool do_exec_prune; - Bitmapset *execparamids; - MemoryContext prune_context; + int num_partprunedata; + PartitionPruningData *partprunedata[FLEXIBLE_ARRAY_MEMBER]; } PartitionPruneState; extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, @@ -210,7 +224,7 @@ extern HeapTuple ConvertPartitionTupleSlot(TupleConversionMap *map, extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute); extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate, - List *partitionpruneinfo); + PartitionPruneInfo *partitionpruneinfo); extern void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate); extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate); extern Bitmapset *ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 43f1552241..697d3d7a5f 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -88,6 +88,7 @@ typedef enum NodeTag T_NestLoopParam, T_PlanRowMark, T_PartitionPruneInfo, + T_PartitionedRelPruneInfo, T_PartitionPruneStepOp, T_PartitionPruneStepCombine, T_PlanInvalItem, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b80df601cd..7c2abbd03a 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -241,6 +241,8 @@ typedef struct ModifyTable List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } ModifyTable; +struct PartitionPruneInfo; /* forward reference to struct below */ + /* ---------------- * Append node - * Generate the concatenation of the results of sub-plans. 
@@ -260,8 +262,8 @@ typedef struct Append /* RT indexes of non-leaf tables in a partition tree */ List *partitioned_rels; - /* Info for run-time subplan pruning, one entry per partitioned_rels */ - List *part_prune_infos; /* List of PartitionPruneInfo */ + /* Info for run-time subplan pruning; NULL if we're not doing that */ + struct PartitionPruneInfo *part_prune_info; } Append; /* ---------------- @@ -281,9 +283,8 @@ typedef struct MergeAppend Oid *sortOperators; /* OIDs of operators to sort them by */ Oid *collations; /* OIDs of collations */ bool *nullsFirst; /* NULLS FIRST/LAST directions */ - - /* Info for run-time subplan pruning, one entry per partitioned_rels */ - List *part_prune_infos; /* List of PartitionPruneInfo */ + /* Info for run-time subplan pruning; NULL if we're not doing that */ + struct PartitionPruneInfo *part_prune_info; } MergeAppend; /* ---------------- @@ -1063,12 +1064,32 @@ typedef struct PlanRowMark * We also store various details to tell the executor when it should be * performing partition pruning. * - * Each PartitionPruneInfo describes the partitioning rules for a single - * partitioned table (a/k/a level of partitioning). For a multilevel - * partitioned table, we have a List of PartitionPruneInfos, where the - * first entry represents the topmost partitioned table and additional - * entries represent non-leaf child partitions, ordered such that parents - * appear before their children. + * Each PartitionedRelPruneInfo describes the partitioning rules for a single + * partitioned table (a/k/a level of partitioning). Since a partitioning + * hierarchy could contain multiple levels, we represent it by a List of + * PartitionedRelPruneInfos, where the first entry represents the topmost + * partitioned table and additional entries represent non-leaf child + * partitions, ordered such that parents appear before their children. 
+ * Then, since an Append-type node could have multiple partitioning + * hierarchies among its children, we have an unordered List of those Lists. + * + * prune_infos List of Lists containing PartitionedRelPruneInfo nodes, + * one sublist per run-time-prunable partition hierarchy + * appearing in the parent plan node's subplans. + * other_subplans Indexes of any subplans that are not accounted for + * by any of the PartitionedRelPruneInfo nodes in + * "prune_infos". These subplans must not be pruned. + */ +typedef struct PartitionPruneInfo +{ + NodeTag type; + List *prune_infos; + Bitmapset *other_subplans; +} PartitionPruneInfo; + +/* + * PartitionedRelPruneInfo - Details required to allow the executor to prune + * partitions for a single partitioned table. * * subplan_map[] and subpart_map[] are indexed by partition index (where * zero is the topmost partition, and non-leaf partitions must come before @@ -1076,11 +1097,12 @@ typedef struct PlanRowMark * zero-based index of the partition's subplan in the parent plan's subplan * list; it is -1 if the partition is non-leaf or has been pruned. For a * non-leaf partition p, subpart_map[p] contains the zero-based index of - * that sub-partition's PartitionPruneInfo in the plan's PartitionPruneInfo - * list; it is -1 if the partition is a leaf or has been pruned. All these - * indexes are global across the whole partitioned table and Append plan node. + * that sub-partition's PartitionedRelPruneInfo in the hierarchy's + * PartitionedRelPruneInfo list; it is -1 if the partition is a leaf or has + * been pruned. Note that subplan indexes are global across the parent plan + * node, but partition indexes are valid only within a particular hierarchy. 
*/ -typedef struct PartitionPruneInfo +typedef struct PartitionedRelPruneInfo { NodeTag type; Oid reloid; /* OID of partition rel for this level */ @@ -1098,7 +1120,7 @@ typedef struct PartitionPruneInfo bool do_exec_prune; /* true if pruning should be performed during * executor run. */ Bitmapset *execparamids; /* All PARAM_EXEC Param IDs in pruning_steps */ -} PartitionPruneInfo; +} PartitionedRelPruneInfo; /* * Abstract Node type for partition pruning steps (there are no concrete diff --git a/src/include/partitioning/partprune.h b/src/include/partitioning/partprune.h index 9944d2832f..b95c346bab 100644 --- a/src/include/partitioning/partprune.h +++ b/src/include/partitioning/partprune.h @@ -74,9 +74,11 @@ typedef struct PartitionPruneContext #define PruneCxtStateIdx(partnatts, step_id, keyno) \ ((partnatts) * (step_id) + (keyno)) -extern List *make_partition_pruneinfo(PlannerInfo *root, +extern PartitionPruneInfo *make_partition_pruneinfo(PlannerInfo *root, + RelOptInfo *parentrel, + List *subpaths, List *partitioned_rels, - List *subpaths, List *prunequal); + List *prunequal); extern Relids prune_append_rel_partitions(RelOptInfo *rel); extern Bitmapset *get_matching_partitions(PartitionPruneContext *context, List *pruning_steps); diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 3fef4921aa..358eccad70 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2382,6 +2382,96 @@ select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 Index Cond: (a = $0) (52 rows) +-- Test run-time partition pruning with UNION ALL parents +explain (analyze, costs off, summary off, timing off) +select * from (select * from ab where a = 1 union all select * from ab) ab where b = (select 1); + QUERY PLAN +------------------------------------------------------------------------------- + Append (actual rows=0 loops=1) + InitPlan 1 (returns $0) 
+ -> Result (actual rows=1 loops=1) + -> Append (actual rows=0 loops=1) + -> Bitmap Heap Scan on ab_a1_b1 ab_a1_b1_1 (actual rows=0 loops=1) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1) + Index Cond: (a = 1) + -> Bitmap Heap Scan on ab_a1_b2 ab_a1_b2_1 (never executed) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b2_a_idx (never executed) + Index Cond: (a = 1) + -> Bitmap Heap Scan on ab_a1_b3 ab_a1_b3_1 (never executed) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) + Index Cond: (a = 1) + -> Seq Scan on ab_a1_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a1_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a1_b3 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a2_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a2_b3 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a3_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a3_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a3_b3 (never executed) + Filter: (b = $0) +(37 rows) + +-- A case containing a UNION ALL with a non-partitioned child. 
+explain (analyze, costs off, summary off, timing off) +select * from (select * from ab where a = 1 union all (values(10,5)) union all select * from ab) ab where b = (select 1); + QUERY PLAN +------------------------------------------------------------------------------- + Append (actual rows=0 loops=1) + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Append (actual rows=0 loops=1) + -> Bitmap Heap Scan on ab_a1_b1 ab_a1_b1_1 (actual rows=0 loops=1) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1) + Index Cond: (a = 1) + -> Bitmap Heap Scan on ab_a1_b2 ab_a1_b2_1 (never executed) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b2_a_idx (never executed) + Index Cond: (a = 1) + -> Bitmap Heap Scan on ab_a1_b3 ab_a1_b3_1 (never executed) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) + Index Cond: (a = 1) + -> Result (actual rows=0 loops=1) + One-Time Filter: (5 = $0) + -> Seq Scan on ab_a1_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a1_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a1_b3 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a2_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a2_b3 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a3_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a3_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a3_b3 (never executed) + Filter: (b = $0) +(39 rows) + deallocate ab_q1; deallocate ab_q2; deallocate ab_q3; @@ -3318,3 +3408,86 @@ explain (costs off) select * from pp_temp_parent where a = 2; (3 rows) drop table pp_temp_parent; +-- Stress run-time partition pruning a bit more, per bug reports +create temp table p (a int, b int, c int) partition by list (a); +create temp table p1 partition of p for values in (1); +create 
temp table p2 partition of p for values in (2); +create temp table q (a int, b int, c int) partition by list (a); +create temp table q1 partition of q for values in (1) partition by list (b); +create temp table q11 partition of q1 for values in (1) partition by list (c); +create temp table q111 partition of q11 for values in (1); +create temp table q2 partition of q for values in (2) partition by list (b); +create temp table q21 partition of q2 for values in (1); +create temp table q22 partition of q2 for values in (2); +insert into q22 values (2, 2, 3); +explain (costs off) +select * +from ( + select * from p + union all + select * from q1 + union all + select 1, 1, 1 + ) s(a, b, c) +where s.a = 1 and s.b = 1 and s.c = (select 1); + QUERY PLAN +---------------------------------------------------- + Append + InitPlan 1 (returns $0) + -> Result + -> Seq Scan on p1 + Filter: ((a = 1) AND (b = 1) AND (c = $0)) + -> Seq Scan on q111 + Filter: ((a = 1) AND (b = 1) AND (c = $0)) + -> Result + One-Time Filter: (1 = $0) +(9 rows) + +select * +from ( + select * from p + union all + select * from q1 + union all + select 1, 1, 1 + ) s(a, b, c) +where s.a = 1 and s.b = 1 and s.c = (select 1); + a | b | c +---+---+--- + 1 | 1 | 1 +(1 row) + +prepare q (int, int) as +select * +from ( + select * from p + union all + select * from q1 + union all + select 1, 1, 1 + ) s(a, b, c) +where s.a = $1 and s.b = $2 and s.c = (select 1); +set plan_cache_mode to force_generic_plan; +explain (costs off) execute q (1, 1); + QUERY PLAN +--------------------------------------------------------------- + Append + InitPlan 1 (returns $0) + -> Result + Subplans Removed: 1 + -> Seq Scan on p1 + Filter: ((a = $1) AND (b = $2) AND (c = $0)) + -> Seq Scan on q111 + Filter: ((a = $1) AND (b = $2) AND (c = $0)) + -> Result + One-Time Filter: ((1 = $1) AND (1 = $2) AND (1 = $0)) +(10 rows) + +execute q (1, 1); + a | b | c +---+---+--- + 1 | 1 | 1 +(1 row) + +reset plan_cache_mode; +drop table p, q; diff 
--git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 4b198b1a1d..035ea49ccb 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -540,6 +540,14 @@ reset max_parallel_workers_per_gather; explain (analyze, costs off, summary off, timing off) select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 from lprt_a); +-- Test run-time partition pruning with UNION ALL parents +explain (analyze, costs off, summary off, timing off) +select * from (select * from ab where a = 1 union all select * from ab) ab where b = (select 1); + +-- A case containing a UNION ALL with a non-partitioned child. +explain (analyze, costs off, summary off, timing off) +select * from (select * from ab where a = 1 union all (values(10,5)) union all select * from ab) ab where b = (select 1); + deallocate ab_q1; deallocate ab_q2; deallocate ab_q3; @@ -878,3 +886,57 @@ create temp table pp_temp_part_def partition of pp_temp_parent default; explain (costs off) select * from pp_temp_parent where true; explain (costs off) select * from pp_temp_parent where a = 2; drop table pp_temp_parent; + +-- Stress run-time partition pruning a bit more, per bug reports +create temp table p (a int, b int, c int) partition by list (a); +create temp table p1 partition of p for values in (1); +create temp table p2 partition of p for values in (2); +create temp table q (a int, b int, c int) partition by list (a); +create temp table q1 partition of q for values in (1) partition by list (b); +create temp table q11 partition of q1 for values in (1) partition by list (c); +create temp table q111 partition of q11 for values in (1); +create temp table q2 partition of q for values in (2) partition by list (b); +create temp table q21 partition of q2 for values in (1); +create temp table q22 partition of q2 for values in (2); + +insert into q22 values (2, 2, 3); + +explain (costs off) +select * +from ( + select * 
from p + union all + select * from q1 + union all + select 1, 1, 1 + ) s(a, b, c) +where s.a = 1 and s.b = 1 and s.c = (select 1); + +select * +from ( + select * from p + union all + select * from q1 + union all + select 1, 1, 1 + ) s(a, b, c) +where s.a = 1 and s.b = 1 and s.c = (select 1); + +prepare q (int, int) as +select * +from ( + select * from p + union all + select * from q1 + union all + select 1, 1, 1 + ) s(a, b, c) +where s.a = $1 and s.b = $2 and s.c = (select 1); + +set plan_cache_mode to force_generic_plan; + +explain (costs off) execute q (1, 1); +execute q (1, 1); + +reset plan_cache_mode; +drop table p, q;