/*-------------------------------------------------------------------------
 *
 * nodeAppend.c
 *    routines to handle append nodes.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/executor/nodeAppend.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *      ExecInitAppend   - initialize the append node
 *      ExecAppend       - retrieve the next tuple from the node
 *      ExecEndAppend    - shut down the append node
 *      ExecReScanAppend - rescan the append node
 *
 *   NOTES
 *      Each append node contains a list of one or more subplans which
 *      must be iteratively processed (forwards or backwards).
 *      Tuples are retrieved by executing the 'whichplan'th subplan
 *      until the subplan stops returning tuples, at which point that
 *      plan is shut down and the next started up.
 *
 *      Append nodes don't make use of their left and right
 *      subtrees, rather they maintain a list of subplans so
 *      a typical append node looks like this in the plan tree:
 *
 *                 ...
 *                 /
 *              Append -------+------+------+--- nil
 *              /   \         |      |      |
 *            nil   nil      ...    ...    ...
 *                               subplans
 *
 *      Append nodes are currently used for unions, and to support
 *      inheritance queries, where several relations need to be scanned.
 *      For example, in our standard person/student/employee/student-emp
 *      example, where student and employee inherit from person
 *      and student-emp inherits from student and employee, the
 *      query:
 *
 *              select name from person
 *
 *      generates the plan:
 *
 *                |
 *              Append -------+-------+--------+--------+
 *              /   \         |       |        |        |
 *            nil   nil      Scan    Scan     Scan     Scan
 *                            |       |        |        |
 *                          person employee student student-emp
 */

#include "postgres.h"

#include "executor/execdebug.h"
#include "executor/nodeAppend.h"
#include "miscadmin.h"

/* Shared state for parallel-aware Append. */
struct ParallelAppendState
{
    LWLock      pa_lock;        /* mutual exclusion to choose next subplan */
    int         pa_next_plan;   /* next plan to choose by any worker */

    /*
     * pa_finished[i] should be true if no more workers should select subplan
     * i.  For a non-partial plan, this should be set to true as soon as a
     * worker selects the plan; for a partial plan, it remains false until
     * some worker executes the plan to completion.
     */
    bool        pa_finished[FLEXIBLE_ARRAY_MEMBER];
};

#define INVALID_SUBPLAN_INDEX       -1
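/*
 * Illustrative sketch of how the shared state is used (the numbers are
 * hypothetical): with as_nplans = 4 and the Append plan's first_partial_plan
 * = 2, subplans 0 and 1 are non-partial and subplans 2 and 3 are partial.
 * Any process that picks subplan 0 or 1 sets pa_finished for it immediately,
 * so nobody else ever runs it; subplans 2 and 3 remain unfinished, and hence
 * selectable by other processes, until some worker runs them to completion.
 */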
static TupleTableSlot *ExecAppend(PlanState *pstate);
static bool choose_next_subplan_locally(AppendState *node);
static bool choose_next_subplan_for_leader(AppendState *node);
static bool choose_next_subplan_for_worker(AppendState *node);

/* ----------------------------------------------------------------
 *      ExecInitAppend
 *
 *      Begin all of the subscans of the append node.
 *
 *      (This is potentially wasteful, since the entire result of the
 *      append node may not be scanned, but this way all of the
 *      structures get allocated in the executor's top level memory
 *      block instead of that of the call to ExecAppend.)
 * ----------------------------------------------------------------
 */
AppendState *
ExecInitAppend(Append *node, EState *estate, int eflags)
{
    AppendState *appendstate = makeNode(AppendState);
    PlanState **appendplanstates;
    int         nplans;
    int         i;
    ListCell   *lc;

    /* check for unsupported flags */
    Assert(!(eflags & EXEC_FLAG_MARK));

    /*
     * Lock the non-leaf tables in the partition tree controlled by this
     * node.  It's a no-op for non-partitioned parent tables.
     */
    ExecLockNonLeafAppendTables(node->partitioned_rels, estate);

    /*
     * Set up empty vector of subplan states
     */
    nplans = list_length(node->appendplans);

    appendplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *));

    /*
     * create new AppendState for our append node
     */
    appendstate->ps.plan = (Plan *) node;
    appendstate->ps.state = estate;
    appendstate->ps.ExecProcNode = ExecAppend;
    appendstate->appendplans = appendplanstates;
    appendstate->as_nplans = nplans;

    /*
     * Miscellaneous initialization
     *
     * Append plans don't have expression contexts because they never call
     * ExecQual or ExecProject.
     */

    /*
     * append nodes still have Result slots, which hold pointers to tuples,
     * so we have to initialize them.
     */
    ExecInitResultTupleSlot(estate, &appendstate->ps);

    /*
     * call ExecInitNode on each of the plans to be executed and save the
     * results into the array "appendplans".
     */
    i = 0;
    foreach(lc, node->appendplans)
    {
        Plan       *initNode = (Plan *) lfirst(lc);

        appendplanstates[i] = ExecInitNode(initNode, estate, eflags);
        i++;
    }

    /*
     * initialize output tuple type
     */
    ExecAssignResultTypeFromTL(&appendstate->ps);
    appendstate->ps.ps_ProjInfo = NULL;

    /*
     * Parallel-aware append plans must choose the first subplan to execute
     * by looking at shared memory, but non-parallel-aware append plans can
     * always start with the first subplan.
     */
    appendstate->as_whichplan =
        appendstate->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0;

    /* If parallel-aware, this will be overridden later. */
    appendstate->choose_next_subplan = choose_next_subplan_locally;

    return appendstate;
}

/* ----------------------------------------------------------------
 *      ExecAppend
 *
 *      Handles iteration over multiple subplans.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecAppend(PlanState *pstate)
{
    AppendState *node = castNode(AppendState, pstate);

    /* If no subplan has been chosen, we must choose one before proceeding. */
    if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
        !node->choose_next_subplan(node))
        return ExecClearTuple(node->ps.ps_ResultTupleSlot);

    for (;;)
    {
        PlanState  *subnode;
        TupleTableSlot *result;

        CHECK_FOR_INTERRUPTS();

        /*
         * figure out which subplan we are currently processing
         */
        Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
        subnode = node->appendplans[node->as_whichplan];

        /*
         * get a tuple from the subplan
         */
        result = ExecProcNode(subnode);

        if (!TupIsNull(result))
        {
            /*
             * If the subplan gave us something then return it as-is. We do
             * NOT make use of the result slot that was set up in
             * ExecInitAppend; there's no need for it.
             */
            return result;
        }

        /* choose new subplan; if none, we're done */
        if (!node->choose_next_subplan(node))
            return ExecClearTuple(node->ps.ps_ResultTupleSlot);
    }
}
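/*
 * For a plain (non-parallel-aware) Append, the loop above simply drains the
 * subplans in order.  With three subplans, for example, it returns all of
 * subplan 0's tuples, then subplan 1's, then subplan 2's, advancing via
 * choose_next_subplan_locally whenever the current subplan is exhausted (or
 * stepping backward through the list when scanning backwards).
 */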
/* ----------------------------------------------------------------
 *      ExecEndAppend
 *
 *      Shuts down the subscans of the append node.
 *
 *      Returns nothing of interest.
 * ----------------------------------------------------------------
 */
void
ExecEndAppend(AppendState *node)
{
    PlanState **appendplans;
    int         nplans;
    int         i;

    /*
     * get information from the node
     */
    appendplans = node->appendplans;
    nplans = node->as_nplans;

    /*
     * shut down each of the subscans
     */
    for (i = 0; i < nplans; i++)
        ExecEndNode(appendplans[i]);
}

void
ExecReScanAppend(AppendState *node)
{
    int         i;

    for (i = 0; i < node->as_nplans; i++)
    {
        PlanState  *subnode = node->appendplans[i];

        /*
         * ExecReScan doesn't know about my subplans, so I have to do
         * changed-parameter signaling myself.
         */
        if (node->ps.chgParam != NULL)
            UpdateChangedParamSet(subnode, node->ps.chgParam);

        /*
         * If chgParam of subnode is not null then plan will be re-scanned by
         * first ExecProcNode.
         */
        if (subnode->chgParam == NULL)
            ExecReScan(subnode);
    }

    node->as_whichplan =
        node->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0;
}

/* ----------------------------------------------------------------
 *                      Parallel Append Support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *      ExecAppendEstimate
 *
 *      Compute the amount of space we'll need in the parallel
 *      query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecAppendEstimate(AppendState *node,
                   ParallelContext *pcxt)
{
    node->pstate_len =
        add_size(offsetof(ParallelAppendState, pa_finished),
                 sizeof(bool) * node->as_nplans);

    shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *      ExecAppendInitializeDSM
 *
 *      Set up shared state for Parallel Append.
 * ----------------------------------------------------------------
 */
void
ExecAppendInitializeDSM(AppendState *node,
                        ParallelContext *pcxt)
{
    ParallelAppendState *pstate;

    pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
    memset(pstate, 0, node->pstate_len);
    LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
    shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);

    node->as_pstate = pstate;
    node->choose_next_subplan = choose_next_subplan_for_leader;
}

/* ----------------------------------------------------------------
 *      ExecAppendReInitializeDSM
 *
 *      Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
{
    ParallelAppendState *pstate = node->as_pstate;

    pstate->pa_next_plan = 0;
    memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
}

/* ----------------------------------------------------------------
 *      ExecAppendInitializeWorker
 *
 *      Copy relevant information from TOC into planstate, and initialize
 *      whatever is required to choose and execute the optimal subplan.
 * ----------------------------------------------------------------
 */
void
ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
{
    node->as_pstate = shm_toc_lookup(pwcxt->toc,
                                     node->ps.plan->plan_node_id, false);
    node->choose_next_subplan = choose_next_subplan_for_worker;
}
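/*
 * Summary of the hookup above: a non-parallel-aware Append keeps the
 * choose_next_subplan_locally callback installed by ExecInitAppend; the
 * leader of a parallel-aware Append gets choose_next_subplan_for_leader from
 * ExecAppendInitializeDSM; and each parallel worker gets
 * choose_next_subplan_for_worker from ExecAppendInitializeWorker.  The three
 * strategies follow.
 */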
/* ----------------------------------------------------------------
 *      choose_next_subplan_locally
 *
 *      Choose next subplan for a non-parallel-aware Append,
 *      returning false if there are no more.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_locally(AppendState *node)
{
    int         whichplan = node->as_whichplan;

    /* We should never see INVALID_SUBPLAN_INDEX in this case. */
    Assert(whichplan >= 0 && whichplan <= node->as_nplans);

    if (ScanDirectionIsForward(node->ps.state->es_direction))
    {
        if (whichplan >= node->as_nplans - 1)
            return false;
        node->as_whichplan++;
    }
    else
    {
        if (whichplan <= 0)
            return false;
        node->as_whichplan--;
    }

    return true;
}

/* ----------------------------------------------------------------
 *      choose_next_subplan_for_leader
 *
 *      Try to pick a plan which doesn't commit us to doing much
 *      work locally, so that as much work as possible is done in
 *      the workers.  Cheapest subplans are at the end.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_leader(AppendState *node)
{
    ParallelAppendState *pstate = node->as_pstate;
    Append     *append = (Append *) node->ps.plan;

    /* Backward scan is not supported by parallel-aware plans */
    Assert(ScanDirectionIsForward(node->ps.state->es_direction));

    LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

    if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
    {
        /* Mark just-completed subplan as finished. */
        node->as_pstate->pa_finished[node->as_whichplan] = true;
    }
    else
    {
        /* Start with last subplan. */
        node->as_whichplan = node->as_nplans - 1;
    }

    /* Loop until we find a subplan to execute. */
    while (pstate->pa_finished[node->as_whichplan])
    {
        if (node->as_whichplan == 0)
        {
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
            node->as_whichplan = INVALID_SUBPLAN_INDEX;
            LWLockRelease(&pstate->pa_lock);
            return false;
        }
        node->as_whichplan--;
    }

    /* If non-partial, immediately mark as finished. */
    if (node->as_whichplan < append->first_partial_plan)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    LWLockRelease(&pstate->pa_lock);

    return true;
}
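/*
 * Worked example of the above (hypothetical numbers): with four subplans,
 * the leader's first call starts at subplan 3 and walks backward past any
 * entries already marked finished.  Since the cheapest subplans are at the
 * end of the list, the leader tends to pick up only small amounts of work,
 * leaving the expensive subplans near the front of the list to the workers.
 */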
/* ----------------------------------------------------------------
 *      choose_next_subplan_for_worker
 *
 *      Choose next subplan for a parallel-aware Append, returning
 *      false if there are no more.
 *
 *      We start from the first plan and advance through the list;
 *      when we get back to the end, we loop back to the first
 *      partial plan.  This assigns the non-partial plans first
 *      in order of descending cost and then spreads out the
 *      workers as evenly as possible across the remaining partial
 *      plans.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_worker(AppendState *node)
{
    ParallelAppendState *pstate = node->as_pstate;
    Append     *append = (Append *) node->ps.plan;

    /* Backward scan is not supported by parallel-aware plans */
    Assert(ScanDirectionIsForward(node->ps.state->es_direction));

    LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

    /* Mark just-completed subplan as finished. */
    if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    /* If all the plans are already done, we have nothing to do */
    if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
    {
        LWLockRelease(&pstate->pa_lock);
        return false;
    }

    /* Loop until we find a subplan to execute. */
    while (pstate->pa_finished[pstate->pa_next_plan])
    {
        if (pstate->pa_next_plan < node->as_nplans - 1)
        {
            /* Advance to next plan. */
            pstate->pa_next_plan++;
        }
        else if (append->first_partial_plan < node->as_nplans)
        {
            /* Loop back to first partial plan. */
            pstate->pa_next_plan = append->first_partial_plan;
        }
        else
        {
            /* At last plan, no partial plans, arrange to bail out. */
            pstate->pa_next_plan = node->as_whichplan;
        }

        if (pstate->pa_next_plan == node->as_whichplan)
        {
            /* We've tried everything! */
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
            LWLockRelease(&pstate->pa_lock);
            return false;
        }
    }

    /* Pick the plan we found, and advance pa_next_plan one more time. */
    node->as_whichplan = pstate->pa_next_plan++;
    if (pstate->pa_next_plan >= node->as_nplans)
    {
        if (append->first_partial_plan < node->as_nplans)
            pstate->pa_next_plan = append->first_partial_plan;
        else
        {
            /*
             * We have only non-partial plans, and we already chose the last
             * one; so arrange for the other workers to immediately bail out.
             */
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
        }
    }

    /* If non-partial, immediately mark as finished. */
    if (node->as_whichplan < append->first_partial_plan)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    LWLockRelease(&pstate->pa_lock);

    return true;
}
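/*
 * Worked example of the above (hypothetical numbers): suppose as_nplans = 4
 * and first_partial_plan = 2, so subplans 0 and 1 are non-partial and 2 and
 * 3 are partial.  The first two workers to call this function take subplans
 * 0 and 1 and mark them finished immediately; later callers cycle over
 * subplans 2 and 3, with pa_next_plan wrapping from the end of the list back
 * to first_partial_plan, until every partial subplan has been executed to
 * completion, at which point pa_next_plan is set to INVALID_SUBPLAN_INDEX
 * and all subsequent callers bail out.
 */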