/*-------------------------------------------------------------------------
 *
 * nodeGatherMerge.c
 *		Scan a plan in multiple workers, and do order-preserving merge.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeGatherMerge.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/relscan.h"
#include "access/xact.h"
#include "executor/execdebug.h"
#include "executor/execParallel.h"
#include "executor/nodeGatherMerge.h"
#include "executor/nodeSubplan.h"
#include "executor/tqueue.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "optimizer/planmain.h"
#include "utils/memutils.h"
#include "utils/rel.h"

/*
 * When we read tuples from workers, it's a good idea to read several at once
 * for efficiency when possible: this minimizes context-switching overhead.
 * But reading too many at a time wastes memory without improving performance.
 * We'll read up to MAX_TUPLE_STORE tuples (in addition to the first one).
 */
#define MAX_TUPLE_STORE 10

/*
 * Pending-tuple array for each worker.  This holds additional tuples that
 * we were able to fetch from the worker, but can't process yet.  In addition,
 * this struct holds the "done" flag indicating the worker is known to have
 * no more tuples.  (We do not use this struct for the leader; we don't keep
 * any pending tuples for the leader, and the need_to_scan_locally flag serves
 * as its "done" indicator.)
 */
typedef struct GMReaderTupleBuffer
{
	HeapTuple  *tuple;			/* array of length MAX_TUPLE_STORE */
	int			nTuples;		/* number of tuples currently stored */
	int			readCounter;	/* index of next tuple to extract */
	bool		done;			/* true if reader is known exhausted */
} GMReaderTupleBuffer;

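/*
 * A reader's buffer is consumed by advancing readCounter until it catches up
 * with nTuples; once the buffer runs dry, load_tuple_array (below) tries to
 * refill it with up to MAX_TUPLE_STORE more tuples from that worker's tuple
 * queue, and "done" is set once the queue reports it has nothing further.
 */
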
static TupleTableSlot *ExecGatherMerge(PlanState *pstate);
static int32 heap_compare_slots(Datum a, Datum b, void *arg);
static TupleTableSlot *gather_merge_getnext(GatherMergeState *gm_state);
static HeapTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader,
				  bool nowait, bool *done);
static void ExecShutdownGatherMergeWorkers(GatherMergeState *node);
static void gather_merge_setup(GatherMergeState *gm_state);
static void gather_merge_init(GatherMergeState *gm_state);
static void gather_merge_clear_tuples(GatherMergeState *gm_state);
static bool gather_merge_readnext(GatherMergeState *gm_state, int reader,
					  bool nowait);
static void load_tuple_array(GatherMergeState *gm_state, int reader);

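/*
 * Overview: ExecGatherMerge launches the workers on first execution, then
 * obtains each output row from gather_merge_getnext, which does an
 * order-preserving merge (via gm_heap) across the leader's own scan in
 * gm_slots[0] and the per-worker slots refilled by gather_merge_readnext.
 */
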
/* ----------------------------------------------------------------
 *		ExecInitGatherMerge
 * ----------------------------------------------------------------
 */
GatherMergeState *
ExecInitGatherMerge(GatherMerge *node, EState *estate, int eflags)
{
	GatherMergeState *gm_state;
	Plan	   *outerNode;
	bool		hasoid;
	TupleDesc	tupDesc;

	/* Gather merge node doesn't have innerPlan node. */
	Assert(innerPlan(node) == NULL);

	/*
	 * create state structure
	 */
	gm_state = makeNode(GatherMergeState);
	gm_state->ps.plan = (Plan *) node;
	gm_state->ps.state = estate;
	gm_state->ps.ExecProcNode = ExecGatherMerge;

	gm_state->initialized = false;
	gm_state->gm_initialized = false;
	gm_state->tuples_needed = -1;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &gm_state->ps);

	/*
	 * GatherMerge doesn't support checking a qual (it's always more
	 * efficient to do it in the child node).
	 */
	Assert(!node->plan.qual);

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &gm_state->ps);

	/*
	 * now initialize outer plan
	 */
	outerNode = outerPlan(node);
	outerPlanState(gm_state) = ExecInitNode(outerNode, estate, eflags);

	/*
	 * Store the tuple descriptor into gather merge state, so we can use it
	 * while initializing the gather merge slots.
	 */
	if (!ExecContextForcesOids(outerPlanState(gm_state), &hasoid))
		hasoid = false;
	tupDesc = ExecTypeFromTL(outerNode->targetlist, hasoid);
	gm_state->tupDesc = tupDesc;

	/*
	 * Initialize result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&gm_state->ps);
	ExecConditionalAssignProjectionInfo(&gm_state->ps, tupDesc, OUTER_VAR);

	/*
	 * initialize sort-key information
	 */
	if (node->numCols)
	{
		int			i;

		gm_state->gm_nkeys = node->numCols;
		gm_state->gm_sortkeys =
			palloc0(sizeof(SortSupportData) * node->numCols);

		for (i = 0; i < node->numCols; i++)
		{
			SortSupport sortKey = gm_state->gm_sortkeys + i;

			sortKey->ssup_cxt = CurrentMemoryContext;
			sortKey->ssup_collation = node->collations[i];
			sortKey->ssup_nulls_first = node->nullsFirst[i];
			sortKey->ssup_attno = node->sortColIdx[i];

			/*
			 * We don't perform abbreviated key conversion here, for the
			 * same reasons that it isn't used in MergeAppend
			 */
			sortKey->abbreviate = false;

			PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey);
		}
	}

	/* Now allocate the workspace for gather merge */
	gather_merge_setup(gm_state);

	return gm_state;
}

/* ----------------------------------------------------------------
 *		ExecGatherMerge(node)
 *
 *		Scans the relation via multiple workers and returns
 *		the next qualifying tuple.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecGatherMerge(PlanState *pstate)
{
	GatherMergeState *node = castNode(GatherMergeState, pstate);
	TupleTableSlot *slot;
	ExprContext *econtext;

	CHECK_FOR_INTERRUPTS();

	/*
	 * As with Gather, we don't launch workers until this node is actually
	 * executed.
	 */
	if (!node->initialized)
	{
		EState	   *estate = node->ps.state;
		GatherMerge *gm = castNode(GatherMerge, node->ps.plan);

		/*
		 * Sometimes we might have to run without parallelism; but if
		 * parallel mode is active then we can try to fire up some workers.
		 */
		if (gm->num_workers > 0 && estate->es_use_parallel_mode)
		{
			ParallelContext *pcxt;

			/* Initialize, or re-initialize, shared state needed by workers. */
			if (!node->pei)
				node->pei = ExecInitParallelPlan(node->ps.lefttree,
												 estate,
												 gm->initParam,
												 gm->num_workers,
												 node->tuples_needed);
			else
				ExecParallelReinitialize(node->ps.lefttree,
										 node->pei,
										 gm->initParam);

			/* Try to launch workers. */
			pcxt = node->pei->pcxt;
			LaunchParallelWorkers(pcxt);
			/* We save # workers launched for the benefit of EXPLAIN */
			node->nworkers_launched = pcxt->nworkers_launched;

			/* Set up tuple queue readers to read the results. */
			if (pcxt->nworkers_launched > 0)
			{
				ExecParallelCreateReaders(node->pei);
				/* Make a working array showing the active readers */
				node->nreaders = pcxt->nworkers_launched;
				node->reader = (TupleQueueReader **)
					palloc(node->nreaders * sizeof(TupleQueueReader *));
				memcpy(node->reader, node->pei->reader,
					   node->nreaders * sizeof(TupleQueueReader *));
			}
			else
			{
				/* No workers? Then never mind. */
				node->nreaders = 0;
				node->reader = NULL;
			}
		}

		/* allow leader to participate if enabled or no choice */
		if (parallel_leader_participation || node->nreaders == 0)
			node->need_to_scan_locally = true;
		node->initialized = true;
	}

	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous tuple cycle.
	 */
	econtext = node->ps.ps_ExprContext;
	ResetExprContext(econtext);

	/*
	 * Get next tuple, either from one of our workers, or by running the plan
	 * ourselves.
	 */
	slot = gather_merge_getnext(node);
	if (TupIsNull(slot))
		return NULL;

	/* If no projection is required, we're done. */
	if (node->ps.ps_ProjInfo == NULL)
		return slot;

	/*
	 * Form the result tuple using ExecProject(), and return it.
	 */
	econtext->ecxt_outertuple = slot;
	return ExecProject(node->ps.ps_ProjInfo);
}

/* ----------------------------------------------------------------
 *		ExecEndGatherMerge
 *
 *		frees any storage allocated through C routines.
 * ----------------------------------------------------------------
 */
void
ExecEndGatherMerge(GatherMergeState *node)
{
	ExecEndNode(outerPlanState(node));	/* let children clean up first */
	ExecShutdownGatherMerge(node);
	ExecFreeExprContext(&node->ps);
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
}

/* ----------------------------------------------------------------
 *		ExecShutdownGatherMerge
 *
 *		Destroy the setup for parallel workers including parallel context.
 * ----------------------------------------------------------------
 */
void
ExecShutdownGatherMerge(GatherMergeState *node)
{
	ExecShutdownGatherMergeWorkers(node);

	/* Now destroy the parallel context. */
	if (node->pei != NULL)
	{
		ExecParallelCleanup(node->pei);
		node->pei = NULL;
	}
}

/* ----------------------------------------------------------------
 *		ExecShutdownGatherMergeWorkers
 *
 *		Stop all the parallel workers.
 * ----------------------------------------------------------------
 */
static void
ExecShutdownGatherMergeWorkers(GatherMergeState *node)
{
	if (node->pei != NULL)
		ExecParallelFinish(node->pei);

	/* Flush local copy of reader array */
	if (node->reader)
		pfree(node->reader);
	node->reader = NULL;
}

/* ----------------------------------------------------------------
 *		ExecReScanGatherMerge
 *
 *		Prepare to re-scan the result of a GatherMerge.
 * ----------------------------------------------------------------
 */
void
ExecReScanGatherMerge(GatherMergeState *node)
{
	GatherMerge *gm = (GatherMerge *) node->ps.plan;
	PlanState  *outerPlan = outerPlanState(node);

	/* Make sure any existing workers are gracefully shut down */
	ExecShutdownGatherMergeWorkers(node);

	/* Free any unused tuples, so we don't leak memory across rescans */
	gather_merge_clear_tuples(node);

	/* Mark node so that shared state will be rebuilt at next call */
	node->initialized = false;
	node->gm_initialized = false;

	/*
	 * Set child node's chgParam to tell it that the next scan might deliver
	 * a different set of rows within the leader process.  (The overall
	 * rowset shouldn't change, but the leader process's subset might; hence
	 * nodes between here and the parallel table scan node mustn't optimize
	 * on the assumption of an unchanging rowset.)
	 */
	if (gm->rescan_param >= 0)
		outerPlan->chgParam = bms_add_member(outerPlan->chgParam,
											 gm->rescan_param);

	/*
	 * If chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.  Note: because this does nothing if we have a
	 * rescan_param, it's currently guaranteed that parallel-aware child
	 * nodes will not see a ReScan call until after they get a
	 * ReInitializeDSM call.  That ordering might not be something to rely
	 * on, though.  A good rule of thumb is that ReInitializeDSM should reset
	 * only shared state, ReScan should reset only local state, and anything
	 * that depends on both of those steps being finished must wait until the
	 * first ExecProcNode call.
	 */
	if (outerPlan->chgParam == NULL)
		ExecReScan(outerPlan);
}

/*
 * Set up the data structures that we'll need for Gather Merge.
 *
 * We allocate these once on the basis of gm->num_workers, which is an
 * upper bound for the number of workers we'll actually have.  During
 * a rescan, we reset the structures to empty.  This approach simplifies
 * not leaking memory across rescans.
 *
 * In the gm_slots[] array, index 0 is for the leader, and indexes 1 to n
 * are for workers.  The values placed into gm_heap correspond to indexes
 * in gm_slots[].  The gm_tuple_buffers[] array, however, is indexed from
 * 0 to n-1; it has no entry for the leader.
 */
static void
gather_merge_setup(GatherMergeState *gm_state)
{
	GatherMerge *gm = castNode(GatherMerge, gm_state->ps.plan);
	int			nreaders = gm->num_workers;
	int			i;

	/*
	 * Allocate gm_slots for the number of workers + one more slot for
	 * leader.  Slot 0 is always for the leader.  Leader always calls
	 * ExecProcNode() to read the tuple, and then stores it directly into
	 * its gm_slots entry.  For other slots, code below will call
	 * ExecInitExtraTupleSlot() to create a slot for the worker's results.
	 * Note that during any single scan, we might have fewer than
	 * num_workers available workers, in which case the extra array entries
	 * go unused.
	 */
	gm_state->gm_slots = (TupleTableSlot **)
		palloc0((nreaders + 1) * sizeof(TupleTableSlot *));

	/* Allocate the tuple slot and tuple array for each worker */
	gm_state->gm_tuple_buffers = (GMReaderTupleBuffer *)
		palloc0(nreaders * sizeof(GMReaderTupleBuffer));

	for (i = 0; i < nreaders; i++)
	{
		/* Allocate the tuple array with length MAX_TUPLE_STORE */
		gm_state->gm_tuple_buffers[i].tuple =
			(HeapTuple *) palloc0(sizeof(HeapTuple) * MAX_TUPLE_STORE);

		/* Initialize tuple slot for worker */
		gm_state->gm_slots[i + 1] = ExecInitExtraTupleSlot(gm_state->ps.state);
		ExecSetSlotDescriptor(gm_state->gm_slots[i + 1],
							  gm_state->tupDesc);
	}

	/* Allocate the resources for the merge */
Avoid memory leaks when a GatherMerge node is rescanned.
Rescanning a GatherMerge led to leaking some memory in the executor's
query-lifespan context, because most of the node's working data structures
were simply abandoned and rebuilt from scratch. In practice, this might
never amount to much, given the cost of relaunching worker processes ---
but it's still pretty messy, so let's fix it.
We can rearrange things so that the tuple arrays are simply cleared and
reused, and we don't need to rebuild the TupleTableSlots either, just
clear them. One small complication is that because we might get a
different number of workers on each iteration, we can't keep the old
convention that the leader's gm_slots[] entry is the last one; the leader
might clobber a TupleTableSlot that we need for a worker in a future
iteration. Hence, adjust the logic so that the leader has slot 0 always,
while the active workers have slots 1..n.
Back-patch to v10 to keep all the existing versions of nodeGatherMerge.c
in sync --- because of the renumbering of the slots, there would otherwise
be a very large risk that any future backpatches in this module would
introduce bugs.
Discussion: https://postgr.es/m/8670.1504192177@sss.pgh.pa.us
2017-08-31 22:20:58 +02:00
|
|
|
gm_state->gm_heap = binaryheap_allocate(nreaders + 1,
|
2017-03-09 13:40:36 +01:00
|
|
|
heap_compare_slots,
|
|
|
|
gm_state);
|
Avoid memory leaks when a GatherMerge node is rescanned.
Rescanning a GatherMerge led to leaking some memory in the executor's
query-lifespan context, because most of the node's working data structures
were simply abandoned and rebuilt from scratch. In practice, this might
never amount to much, given the cost of relaunching worker processes ---
but it's still pretty messy, so let's fix it.
We can rearrange things so that the tuple arrays are simply cleared and
reused, and we don't need to rebuild the TupleTableSlots either, just
clear them. One small complication is that because we might get a
different number of workers on each iteration, we can't keep the old
convention that the leader's gm_slots[] entry is the last one; the leader
might clobber a TupleTableSlot that we need for a worker in a future
iteration. Hence, adjust the logic so that the leader has slot 0 always,
while the active workers have slots 1..n.
Back-patch to v10 to keep all the existing versions of nodeGatherMerge.c
in sync --- because of the renumbering of the slots, there would otherwise
be a very large risk that any future backpatches in this module would
introduce bugs.
Discussion: https://postgr.es/m/8670.1504192177@sss.pgh.pa.us
2017-08-31 22:20:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
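
/*
 * Illustrative sketch (not part of nodeGatherMerge.c): the slot-numbering
 * convention set up above, reduced to plain C so the index arithmetic is
 * easy to see.  The leader always owns entry 0 of the per-source slot
 * array, workers own entries 1..nreaders, and the tuple buffers exist only
 * for workers, so worker "reader" uses buffer "reader - 1".  The names
 * below (source_label, demo_slot_layout) are invented for the example.
 */
#include <stdio.h>

static const char *
source_label(int reader)
{
	return (reader == 0) ? "leader" : "worker";
}

static void
demo_slot_layout(int nreaders)
{
	int			reader;

	/* One slot per source: index 0 is the leader, 1..nreaders are workers. */
	for (reader = 0; reader <= nreaders; reader++)
	{
		if (reader == 0)
			printf("reader %d (%s): slot 0, no tuple buffer\n",
				   reader, source_label(reader));
		else
			printf("reader %d (%s): slot %d, tuple buffer %d\n",
				   reader, source_label(reader), reader, reader - 1);
	}
}
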
/*
 * Initialize the Gather Merge.
 *
 * Reset data structures to ensure they're empty.  Then pull at least one
 * tuple from leader + each worker (or set its "done" indicator), and set up
 * the heap.
 */
static void
gather_merge_init(GatherMergeState *gm_state)
{
	int			nreaders = gm_state->nreaders;
	bool		nowait = true;
	int			i;

	/* Assert that gather_merge_setup made enough space */
	Assert(nreaders <= castNode(GatherMerge, gm_state->ps.plan)->num_workers);

	/* Reset leader's tuple slot to empty */
	gm_state->gm_slots[0] = NULL;

	/* Reset the tuple slot and tuple array for each worker */
	for (i = 0; i < nreaders; i++)
	{
		/* Reset tuple array to empty */
		gm_state->gm_tuple_buffers[i].nTuples = 0;
		gm_state->gm_tuple_buffers[i].readCounter = 0;
		/* Reset done flag to not-done */
		gm_state->gm_tuple_buffers[i].done = false;
		/* Ensure output slot is empty */
		ExecClearTuple(gm_state->gm_slots[i + 1]);
	}

	/* Reset binary heap to empty */
	binaryheap_reset(gm_state->gm_heap);

	/*
	 * First, try to read a tuple from each worker (including leader) in
	 * nowait mode.  After this, if not all workers were able to produce a
	 * tuple (or a "done" indication), then re-read from remaining workers,
	 * this time using wait mode.  Add all live readers (those producing at
	 * least one tuple) to the heap.
	 */
reread:
	for (i = 0; i <= nreaders; i++)
	{
		CHECK_FOR_INTERRUPTS();

		/* skip this source if already known done */
		if ((i == 0) ? gm_state->need_to_scan_locally :
			!gm_state->gm_tuple_buffers[i - 1].done)
		{
			if (TupIsNull(gm_state->gm_slots[i]))
			{
				/* Don't have a tuple yet, try to get one */
				if (gather_merge_readnext(gm_state, i, nowait))
					binaryheap_add_unordered(gm_state->gm_heap,
											 Int32GetDatum(i));
			}
			else
			{
				/*
				 * We already got at least one tuple from this worker, but
				 * might as well see if it has any more ready by now.
				 */
				load_tuple_array(gm_state, i);
			}
		}
	}

	/* need not recheck leader, since nowait doesn't matter for it */
	for (i = 1; i <= nreaders; i++)
	{
		if (!gm_state->gm_tuple_buffers[i - 1].done &&
			TupIsNull(gm_state->gm_slots[i]))
		{
			nowait = false;
			goto reread;
		}
	}

	/* Now heapify the heap. */
	binaryheap_build(gm_state->gm_heap);

	gm_state->gm_initialized = true;
}
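
/*
 * Illustrative sketch (not part of nodeGatherMerge.c): the two-pass polling
 * pattern used by gather_merge_init above, with the executor details
 * stripped away.  A first pass polls every source in non-blocking mode so
 * fast sources are drained without waiting on slow ones; a further blocking
 * pass is made only while some source has neither produced a value nor
 * reported completion.  DemoSource and try_read_source() are hypothetical
 * stand-ins for the tuple-queue machinery.
 */
#include <stdbool.h>

typedef struct DemoSource
{
	int			remaining;		/* values this source can still deliver */
	bool		done;			/* source has reported end-of-stream */
	bool		have_value;		/* a value is staged for merging */
} DemoSource;

/*
 * Stand-in for one read from a worker's queue.  A real non-blocking read
 * could also return false with done still unset ("nothing ready yet");
 * this stub never does, so the retry loop below always terminates.
 */
static bool
try_read_source(DemoSource *src, bool nowait)
{
	(void) nowait;				/* the stub never actually blocks */

	if (src->remaining > 0)
	{
		src->remaining--;
		return true;
	}
	src->done = true;
	return false;
}

static void
prime_sources(DemoSource *sources, int nsources)
{
	bool		nowait = true;
	int			i;

retry:
	/* Poll every live source that has nothing staged yet. */
	for (i = 0; i < nsources; i++)
	{
		if (!sources[i].done && !sources[i].have_value)
			sources[i].have_value = try_read_source(&sources[i], nowait);
	}

	/* Any live source still without a value forces a blocking re-poll. */
	for (i = 0; i < nsources; i++)
	{
		if (!sources[i].done && !sources[i].have_value)
		{
			nowait = false;
			goto retry;
		}
	}
}
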
/*
 * Clear out the tuple table slot, and any unused pending tuples,
 * for each gather merge input.
 */
static void
gather_merge_clear_tuples(GatherMergeState *gm_state)
{
	int			i;

	for (i = 0; i < gm_state->nreaders; i++)
	{
		GMReaderTupleBuffer *tuple_buffer = &gm_state->gm_tuple_buffers[i];

		while (tuple_buffer->readCounter < tuple_buffer->nTuples)
			heap_freetuple(tuple_buffer->tuple[tuple_buffer->readCounter++]);

		ExecClearTuple(gm_state->gm_slots[i + 1]);
	}
}

/*
 * Read the next tuple for gather merge.
 *
 * Fetch the sorted tuple out of the heap.
 */
static TupleTableSlot *
gather_merge_getnext(GatherMergeState *gm_state)
{
	int			i;

	if (!gm_state->gm_initialized)
	{
		/*
		 * First time through: pull the first tuple from each participant, and
		 * set up the heap.
		 */
		gather_merge_init(gm_state);
	}
	else
	{
		/*
		 * Otherwise, pull the next tuple from whichever participant we
		 * returned from last time, and reinsert that participant's index into
		 * the heap, because it might now compare differently against the
		 * other elements of the heap.
		 */
		i = DatumGetInt32(binaryheap_first(gm_state->gm_heap));

		if (gather_merge_readnext(gm_state, i, false))
			binaryheap_replace_first(gm_state->gm_heap, Int32GetDatum(i));
		else
		{
			/* reader exhausted, remove it from heap */
			(void) binaryheap_remove_first(gm_state->gm_heap);
		}
	}

	if (binaryheap_empty(gm_state->gm_heap))
	{
		/* All the queues are exhausted, and so is the heap */
		gather_merge_clear_tuples(gm_state);
		return NULL;
	}
	else
	{
		/* Return next tuple from whichever participant has the leading one */
		i = DatumGetInt32(binaryheap_first(gm_state->gm_heap));
		return gm_state->gm_slots[i];
	}
}
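
/*
 * Illustrative sketch (not part of nodeGatherMerge.c): the order-preserving
 * merge loop that gather_merge_getnext implements, reduced to merging
 * already-sorted int arrays.  The real code keeps the participants in a
 * binary heap keyed by their current tuple; for brevity this sketch finds
 * the leading participant with a linear scan, which yields the same output
 * order.  DemoRun and demo_merge are names invented for the example.
 */
#include <stdio.h>

typedef struct DemoRun
{
	const int  *values;			/* sorted input run */
	int			nvalues;		/* number of elements in the run */
	int			pos;			/* next unread element */
} DemoRun;

/* Emit the merged output of nruns sorted runs, smallest value first. */
static void
demo_merge(DemoRun *runs, int nruns)
{
	for (;;)
	{
		int			best = -1;
		int			i;

		/* Find the participant whose current value leads the merge. */
		for (i = 0; i < nruns; i++)
		{
			if (runs[i].pos < runs[i].nvalues &&
				(best < 0 ||
				 runs[i].values[runs[i].pos] < runs[best].values[runs[best].pos]))
				best = i;
		}

		if (best < 0)
			break;				/* every run is exhausted */

		/* Return this value, and advance only the winning run. */
		printf("%d\n", runs[best].values[runs[best].pos]);
		runs[best].pos++;
	}
}

/*
 * Example use:
 *		int a[] = {1, 4, 7};
 *		int b[] = {2, 3, 9};
 *		DemoRun runs[] = {{a, 3, 0}, {b, 3, 0}};
 *		demo_merge(runs, 2);		prints 1 2 3 4 7 9
 */
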
/*
 * Read tuple(s) for given reader in nowait mode, and load into its tuple
 * array, until we have MAX_TUPLE_STORE of them or would have to block.
 */
static void
load_tuple_array(GatherMergeState *gm_state, int reader)
{
	GMReaderTupleBuffer *tuple_buffer;
	int			i;

	/* Don't do anything if this is the leader. */
	if (reader == 0)
		return;

	tuple_buffer = &gm_state->gm_tuple_buffers[reader - 1];

	/* If there's nothing in the array, reset the counters to zero. */
	if (tuple_buffer->nTuples == tuple_buffer->readCounter)
		tuple_buffer->nTuples = tuple_buffer->readCounter = 0;

	/* Try to fill additional slots in the array. */
	for (i = tuple_buffer->nTuples; i < MAX_TUPLE_STORE; i++)
	{
		HeapTuple	tuple;

		tuple = gm_readnext_tuple(gm_state,
								  reader,
								  true,
								  &tuple_buffer->done);
		if (!HeapTupleIsValid(tuple))
			break;
		tuple_buffer->tuple[i] = tuple;
		tuple_buffer->nTuples++;
	}
}
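
/*
 * Illustrative sketch (not part of nodeGatherMerge.c): the batched-refill
 * pattern load_tuple_array uses.  Once every staged entry has been
 * consumed the counters are reset, and the array is then topped up with
 * non-blocking reads until it is full or the source has nothing more
 * ready.  DemoBuffer and demo_poll_value() are invented stand-ins for
 * GMReaderTupleBuffer and gm_readnext_tuple(); the poll function is only
 * declared here and would be supplied by the caller of the sketch.
 */
#include <stdbool.h>

#define DEMO_BATCH_SIZE 10

typedef struct DemoBuffer
{
	int			values[DEMO_BATCH_SIZE];
	int			nValues;		/* number of staged entries */
	int			readCounter;	/* next entry to hand out */
	bool		done;			/* source is exhausted */
} DemoBuffer;

/* Hypothetical non-blocking read: true if *value was filled in. */
extern bool demo_poll_value(int source, int *value, bool *done);

static void
demo_refill(DemoBuffer *buf, int source)
{
	int			i;

	/* If everything staged has been consumed, start over at slot zero. */
	if (buf->nValues == buf->readCounter)
		buf->nValues = buf->readCounter = 0;

	/* Top up the remaining slots without ever blocking. */
	for (i = buf->nValues; i < DEMO_BATCH_SIZE; i++)
	{
		if (!demo_poll_value(source, &buf->values[i], &buf->done))
			break;
		buf->nValues++;
	}
}
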
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Store the next tuple for a given reader into the appropriate slot.
|
|
|
|
*
|
Code review for nodeGatherMerge.c.
Comment the fields of GatherMergeState, and organize them a bit more
sensibly. Comment GMReaderTupleBuffer more usefully too. Improve
assorted other comments that were obsolete or just not very good English.
Get rid of the use of a GMReaderTupleBuffer for the leader process;
that was confusing, since only the "done" field was used, and that
in a way redundant with need_to_scan_locally.
In gather_merge_init, avoid calling load_tuple_array for
already-known-exhausted workers. I'm not sure if there's a live bug there,
but the case is unlikely to be well tested due to timing considerations.
Remove some useless code, such as duplicating the tts_isempty test done by
TupIsNull.
Remove useless initialization of ps.qual, replacing that with an assertion
that we have no qual to check. (If we did, the code would fail to check
it.)
Avoid applying heap_copytuple to a null tuple. While that fails to crash,
it's confusing and it makes the code less legible not more so IMO.
Propagate a couple of these changes into nodeGather.c, as well.
Back-patch to v10, partly because of the possibility that the
gather_merge_init change is fixing a live bug, but mostly to keep
the branches in sync to ease future bug fixes.
2017-08-30 23:21:08 +02:00
|
|
|
* Returns true if successful, false if not (either reader is exhausted,
|
|
|
|
* or we didn't want to wait for a tuple). Sets done flag if reader
|
|
|
|
* is found to be exhausted.
|
2017-03-09 13:40:36 +01:00
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
|
|
|
|
{
|
|
|
|
GMReaderTupleBuffer *tuple_buffer;
|
Code review for nodeGatherMerge.c.
Comment the fields of GatherMergeState, and organize them a bit more
sensibly. Comment GMReaderTupleBuffer more usefully too. Improve
assorted other comments that were obsolete or just not very good English.
Get rid of the use of a GMReaderTupleBuffer for the leader process;
that was confusing, since only the "done" field was used, and that
in a way redundant with need_to_scan_locally.
In gather_merge_init, avoid calling load_tuple_array for
already-known-exhausted workers. I'm not sure if there's a live bug there,
but the case is unlikely to be well tested due to timing considerations.
Remove some useless code, such as duplicating the tts_isempty test done by
TupIsNull.
Remove useless initialization of ps.qual, replacing that with an assertion
that we have no qual to check. (If we did, the code would fail to check
it.)
Avoid applying heap_copytuple to a null tuple. While that fails to crash,
it's confusing and it makes the code less legible not more so IMO.
Propagate a couple of these changes into nodeGather.c, as well.
Back-patch to v10, partly because of the possibility that the
gather_merge_init change is fixing a live bug, but mostly to keep
the branches in sync to ease future bug fixes.
2017-08-30 23:21:08 +02:00
|
|
|
HeapTuple tup;
|
2017-03-09 13:40:36 +01:00
|
|
|
|
|
|
|
/*
|
2017-05-17 22:31:56 +02:00
|
|
|
* If we're being asked to generate a tuple from the leader, then we just
|
|
|
|
* call ExecProcNode as normal to produce one.
|
2017-03-09 13:40:36 +01:00
|
|
|
*/
|
Avoid memory leaks when a GatherMerge node is rescanned.
Rescanning a GatherMerge led to leaking some memory in the executor's
query-lifespan context, because most of the node's working data structures
were simply abandoned and rebuilt from scratch. In practice, this might
never amount to much, given the cost of relaunching worker processes ---
but it's still pretty messy, so let's fix it.
We can rearrange things so that the tuple arrays are simply cleared and
reused, and we don't need to rebuild the TupleTableSlots either, just
clear them. One small complication is that because we might get a
different number of workers on each iteration, we can't keep the old
convention that the leader's gm_slots[] entry is the last one; the leader
might clobber a TupleTableSlot that we need for a worker in a future
iteration. Hence, adjust the logic so that the leader has slot 0 always,
while the active workers have slots 1..n.
Back-patch to v10 to keep all the existing versions of nodeGatherMerge.c
in sync --- because of the renumbering of the slots, there would otherwise
be a very large risk that any future backpatches in this module would
introduce bugs.
Discussion: https://postgr.es/m/8670.1504192177@sss.pgh.pa.us
2017-08-31 22:20:58 +02:00
|
|
|
if (reader == 0)
|
2017-03-09 13:40:36 +01:00
|
|
|
{
|
|
|
|
if (gm_state->need_to_scan_locally)
|
|
|
|
{
|
|
|
|
PlanState *outerPlan = outerPlanState(gm_state);
|
|
|
|
TupleTableSlot *outerTupleSlot;
|
2017-12-18 18:17:37 +01:00
|
|
|
EState *estate = gm_state->ps.state;
|
2017-03-09 13:40:36 +01:00
|
|
|
|
2017-12-18 18:17:37 +01:00
|
|
|
			/* Install our DSA area while executing the plan. */
			estate->es_query_dsa = gm_state->pei ? gm_state->pei->area : NULL;
			outerTupleSlot = ExecProcNode(outerPlan);
			estate->es_query_dsa = NULL;

			if (!TupIsNull(outerTupleSlot))
			{
				gm_state->gm_slots[0] = outerTupleSlot;
				return true;
			}
			/* need_to_scan_locally serves as "done" flag for leader */
			gm_state->need_to_scan_locally = false;
		}
		return false;
	}

	/* Otherwise, check the state of the relevant tuple buffer. */
	tuple_buffer = &gm_state->gm_tuple_buffers[reader - 1];

	if (tuple_buffer->nTuples > tuple_buffer->readCounter)
	{
		/* Return any tuple previously read that is still buffered. */
		tup = tuple_buffer->tuple[tuple_buffer->readCounter++];
	}
	else if (tuple_buffer->done)
	{
		/* Reader is known to be exhausted. */
		return false;
	}
	else
	{
		/* Read and buffer next tuple. */
		tup = gm_readnext_tuple(gm_state,
								reader,
								nowait,
								&tuple_buffer->done);
		if (!HeapTupleIsValid(tup))
			return false;

		/*
		 * Attempt to read more tuples in nowait mode and store them in the
		 * pending-tuple array for the reader.
		 */
		load_tuple_array(gm_state, reader);
	}

	Assert(HeapTupleIsValid(tup));

	/* Build the TupleTableSlot for the given tuple */
	ExecStoreTuple(tup,			/* tuple to store */
				   gm_state->gm_slots[reader],	/* slot in which to store
												 * the tuple */
				   InvalidBuffer,	/* no buffer associated with tuple */
				   true);		/* pfree tuple when done with it */

	return true;
}
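
/*
 * Illustrative sketch (assumption, not part of the original file): how a
 * caller such as gather_merge_getnext() might consume the boolean result of
 * gather_merge_readnext(), re-inserting the reader into the merge heap when
 * a new tuple was loaded and dropping it once the reader is exhausted.
 */
static inline void
gather_merge_advance_reader_sketch(GatherMergeState *gm_state, int i)
{
	if (gather_merge_readnext(gm_state, i, false))
		binaryheap_replace_first(gm_state->gm_heap, Int32GetDatum(i));
	else
		(void) binaryheap_remove_first(gm_state->gm_heap);
}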

/*
 * Attempt to read a tuple from the given worker.
 */
static HeapTuple
gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
				  bool *done)
{
	TupleQueueReader *reader;
	HeapTuple	tup;

	/* Check for async events, particularly messages from workers. */
	CHECK_FOR_INTERRUPTS();
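
	/*
	 * Assumed contract of TupleQueueReaderNext() as used below: with
	 * nowait = true it returns NULL immediately when no tuple is currently
	 * available, and it sets *done to true only once the worker's queue has
	 * been detached, i.e. no further tuples will ever arrive from it.
	 */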
	/* Attempt to read a tuple. */
	reader = gm_state->reader[nreader - 1];
	tup = TupleQueueReaderNext(reader, nowait, done);

	return tup;
}

/*
 * We have one slot for each item in the heap array.  We use SlotNumber
 * to store slot indexes.  This doesn't actually provide any formal
 * type-safety, but it makes the code more self-documenting.
 */
typedef int32 SlotNumber;

/*
 * Compare the tuples in the two given slots.
 */
static int32
heap_compare_slots(Datum a, Datum b, void *arg)
{
	GatherMergeState *node = (GatherMergeState *) arg;
	SlotNumber	slot1 = DatumGetInt32(a);
	SlotNumber	slot2 = DatumGetInt32(b);

	TupleTableSlot *s1 = node->gm_slots[slot1];
	TupleTableSlot *s2 = node->gm_slots[slot2];
	int			nkey;

	Assert(!TupIsNull(s1));
	Assert(!TupIsNull(s2));

	for (nkey = 0; nkey < node->gm_nkeys; nkey++)
	{
		SortSupport sortKey = node->gm_sortkeys + nkey;
		AttrNumber	attno = sortKey->ssup_attno;
		Datum		datum1,
					datum2;
		bool		isNull1,
					isNull2;
		int			compare;

		datum1 = slot_getattr(s1, attno, &isNull1);
		datum2 = slot_getattr(s2, attno, &isNull2);

		compare = ApplySortComparator(datum1, isNull1,
									  datum2, isNull2,
									  sortKey);
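		/*
		 * Invert the comparison result: binaryheap.c keeps the element that
		 * compares largest at the top, so returning the negated value keeps
		 * the slot that should be emitted next (per the sort order) at the
		 * top of the heap.
		 */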
		if (compare != 0)
			return -compare;
	}
	return 0;
}
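
/*
 * Illustrative sketch (assumption, not part of the original file): how the
 * comparator above would typically be installed in a binary heap with one
 * entry per gm_slots[] index, so that binaryheap_first() yields the slot
 * number whose tuple sorts first.
 */
static inline binaryheap *
gather_merge_build_heap_sketch(GatherMergeState *gm_state, int nreaders)
{
	binaryheap *heap = binaryheap_allocate(nreaders + 1,
										   heap_compare_slots,
										   gm_state);
	int			i;

	/* Add every slot that currently holds a tuple, then heapify once. */
	for (i = 0; i <= nreaders; i++)
	{
		if (!TupIsNull(gm_state->gm_slots[i]))
			binaryheap_add_unordered(heap, Int32GetDatum(i));
	}
	binaryheap_build(heap);

	return heap;
}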