Modify tqueue infrastructure to support transient record types.

Commit 4a4e6893aa, which introduced this
mechanism, failed to account for the fact that the RECORD pseudo-type
uses transient typmods that are only meaningful within a single
backend.  Transferring such tuples without modification between two
cooperating backends does not work.  This commit installs a system
for passing the tuple descriptors over the same shm_mq being used to
send the tuples themselves.  The two sides might not assign the same
transient typmod to any given tuple descriptor, so we must also
substitute the appropriate receiver-side typmod for the one used by
the sender.  That adds some CPU overhead, but still seems better than
being unable to pass records between cooperating parallel processes.

Along the way, move the logic for handling multiple tuple queues from
tqueue.c to nodeGather.c; tqueue.c now provides a TupleQueueReader,
which reads from a single queue, rather than a TupleQueueFunnel, which
potentially reads from multiple queues.  This change was suggested
previously as a way to make sure that nodeGather.c rather than tqueue.c
had policy control over the order in which to read from queues, but
it wasn't clear to me until now how good an idea it was.  typmod
mapping needs to be performed separately for each queue, and it is
much simpler if the tqueue.c code handles that and leaves multiplexing
multiple queues to higher layers of the stack.
Robert Haas 2015-11-06 16:58:45 -05:00
parent cbb82e370d
commit 6e71dd7ce9
5 changed files with 991 additions and 158 deletions
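
Before the diffs, it may help to make the typmod problem concrete. The sketch below is not code from this commit (the real mapping logic lives in tqueue.c, whose diff is suppressed below); every name in it is a hypothetical illustration. The idea: a receiver cannot use a sender-assigned RECORD typmod directly, so it keeps a per-queue translation table from sender typmods to typmods registered in its own backend, built up as tuple descriptors arrive over the queue.

/* Illustrative sketch only, not from the commit; all names are hypothetical. */
#include <stdint.h>

typedef struct TypmodMapEntry
{
    int32_t     sender_typmod;      /* typmod as assigned by the worker */
    int32_t     receiver_typmod;    /* equivalent typmod in this backend */
} TypmodMapEntry;

/*
 * Translate a sender-side transient typmod to the receiver's own typmod.
 * Returns -1 if no descriptor for this typmod has been received yet, in
 * which case the tuple cannot be interpreted until the descriptor message
 * has been consumed.
 */
static int32_t
remap_transient_typmod(const TypmodMapEntry *map, int nentries,
                       int32_t sender_typmod)
{
    int         i;

    for (i = 0; i < nentries; i++)
    {
        if (map[i].sender_typmod == sender_typmod)
            return map[i].receiver_typmod;
    }
    return -1;
}

In the committed code this substitution happens inside tqueue.c's reader, which is one reason the commit message argues that each queue should get its own TupleQueueReader: the mapping is inherently per-queue.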

src/backend/executor/nodeGather.c

@@ -36,11 +36,13 @@
 #include "executor/nodeGather.h"
 #include "executor/nodeSubplan.h"
 #include "executor/tqueue.h"
+#include "miscadmin.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"

 static TupleTableSlot *gather_getnext(GatherState *gatherstate);
+static HeapTuple gather_readnext(GatherState *gatherstate);
 static void ExecShutdownGatherWorkers(GatherState *node);
@@ -125,6 +127,7 @@ ExecInitGather(Gather *node, EState *estate, int eflags)
 TupleTableSlot *
 ExecGather(GatherState *node)
 {
+	TupleTableSlot *fslot = node->funnel_slot;
 	int         i;
 	TupleTableSlot *slot;
 	TupleTableSlot *resultSlot;
@@ -148,6 +151,7 @@ ExecGather(GatherState *node)
 		 */
 		if (gather->num_workers > 0 && IsInParallelMode())
 		{
+			ParallelContext *pcxt;
 			bool        got_any_worker = false;

 			/* Initialize the workers required to execute Gather node. */
@@ -160,18 +164,26 @@
 		 * Register backend workers. We might not get as many as we
 		 * requested, or indeed any at all.
 		 */
-		LaunchParallelWorkers(node->pei->pcxt);
+		pcxt = node->pei->pcxt;
+		LaunchParallelWorkers(pcxt);

-		/* Set up a tuple queue to collect the results. */
-		node->funnel = CreateTupleQueueFunnel();
-		for (i = 0; i < node->pei->pcxt->nworkers; ++i)
+		/* Set up tuple queue readers to read the results. */
+		if (pcxt->nworkers > 0)
 		{
-			if (node->pei->pcxt->worker[i].bgwhandle)
+			node->nreaders = 0;
+			node->reader =
+				palloc(pcxt->nworkers * sizeof(TupleQueueReader *));
+
+			for (i = 0; i < pcxt->nworkers; ++i)
 			{
+				if (pcxt->worker[i].bgwhandle == NULL)
+					continue;
+
 				shm_mq_set_handle(node->pei->tqueue[i],
-								  node->pei->pcxt->worker[i].bgwhandle);
-				RegisterTupleQueueOnFunnel(node->funnel,
-										   node->pei->tqueue[i]);
+								  pcxt->worker[i].bgwhandle);
+				node->reader[node->nreaders++] =
+					CreateTupleQueueReader(node->pei->tqueue[i],
+										   fslot->tts_tupleDescriptor);
 				got_any_worker = true;
 			}
 		}
@@ -182,7 +194,7 @@ ExecGather(GatherState *node)
 		}

 		/* Run plan locally if no workers or not single-copy. */
-		node->need_to_scan_locally = (node->funnel == NULL)
+		node->need_to_scan_locally = (node->reader == NULL)
 			|| !gather->single_copy;
 		node->initialized = true;
 	}
@@ -254,13 +266,9 @@ ExecEndGather(GatherState *node)
 }

 /*
- * gather_getnext
- *
- * Get the next tuple from shared memory queue. This function
- * is responsible for fetching tuples from all the queues associated
- * with worker backends used in Gather node execution and if there is
- * no data available from queues or no worker is available, it does
- * fetch the data from local node.
+ * Read the next tuple. We might fetch a tuple from one of the tuple queues
+ * using gather_readnext, or if no tuple queue contains a tuple and the
+ * single_copy flag is not set, we might generate one locally instead.
  */
 static TupleTableSlot *
 gather_getnext(GatherState *gatherstate)
@@ -270,18 +278,11 @@ gather_getnext(GatherState *gatherstate)
 	TupleTableSlot *fslot = gatherstate->funnel_slot;
 	HeapTuple   tup;

-	while (gatherstate->funnel != NULL || gatherstate->need_to_scan_locally)
+	while (gatherstate->reader != NULL || gatherstate->need_to_scan_locally)
 	{
-		if (gatherstate->funnel != NULL)
+		if (gatherstate->reader != NULL)
 		{
-			bool        done = false;
-
-			/* wait only if local scan is done */
-			tup = TupleQueueFunnelNext(gatherstate->funnel,
-									   gatherstate->need_to_scan_locally,
-									   &done);
-			if (done)
-				ExecShutdownGatherWorkers(gatherstate);
+			tup = gather_readnext(gatherstate);

 			if (HeapTupleIsValid(tup))
 			{
@@ -309,6 +310,80 @@
 	return ExecClearTuple(fslot);
 }

+/*
+ * Attempt to read a tuple from one of our parallel workers.
+ */
+static HeapTuple
+gather_readnext(GatherState *gatherstate)
+{
+	int         waitpos = gatherstate->nextreader;
+
+	for (;;)
+	{
+		TupleQueueReader *reader;
+		HeapTuple   tup;
+		bool        readerdone;
+
+		/* Make sure we've read all messages from workers. */
+		HandleParallelMessages();
+
+		/* Attempt to read a tuple, but don't block if none is available. */
+		reader = gatherstate->reader[gatherstate->nextreader];
+		tup = TupleQueueReaderNext(reader, true, &readerdone);
+
+		/*
+		 * If this reader is done, remove it. If all readers are done,
+		 * clean up remaining worker state.
+		 */
+		if (readerdone)
+		{
+			DestroyTupleQueueReader(reader);
+			--gatherstate->nreaders;
+			if (gatherstate->nreaders == 0)
+			{
+				ExecShutdownGather(gatherstate);
+				return NULL;
+			}
+			else
+			{
+				memmove(&gatherstate->reader[gatherstate->nextreader],
+						&gatherstate->reader[gatherstate->nextreader + 1],
+						sizeof(TupleQueueReader *)
+						* (gatherstate->nreaders - gatherstate->nextreader));
+				if (gatherstate->nextreader >= gatherstate->nreaders)
+					gatherstate->nextreader = 0;
+				if (gatherstate->nextreader < waitpos)
+					--waitpos;
+			}
+			continue;
+		}
+
+		/* Advance nextreader pointer in round-robin fashion. */
+		gatherstate->nextreader =
+			(gatherstate->nextreader + 1) % gatherstate->nreaders;
+
+		/* If we got a tuple, return it. */
+		if (tup)
+			return tup;
+
+		/* Have we visited every TupleQueueReader? */
+		if (gatherstate->nextreader == waitpos)
+		{
+			/*
+			 * If (still) running plan locally, return NULL so caller can
+			 * generate another tuple from the local copy of the plan.
+			 */
+			if (gatherstate->need_to_scan_locally)
+				return NULL;
+
+			/* Nothing to do except wait for developments. */
+			WaitLatch(MyLatch, WL_LATCH_SET, 0);
+			CHECK_FOR_INTERRUPTS();
+			ResetLatch(MyLatch);
+		}
+	}
+}
+
 /* ----------------------------------------------------------------
  *		ExecShutdownGatherWorkers
  *
@@ -320,11 +395,14 @@ gather_getnext(GatherState *gatherstate)
 void
 ExecShutdownGatherWorkers(GatherState *node)
 {
-	/* Shut down tuple queue funnel before shutting down workers. */
-	if (node->funnel != NULL)
+	/* Shut down tuple queue readers before shutting down workers. */
+	if (node->reader != NULL)
 	{
-		DestroyTupleQueueFunnel(node->funnel);
-		node->funnel = NULL;
+		int         i;
+
+		for (i = 0; i < node->nreaders; ++i)
+			DestroyTupleQueueReader(node->reader[i]);
+		node->reader = NULL;
 	}

 	/* Now shut down the workers. */
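
An editorial aside on gather_readnext above: the fiddliest part is the bookkeeping when a queue is exhausted. The reader array is compacted with memmove, and both nextreader and waitpos must be adjusted so the round-robin sweep still terminates where it started. The standalone toy below (plain C, no PostgreSQL dependencies; the three queue IDs are invented) replays that removal rule once:

#include <stdio.h>
#include <string.h>

#define NQUEUES 3

int
main(void)
{
    /* Toy stand-in for GatherState's reader array: just queue IDs. */
    int         reader[NQUEUES] = {0, 1, 2};
    int         nreaders = NQUEUES;
    int         nextreader = 1;     /* suppose the sweep started here */
    int         waitpos = nextreader;

    /* Pretend the reader at nextreader just reported "done": remove it. */
    memmove(&reader[nextreader], &reader[nextreader + 1],
            sizeof(int) * (nreaders - nextreader - 1));
    --nreaders;
    if (nextreader >= nreaders)
        nextreader = 0;
    if (nextreader < waitpos)
        --waitpos;

    printf("nreaders=%d nextreader=%d waitpos=%d remaining:",
           nreaders, nextreader, waitpos);
    for (int i = 0; i < nreaders; i++)
        printf(" %d", reader[i]);
    printf("\n");   /* prints: nreaders=2 nextreader=1 waitpos=1 remaining: 0 2 */
    return 0;
}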

src/backend/executor/tqueue.c
(File diff suppressed because it is too large.)

src/include/executor/tqueue.h

@@ -21,11 +21,11 @@
 extern DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle);

 /* Use these to receive tuples from a shm_mq. */
-typedef struct TupleQueueFunnel TupleQueueFunnel;
-extern TupleQueueFunnel *CreateTupleQueueFunnel(void);
-extern void DestroyTupleQueueFunnel(TupleQueueFunnel *funnel);
-extern void RegisterTupleQueueOnFunnel(TupleQueueFunnel *, shm_mq_handle *);
-extern HeapTuple TupleQueueFunnelNext(TupleQueueFunnel *, bool nowait,
-					bool *done);
+typedef struct TupleQueueReader TupleQueueReader;
+extern TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle,
+					  TupleDesc tupledesc);
+extern void DestroyTupleQueueReader(TupleQueueReader *funnel);
+extern HeapTuple TupleQueueReaderNext(TupleQueueReader *,
+					 bool nowait, bool *done);

 #endif   /* TQUEUE_H */
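
The hunk above replaces the multi-queue funnel with a deliberately narrow single-queue API. As orientation, a hedged consumer-side sketch follows; it assumes PostgreSQL backend context, and mq (a shm_mq_handle), tupdesc (a TupleDesc), and process_tuple() are hypothetical stand-ins, not names from the commit. Only the three tqueue.h calls and HeapTupleIsValid are real PostgreSQL names:

/* Hypothetical consumer loop around the tqueue.h reader API. */
TupleQueueReader *reader;
HeapTuple   tup;
bool        done = false;

reader = CreateTupleQueueReader(mq, tupdesc);   /* mq, tupdesc: assumed */
for (;;)
{
    /* nowait = false: block until a tuple arrives or the queue ends */
    tup = TupleQueueReaderNext(reader, false, &done);
    if (done)
        break;                  /* sender detached; queue exhausted */
    if (HeapTupleIsValid(tup))
        process_tuple(tup);     /* hypothetical per-tuple work */
}
DestroyTupleQueueReader(reader);

nodeGather.c's gather_readnext is the real consumer; it layers the round-robin policy and nowait = true reads on top of exactly these calls.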

src/include/nodes/execnodes.h

@@ -1963,7 +1963,9 @@ typedef struct GatherState
 	PlanState   ps;				/* its first field is NodeTag */
 	bool        initialized;
 	struct ParallelExecutorInfo *pei;
-	struct TupleQueueFunnel *funnel;
+	int         nreaders;
+	int         nextreader;
+	struct TupleQueueReader **reader;
 	TupleTableSlot *funnel_slot;
 	bool        need_to_scan_locally;
 } GatherState;

src/tools/pgindent/typedefs.list

@@ -2018,7 +2018,7 @@ TupleHashEntry
 TupleHashEntryData
 TupleHashIterator
 TupleHashTable
-TupleQueueFunnel
+TupleQueueReader
 TupleTableSlot
 Tuplesortstate
 Tuplestorestate