Add new function WaitForParallelWorkersToAttach.

Once this function has been called, we know that all workers have
started and attached to their error queues -- so if any of them
subsequently exit uncleanly, we'll be sure to throw an ERROR promptly.
Otherwise, users of the ParallelContext machinery must be careful not
to wait forever for a worker that has failed to start.  Parallel query
manages to work without needing this for reasons explained in new
comments added by this patch, but it's a useful primitive for other
parallel operations, such as the pending patch to make creating a
btree index run in parallel.

Amit Kapila, revised by me.  Additional review by Peter Geoghegan.

Discussion: http://postgr.es/m/CAA4eK1+e2MzyouF5bg=OtyhDSX+=Ao=3htN=T-r_6s3gCtKFiw@mail.gmail.com
This commit is contained in:
Robert Haas 2018-02-02 09:00:59 -05:00
parent a2a2205761
commit 9222c0d9ed
4 changed files with 163 additions and 11 deletions

View File

@ -437,10 +437,11 @@ ReinitializeParallelDSM(ParallelContext *pcxt)
WaitForParallelWorkersToFinish(pcxt);
WaitForParallelWorkersToExit(pcxt);
pcxt->nworkers_launched = 0;
if (pcxt->any_message_received)
if (pcxt->known_attached_workers)
{
pfree(pcxt->any_message_received);
pcxt->any_message_received = NULL;
pfree(pcxt->known_attached_workers);
pcxt->known_attached_workers = NULL;
pcxt->nknown_attached_workers = 0;
}
}
@ -542,16 +543,147 @@ LaunchParallelWorkers(ParallelContext *pcxt)
/*
* Now that nworkers_launched has taken its final value, we can initialize
* any_message_received.
* known_attached_workers.
*/
if (pcxt->nworkers_launched > 0)
pcxt->any_message_received =
{
pcxt->known_attached_workers =
palloc0(sizeof(bool) * pcxt->nworkers_launched);
pcxt->nknown_attached_workers = 0;
}
/* Restore previous memory context. */
MemoryContextSwitchTo(oldcontext);
}
/*
* Wait for all workers to attach to their error queues, and throw an error if
* any worker fails to do this.
*
* Callers can assume that if this function returns successfully, then the
* number of workers given by pcxt->nworkers_launched have initialized and
* attached to their error queues. Whether or not these workers are guaranteed
* to still be running depends on what code the caller asked them to run;
* this function does not guarantee that they have not exited. However, it
* does guarantee that any workers which exited must have done so cleanly and
* after successfully performing the work with which they were tasked.
*
* If this function is not called, then some of the workers that were launched
* may not have been started due to a fork() failure, or may have exited during
* early startup prior to attaching to the error queue, so nworkers_launched
* cannot be viewed as completely reliable. It will never be less than the
* number of workers which actually started, but it might be more. Any workers
* that failed to start will still be discovered by
* WaitForParallelWorkersToFinish and an error will be thrown at that time,
* provided that function is eventually reached.
*
* In general, the leader process should do as much work as possible before
* calling this function. fork() failures and other early-startup failures
* are very uncommon, and having the leader sit idle when it could be doing
* useful work is undesirable. However, if the leader needs to wait for
* all of its workers or for a specific worker, it may want to call this
* function before doing so. If not, it must make some other provision for
* the failure-to-start case, lest it wait forever. On the other hand, a
* leader which never waits for a worker that might not be started yet, or
* at least never does so prior to WaitForParallelWorkersToFinish(), need not
* call this function at all.
*/
void
WaitForParallelWorkersToAttach(ParallelContext *pcxt)
{
int i;
/* Skip this if we have no launched workers. */
if (pcxt->nworkers_launched == 0)
return;
for (;;)
{
/*
* This will process any parallel messages that are pending and it may
* also throw an error propagated from a worker.
*/
CHECK_FOR_INTERRUPTS();
for (i = 0; i < pcxt->nworkers_launched; ++i)
{
BgwHandleStatus status;
shm_mq *mq;
int rc;
pid_t pid;
if (pcxt->known_attached_workers[i])
continue;
/*
* If error_mqh is NULL, then the worker has already exited
* cleanly.
*/
if (pcxt->worker[i].error_mqh == NULL)
{
pcxt->known_attached_workers[i] = true;
++pcxt->nknown_attached_workers;
continue;
}
status = GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, &pid);
if (status == BGWH_STARTED)
{
/* Has the worker attached to the error queue? */
mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
if (shm_mq_get_sender(mq) != NULL)
{
/* Yes, so it is known to be attached. */
pcxt->known_attached_workers[i] = true;
++pcxt->nknown_attached_workers;
}
}
else if (status == BGWH_STOPPED)
{
/*
* If the worker stopped without attaching to the error queue,
* throw an error.
*/
mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
if (shm_mq_get_sender(mq) == NULL)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("parallel worker failed to initialize"),
errhint("More details may be available in the server log.")));
pcxt->known_attached_workers[i] = true;
++pcxt->nknown_attached_workers;
}
else
{
/*
* Worker not yet started, so we must wait. The postmaster
* will notify us if the worker's state changes. Our latch
* might also get set for some other reason, but if so we'll
* just end up waiting for the same worker again.
*/
rc = WaitLatch(MyLatch,
WL_LATCH_SET | WL_POSTMASTER_DEATH,
-1, WAIT_EVENT_BGWORKER_STARTUP);
/* emergency bailout if postmaster has died */
if (rc & WL_POSTMASTER_DEATH)
proc_exit(1);
if (rc & WL_LATCH_SET)
ResetLatch(MyLatch);
}
}
/* If all workers are known to have started, we're done. */
if (pcxt->nknown_attached_workers >= pcxt->nworkers_launched)
{
Assert(pcxt->nknown_attached_workers == pcxt->nworkers_launched);
break;
}
}
}
/*
* Wait for all workers to finish computing.
*
@ -589,7 +721,7 @@ WaitForParallelWorkersToFinish(ParallelContext *pcxt)
*/
if (pcxt->worker[i].error_mqh == NULL)
++nfinished;
else if (pcxt->any_message_received[i])
else if (pcxt->known_attached_workers[i])
{
anyone_alive = true;
break;
@ -909,8 +1041,12 @@ HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
{
char msgtype;
if (pcxt->any_message_received != NULL)
pcxt->any_message_received[i] = true;
if (pcxt->known_attached_workers != NULL &&
!pcxt->known_attached_workers[i])
{
pcxt->known_attached_workers[i] = true;
pcxt->nknown_attached_workers++;
}
msgtype = pq_getmsgbyte(msg);

View File

@ -312,7 +312,14 @@ gather_readnext(GatherState *gatherstate)
/* Check for async events, particularly messages from workers. */
CHECK_FOR_INTERRUPTS();
/* Attempt to read a tuple, but don't block if none is available. */
/*
* Attempt to read a tuple, but don't block if none is available.
*
* Note that TupleQueueReaderNext will just return NULL for a worker
* which fails to initialize. We'll treat that worker as having
* produced no tuples; WaitForParallelWorkersToFinish will error out
* when we get there.
*/
Assert(gatherstate->nextreader < gatherstate->nreaders);
reader = gatherstate->reader[gatherstate->nextreader];
tup = TupleQueueReaderNext(reader, true, &readerdone);

View File

@ -710,7 +710,14 @@ gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
/* Check for async events, particularly messages from workers. */
CHECK_FOR_INTERRUPTS();
/* Attempt to read a tuple. */
/*
* Attempt to read a tuple.
*
* Note that TupleQueueReaderNext will just return NULL for a worker which
* fails to initialize. We'll treat that worker as having produced no
* tuples; WaitForParallelWorkersToFinish will error out when we get
* there.
*/
reader = gm_state->reader[nreader - 1];
tup = TupleQueueReaderNext(reader, nowait, done);

View File

@ -43,7 +43,8 @@ typedef struct ParallelContext
void *private_memory;
shm_toc *toc;
ParallelWorkerInfo *worker;
bool *any_message_received;
int nknown_attached_workers;
bool *known_attached_workers;
} ParallelContext;
typedef struct ParallelWorkerContext
@ -62,6 +63,7 @@ extern ParallelContext *CreateParallelContext(const char *library_name, const ch
extern void InitializeParallelDSM(ParallelContext *pcxt);
extern void ReinitializeParallelDSM(ParallelContext *pcxt);
extern void LaunchParallelWorkers(ParallelContext *pcxt);
extern void WaitForParallelWorkersToAttach(ParallelContext *pcxt);
extern void WaitForParallelWorkersToFinish(ParallelContext *pcxt);
extern void DestroyParallelContext(ParallelContext *pcxt);
extern bool ParallelContextActive(void);