Make WaitLatch's WL_POSTMASTER_DEATH result trustworthy; simplify callers.

Per a suggestion from Peter Geoghegan, make WaitLatch responsible for
verifying that the WL_POSTMASTER_DEATH bit it returns is truthful (by
testing PostmasterIsAlive).  Then simplify its callers, who no longer
need to do that for themselves.  Remove weasel wording about falsely-set
result bits from WaitLatch's API contract.
This commit is contained in:
Tom Lane 2012-05-10 14:34:22 -04:00
parent 586d356bc5
commit f40022f1ad
7 changed files with 47 additions and 37 deletions

View File

@ -50,6 +50,7 @@
#include "miscadmin.h"
#include "postmaster/postmaster.h"
#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/shmem.h"
/* Are we currently in WaitLatch? The signal handler would like to know. */
@ -160,15 +161,7 @@ DisownLatch(volatile Latch *latch)
*
* Returns bit mask indicating which condition(s) caused the wake-up. Note
* that if multiple wake-up conditions are true, there is no guarantee that
* we return all of them in one call, but we will return at least one. Also,
* according to the select(2) man page on Linux, select(2) may spuriously
* return and report a file descriptor as readable, when it's not. We use
* select(2), so WaitLatch can also spuriously claim that a socket is
* readable, or postmaster has died, even when none of the wake conditions
* have been satisfied. That should be rare in practice, but the caller
* should not use the return value for anything critical, re-checking the
* situation with PostmasterIsAlive() or read() on a socket as necessary.
* The latch and timeout flag bits can be trusted, however.
* we return all of them in one call, but we will return at least one.
*/
int
WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
@ -318,7 +311,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
{
result |= WL_POSTMASTER_DEATH;
/*
* According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not clear
* that the relevant cases would ever apply to the postmaster
* pipe, but since the consequences of falsely returning
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
* trouble to positively verify EOF with PostmasterIsAlive().
*/
if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH;
}
#else /* !HAVE_POLL */
@ -380,7 +383,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
{
result |= WL_POSTMASTER_DEATH;
/*
* According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not clear
* that the relevant cases would ever apply to the postmaster
* pipe, but since the consequences of falsely returning
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
* trouble to positively verify EOF with PostmasterIsAlive().
*/
if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH;
}
#endif /* HAVE_POLL */
} while (result == 0);

View File

@ -26,6 +26,7 @@
#include "miscadmin.h"
#include "postmaster/postmaster.h"
#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/shmem.h"
@ -217,8 +218,15 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
else if ((wakeEvents & WL_POSTMASTER_DEATH) &&
rc == WAIT_OBJECT_0 + pmdeath_eventno)
{
/* Postmaster died */
result |= WL_POSTMASTER_DEATH;
/*
* Postmaster apparently died. Since the consequences of falsely
* returning WL_POSTMASTER_DEATH could be pretty unpleasant, we
* take the trouble to positively verify this with
* PostmasterIsAlive(), even though there is no known reason to
* think that the event could be falsely set on Windows.
*/
if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH;
}
else
elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);

View File

@ -574,6 +574,7 @@ AutoVacLauncherMain(int argc, char *argv[])
TimestampTz current_time = 0;
bool can_launch;
Dlelem *elem;
int rc;
/*
* This loop is a bit different from the normal use of WaitLatch,
@ -592,9 +593,9 @@ AutoVacLauncherMain(int argc, char *argv[])
* Wait until naptime expires or we get some type of signal (all the
* signal handlers will wake us by calling SetLatch).
*/
WaitLatch(&MyProc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
(nap.tv_sec * 1000L) + (nap.tv_usec / 1000L));
rc = WaitLatch(&MyProc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
(nap.tv_sec * 1000L) + (nap.tv_usec / 1000L));
ResetLatch(&MyProc->procLatch);
@ -604,7 +605,7 @@ AutoVacLauncherMain(int argc, char *argv[])
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children.
*/
if (!PostmasterIsAlive())
if (rc & WL_POSTMASTER_DEATH)
proc_exit(1);
/* the normal shutdown case */

View File

@ -48,7 +48,6 @@
#include "storage/buf_internals.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
@ -323,11 +322,9 @@ BackgroundWriterMain(void)
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. Note
* that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
* if it is set, recheck with PostmasterIsAlive before believing it.
* necessity for manual cleanup of all postmaster children.
*/
if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
if (rc & WL_POSTMASTER_DEATH)
exit(1);
prev_hibernate = can_hibernate;

View File

@ -50,7 +50,6 @@
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
@ -581,11 +580,9 @@ CheckpointerMain(void)
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. Note
* that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
* if it is set, recheck with PostmasterIsAlive before believing it.
* necessity for manual cleanup of all postmaster children.
*/
if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
if (rc & WL_POSTMASTER_DEATH)
exit(1);
}
}

View File

@ -51,7 +51,6 @@
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/procsignal.h"
#include "utils/ascii.h"
#include "utils/guc.h"
@ -3227,11 +3226,9 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. Note
* that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
* if it is set, recheck with PostmasterIsAlive before believing it.
* necessity for manual cleanup of all postmaster children.
*/
if ((wr & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
if (wr & WL_POSTMASTER_DEATH)
break;
} /* end of outer loop */

View File

@ -53,7 +53,6 @@
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/guc.h"
@ -305,11 +304,9 @@ WalWriterMain(void)
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. Note
* that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
* if it is set, recheck with PostmasterIsAlive before believing it.
* necessity for manual cleanup of all postmaster children.
*/
if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
if (rc & WL_POSTMASTER_DEATH)
exit(1);
}
}