diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 5d09822c81..1197b4fa87 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -218,6 +218,7 @@ int ReservedBackends; /* The socket(s) we're listening to. */ #define MAXLISTEN 64 static pgsocket ListenSocket[MAXLISTEN]; + /* * These globals control the behavior of the postmaster in case some * backend dumps core. Normally, it kills all peers of the dead backend @@ -2887,6 +2888,8 @@ pmdie(SIGNAL_ARGS) sd_notify(0, "STOPPING=1"); #endif + /* tell children to shut down ASAP */ + SetQuitSignalReason(PMQUIT_FOR_STOP); TerminateChildren(SIGQUIT); pmState = PM_WAIT_BACKENDS; @@ -3464,6 +3467,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) LogChildExit(LOG, procname, pid, exitstatus); ereport(LOG, (errmsg("terminating any other active server processes"))); + SetQuitSignalReason(PMQUIT_FOR_CRASH); } /* Process background workers. */ diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c index 94c65877c1..8ef3f6da4a 100644 --- a/src/backend/storage/ipc/pmsignal.c +++ b/src/backend/storage/ipc/pmsignal.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * pmsignal.c - * routines for signaling the postmaster from its child processes + * routines for signaling between the postmaster and its child processes * * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group @@ -55,6 +55,10 @@ * but carries the extra information that the child is a WAL sender. * WAL senders too start in ACTIVE state, but switch to WALSENDER once they * start streaming the WAL (and they never go back to ACTIVE after that). + * + * We also have a shared-memory field that is used for communication in + * the opposite direction, from postmaster to children: it tells why the + * postmaster has broadcasted SIGQUIT signals, if indeed it has done so. */ #define PM_CHILD_UNUSED 0 /* these values must fit in sig_atomic_t */ @@ -65,8 +69,10 @@ /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */ struct PMSignalData { - /* per-reason flags */ + /* per-reason flags for signaling the postmaster */ sig_atomic_t PMSignalFlags[NUM_PMSIGNALS]; + /* global flags for signals from postmaster to children */ + QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */ /* per-child-process flags */ int num_child_flags; /* # of entries in PMChildFlags[] */ int next_child_flag; /* next slot to try to assign */ @@ -134,6 +140,7 @@ PMSignalShmemInit(void) if (!found) { + /* initialize all flags to zeroes */ MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize()); PMSignalState->num_child_flags = MaxLivePostmasterChildren(); } @@ -171,6 +178,34 @@ CheckPostmasterSignal(PMSignalReason reason) return false; } +/* + * SetQuitSignalReason - broadcast the reason for a system shutdown. + * Should be called by postmaster before sending SIGQUIT to children. + * + * Note: in a crash-and-restart scenario, the "reason" field gets cleared + * as a part of rebuilding shared memory; the postmaster need not do it + * explicitly. + */ +void +SetQuitSignalReason(QuitSignalReason reason) +{ + PMSignalState->sigquit_reason = reason; +} + +/* + * GetQuitSignalReason - obtain the reason for a system shutdown. + * Called by child processes when they receive SIGQUIT. + * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT. + */ +QuitSignalReason +GetQuitSignalReason(void) +{ + /* This is called in signal handlers, so be extra paranoid. */ + if (!IsUnderPostmaster || PMSignalState == NULL) + return PMQUIT_NOT_SENT; + return PMSignalState->sigquit_reason; +} + /* * AssignPostmasterChildSlot - select an unused slot for a new postmaster diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 3679799e50..d35c5020ea 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -67,6 +67,7 @@ #include "rewrite/rewriteHandler.h" #include "storage/bufmgr.h" #include "storage/ipc.h" +#include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procsignal.h" #include "storage/sinval.h" @@ -2752,8 +2753,8 @@ drop_unnamed_stmt(void) /* * quickdie() occurs when signaled SIGQUIT by the postmaster. * - * Some backend has bought the farm, - * so we need to stop what we're doing and exit. + * Either some backend has bought the farm, or we've been told to shut down + * "immediately"; so we need to stop what we're doing and exit. */ void quickdie(SIGNAL_ARGS) @@ -2788,18 +2789,36 @@ quickdie(SIGNAL_ARGS) * wrong, so there's not much to lose. Assuming the postmaster is still * running, it will SIGKILL us soon if we get stuck for some reason. * - * Ideally this should be ereport(FATAL), but then we'd not get control - * back... + * Ideally these should be ereport(FATAL), but then we'd not get control + * back to force the correct type of process exit. */ - ereport(WARNING, - (errcode(ERRCODE_CRASH_SHUTDOWN), - errmsg("terminating connection because of crash of another server process"), - errdetail("The postmaster has commanded this server process to roll back" - " the current transaction and exit, because another" - " server process exited abnormally and possibly corrupted" - " shared memory."), - errhint("In a moment you should be able to reconnect to the" - " database and repeat your command."))); + switch (GetQuitSignalReason()) + { + case PMQUIT_NOT_SENT: + /* Hmm, SIGQUIT arrived out of the blue */ + ereport(WARNING, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("terminating connection because of unexpected SIGQUIT signal"))); + break; + case PMQUIT_FOR_CRASH: + /* A crash-and-restart cycle is in progress */ + ereport(WARNING, + (errcode(ERRCODE_CRASH_SHUTDOWN), + errmsg("terminating connection because of crash of another server process"), + errdetail("The postmaster has commanded this server process to roll back" + " the current transaction and exit, because another" + " server process exited abnormally and possibly corrupted" + " shared memory."), + errhint("In a moment you should be able to reconnect to the" + " database and repeat your command."))); + break; + case PMQUIT_FOR_STOP: + /* Immediate-mode stop */ + ereport(WARNING, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("terminating connection due to immediate shutdown command"))); + break; + } /* * We DO NOT want to run proc_exit() or atexit() callbacks -- we're here diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index 56c5ec4481..b6aa158160 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * pmsignal.h - * routines for signaling the postmaster from its child processes + * routines for signaling between the postmaster and its child processes * * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group @@ -45,6 +45,16 @@ typedef enum NUM_PMSIGNALS /* Must be last value of enum! */ } PMSignalReason; +/* + * Reasons why the postmaster would send SIGQUIT to its children. + */ +typedef enum +{ + PMQUIT_NOT_SENT = 0, /* postmaster hasn't sent SIGQUIT */ + PMQUIT_FOR_CRASH, /* some other backend bought the farm */ + PMQUIT_FOR_STOP /* immediate stop was commanded */ +} QuitSignalReason; + /* PMSignalData is an opaque struct, details known only within pmsignal.c */ typedef struct PMSignalData PMSignalData; @@ -55,6 +65,8 @@ extern Size PMSignalShmemSize(void); extern void PMSignalShmemInit(void); extern void SendPostmasterSignal(PMSignalReason reason); extern bool CheckPostmasterSignal(PMSignalReason reason); +extern void SetQuitSignalReason(QuitSignalReason reason); +extern QuitSignalReason GetQuitSignalReason(void); extern int AssignPostmasterChildSlot(void); extern bool ReleasePostmasterChildSlot(int slot); extern bool IsPostmasterChildWalSender(int slot);