From 7e784d1dc191be24480a6b31a4ddc8e0e52be24d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 24 Dec 2020 12:58:32 -0500 Subject: [PATCH] Improve client error messages for immediate-stop situations. Up to now, if the DBA issued "pg_ctl stop -m immediate", the message sent to clients was the same as for a crash-and-restart situation. This is confusing, not least because the message claims that the database will soon be up again, something we have no business predicting. Improve things so that we can generate distinct messages for the two cases (and also recognize an ad-hoc SIGQUIT, should somebody try that). To do that, add a field to pmsignal.c's shared memory data structure that the postmaster sets just before broadcasting SIGQUIT to its children. No interlocking seems to be necessary; the intervening signal-sending and signal-receipt should sufficiently serialize accesses to the field. Hence, this isn't any riskier than the existing usages of pmsignal.c. We might in future extend this idea to improve other postmaster-to-children signal scenarios, although none of them currently seem to be as badly overloaded as SIGQUIT. Discussion: https://postgr.es/m/559291.1608587013@sss.pgh.pa.us --- src/backend/postmaster/postmaster.c | 4 +++ src/backend/storage/ipc/pmsignal.c | 39 +++++++++++++++++++++++-- src/backend/tcop/postgres.c | 45 ++++++++++++++++++++--------- src/include/storage/pmsignal.h | 14 ++++++++- 4 files changed, 86 insertions(+), 16 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 5d09822c81..1197b4fa87 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -218,6 +218,7 @@ int ReservedBackends; /* The socket(s) we're listening to. */ #define MAXLISTEN 64 static pgsocket ListenSocket[MAXLISTEN]; + /* * These globals control the behavior of the postmaster in case some * backend dumps core. Normally, it kills all peers of the dead backend @@ -2887,6 +2888,8 @@ pmdie(SIGNAL_ARGS) sd_notify(0, "STOPPING=1"); #endif + /* tell children to shut down ASAP */ + SetQuitSignalReason(PMQUIT_FOR_STOP); TerminateChildren(SIGQUIT); pmState = PM_WAIT_BACKENDS; @@ -3464,6 +3467,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) LogChildExit(LOG, procname, pid, exitstatus); ereport(LOG, (errmsg("terminating any other active server processes"))); + SetQuitSignalReason(PMQUIT_FOR_CRASH); } /* Process background workers. */ diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c index 94c65877c1..8ef3f6da4a 100644 --- a/src/backend/storage/ipc/pmsignal.c +++ b/src/backend/storage/ipc/pmsignal.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * pmsignal.c - * routines for signaling the postmaster from its child processes + * routines for signaling between the postmaster and its child processes * * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group @@ -55,6 +55,10 @@ * but carries the extra information that the child is a WAL sender. * WAL senders too start in ACTIVE state, but switch to WALSENDER once they * start streaming the WAL (and they never go back to ACTIVE after that). + * + * We also have a shared-memory field that is used for communication in + * the opposite direction, from postmaster to children: it tells why the + * postmaster has broadcasted SIGQUIT signals, if indeed it has done so. */ #define PM_CHILD_UNUSED 0 /* these values must fit in sig_atomic_t */ @@ -65,8 +69,10 @@ /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */ struct PMSignalData { - /* per-reason flags */ + /* per-reason flags for signaling the postmaster */ sig_atomic_t PMSignalFlags[NUM_PMSIGNALS]; + /* global flags for signals from postmaster to children */ + QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */ /* per-child-process flags */ int num_child_flags; /* # of entries in PMChildFlags[] */ int next_child_flag; /* next slot to try to assign */ @@ -134,6 +140,7 @@ PMSignalShmemInit(void) if (!found) { + /* initialize all flags to zeroes */ MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize()); PMSignalState->num_child_flags = MaxLivePostmasterChildren(); } @@ -171,6 +178,34 @@ CheckPostmasterSignal(PMSignalReason reason) return false; } +/* + * SetQuitSignalReason - broadcast the reason for a system shutdown. + * Should be called by postmaster before sending SIGQUIT to children. + * + * Note: in a crash-and-restart scenario, the "reason" field gets cleared + * as a part of rebuilding shared memory; the postmaster need not do it + * explicitly. + */ +void +SetQuitSignalReason(QuitSignalReason reason) +{ + PMSignalState->sigquit_reason = reason; +} + +/* + * GetQuitSignalReason - obtain the reason for a system shutdown. + * Called by child processes when they receive SIGQUIT. + * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT. + */ +QuitSignalReason +GetQuitSignalReason(void) +{ + /* This is called in signal handlers, so be extra paranoid. */ + if (!IsUnderPostmaster || PMSignalState == NULL) + return PMQUIT_NOT_SENT; + return PMSignalState->sigquit_reason; +} + /* * AssignPostmasterChildSlot - select an unused slot for a new postmaster diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 3679799e50..d35c5020ea 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -67,6 +67,7 @@ #include "rewrite/rewriteHandler.h" #include "storage/bufmgr.h" #include "storage/ipc.h" +#include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procsignal.h" #include "storage/sinval.h" @@ -2752,8 +2753,8 @@ drop_unnamed_stmt(void) /* * quickdie() occurs when signaled SIGQUIT by the postmaster. * - * Some backend has bought the farm, - * so we need to stop what we're doing and exit. + * Either some backend has bought the farm, or we've been told to shut down + * "immediately"; so we need to stop what we're doing and exit. */ void quickdie(SIGNAL_ARGS) @@ -2788,18 +2789,36 @@ quickdie(SIGNAL_ARGS) * wrong, so there's not much to lose. Assuming the postmaster is still * running, it will SIGKILL us soon if we get stuck for some reason. * - * Ideally this should be ereport(FATAL), but then we'd not get control - * back... + * Ideally these should be ereport(FATAL), but then we'd not get control + * back to force the correct type of process exit. */ - ereport(WARNING, - (errcode(ERRCODE_CRASH_SHUTDOWN), - errmsg("terminating connection because of crash of another server process"), - errdetail("The postmaster has commanded this server process to roll back" - " the current transaction and exit, because another" - " server process exited abnormally and possibly corrupted" - " shared memory."), - errhint("In a moment you should be able to reconnect to the" - " database and repeat your command."))); + switch (GetQuitSignalReason()) + { + case PMQUIT_NOT_SENT: + /* Hmm, SIGQUIT arrived out of the blue */ + ereport(WARNING, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("terminating connection because of unexpected SIGQUIT signal"))); + break; + case PMQUIT_FOR_CRASH: + /* A crash-and-restart cycle is in progress */ + ereport(WARNING, + (errcode(ERRCODE_CRASH_SHUTDOWN), + errmsg("terminating connection because of crash of another server process"), + errdetail("The postmaster has commanded this server process to roll back" + " the current transaction and exit, because another" + " server process exited abnormally and possibly corrupted" + " shared memory."), + errhint("In a moment you should be able to reconnect to the" + " database and repeat your command."))); + break; + case PMQUIT_FOR_STOP: + /* Immediate-mode stop */ + ereport(WARNING, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("terminating connection due to immediate shutdown command"))); + break; + } /* * We DO NOT want to run proc_exit() or atexit() callbacks -- we're here diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index 56c5ec4481..b6aa158160 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * pmsignal.h - * routines for signaling the postmaster from its child processes + * routines for signaling between the postmaster and its child processes * * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group @@ -45,6 +45,16 @@ typedef enum NUM_PMSIGNALS /* Must be last value of enum! */ } PMSignalReason; +/* + * Reasons why the postmaster would send SIGQUIT to its children. + */ +typedef enum +{ + PMQUIT_NOT_SENT = 0, /* postmaster hasn't sent SIGQUIT */ + PMQUIT_FOR_CRASH, /* some other backend bought the farm */ + PMQUIT_FOR_STOP /* immediate stop was commanded */ +} QuitSignalReason; + /* PMSignalData is an opaque struct, details known only within pmsignal.c */ typedef struct PMSignalData PMSignalData; @@ -55,6 +65,8 @@ extern Size PMSignalShmemSize(void); extern void PMSignalShmemInit(void); extern void SendPostmasterSignal(PMSignalReason reason); extern bool CheckPostmasterSignal(PMSignalReason reason); +extern void SetQuitSignalReason(QuitSignalReason reason); +extern QuitSignalReason GetQuitSignalReason(void); extern int AssignPostmasterChildSlot(void); extern bool ReleasePostmasterChildSlot(int slot); extern bool IsPostmasterChildWalSender(int slot);