From 5f60086e10a9b94642cebcc7633c282e4c921a24 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 30 Nov 2006 18:29:12 +0000 Subject: [PATCH] Minor adjustments to make failures in startup/shutdown behave more cleanly. StartupXLOG and ShutdownXLOG no longer need to be critical sections, because in all contexts where they are invoked, elog(ERROR) would be translated to elog(FATAL) anyway. (One change in bgwriter.c is needed to make this true: set ExitOnAnyError before trying to exit. This is a good fix anyway since the existing code would have gone into an infinite loop on elog(ERROR) during shutdown.) That avoids a misleading report of PANIC during semi-orderly failures. Modify the postmaster to include the startup process in the set of processes that get SIGTERM when a fast shutdown is requested, and also fix it to not try to restart the bgwriter if the bgwriter fails while trying to write the shutdown checkpoint. Net result is that "pg_ctl stop -m fast" does something reasonable for a system in warm standby mode, and so should Unix system shutdown (i.e., universal SIGTERM). Per gripe from Stephen Harris and some corner-case testing of my own. 
--- src/backend/access/transam/xlog.c | 12 ++---------- src/backend/postmaster/bgwriter.c | 8 +++++++- src/backend/postmaster/postmaster.c | 26 +++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 883dbd42f7..98c610dcad 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.257 2006/11/21 20:59:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.258 2006/11/30 18:29:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -4651,8 +4651,6 @@ StartupXLOG(void) uint32 freespace; TransactionId oldestActiveXID; - CritSectionCount++; - /* * Read control file and check XLOG status looks valid. * @@ -5188,7 +5186,6 @@ StartupXLOG(void) ereport(LOG, (errmsg("database system is ready"))); - CritSectionCount--; /* Shut down readFile facility, free space */ if (readFile >= 0) @@ -5426,12 +5423,10 @@ ShutdownXLOG(int code, Datum arg) ereport(LOG, (errmsg("shutting down"))); - CritSectionCount++; CreateCheckPoint(true, true); ShutdownCLOG(); ShutdownSUBTRANS(); ShutdownMultiXact(); - CritSectionCount--; ereport(LOG, (errmsg("database system is shut down"))); @@ -5605,10 +5600,7 @@ CreateCheckPoint(bool shutdown, bool force) * * This I/O could fail for various reasons. If so, we will fail to * complete the checkpoint, but there is no reason to force a system - * panic. Accordingly, exit critical section while doing it. (If we are - * doing a shutdown checkpoint, we probably *should* panic --- but that - * will happen anyway because we'll still be inside the critical section - * established by ShutdownXLOG.) + * panic. 
Accordingly, exit critical section while doing it. */ END_CRIT_SECTION(); diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 5aa378ae70..4d5c1d1cca 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.31 2006/11/21 20:59:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.32 2006/11/30 18:29:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -350,6 +350,12 @@ BackgroundWriterMain(void) } if (shutdown_requested) { + /* + * From here on, elog(ERROR) should end with exit(1), not send + * control back to the sigsetjmp block above + */ + ExitOnAnyError = true; + /* Close down the database */ ShutdownXLOG(0, 0); DumpFreeSpaceMap(0, 0); /* Normal exit from the bgwriter is here */ diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 907a08bb23..dbe835f3d0 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.504 2006/11/28 12:54:41 petere Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.505 2006/11/30 18:29:12 tgl Exp $ * * NOTES * @@ -1934,8 +1934,13 @@ pmdie(SIGNAL_ARGS) * Note: if we previously got SIGTERM then we may send SIGUSR2 to * the bgwriter a second time here. This should be harmless. 
*/ - if (StartupPID != 0 || FatalError) - break; /* let reaper() handle this */ + if (StartupPID != 0) + { + signal_child(StartupPID, SIGTERM); + break; /* let reaper() do the rest */ + } + if (FatalError) + break; /* let reaper() handle this case */ /* Start the bgwriter if not running */ if (BgWriterPID == 0) BgWriterPID = StartBackgroundWriter(); @@ -2108,6 +2113,21 @@ reaper(SIGNAL_ARGS) */ HandleChildCrash(pid, exitstatus, _("background writer process")); + + /* + * If the bgwriter crashed while trying to write the shutdown + * checkpoint, we may as well just stop here; any recovery + * required will happen on next postmaster start. + */ + if (Shutdown > NoShutdown && + !DLGetHead(BackendList) && AutoVacPID == 0) + { + ereport(LOG, + (errmsg("abnormal database system shutdown"))); + ExitPostmaster(1); + } + + /* Else, proceed as in normal crash recovery */ continue; }