From e82d9e6283d6ca19d1ea7547e7e9ae8399471e1a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 21 Nov 2006 00:49:55 +0000 Subject: [PATCH] Adjust elog.c so that elog(FATAL) exits (including cases where ERROR is promoted to FATAL) end in exit(1) not exit(0). Then change the postmaster to allow exit(1) without a system-wide panic, but not for the startup subprocess or the bgwriter. There were a couple of places that were using exit(1) to deliberately force a system-wide panic; adjust these to be exit(2) instead. This fixes the problem noted back in July that if the startup process exits with elog(ERROR), the postmaster would think everything is hunky-dory and proceed to start up. Alternative solutions such as trying to run the entire startup process as a critical section seem less clean, primarily because of the fact that a fair amount of startup code is shared by all postmaster children in the EXEC_BACKEND case. We'd need an ugly special case somewhere near the head of main.c to make it work if it's the child process's responsibility to determine what happens; and what's the point when the postmaster already treats different children differently? --- src/backend/bootstrap/bootstrap.c | 9 +------ src/backend/postmaster/bgwriter.c | 6 ++--- src/backend/postmaster/postmaster.c | 38 +++++++++++++++++------------ src/backend/tcop/postgres.c | 10 ++++---- src/backend/utils/error/elog.c | 16 ++++++------ 5 files changed, 39 insertions(+), 40 deletions(-) diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 70b9172cd7..b44c501cc6 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.225 2006/10/04 00:29:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.226 2006/11/21 00:49:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -421,15 +421,8 @@ BootstrapMain(int argc, char *argv[]) case BS_XLOG_STARTUP: bootstrap_signals(); StartupXLOG(); - - /* - * These next two functions don't consider themselves critical, - * but we'd best PANIC anyway if they fail. - */ - START_CRIT_SECTION(); LoadFreeSpaceMap(); BuildFlatFiles(false); - END_CRIT_SECTION(); proc_exit(0); /* startup done */ case BS_XLOG_BGWRITER: diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index e3a4925b91..f2cb3ff68d 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.29 2006/10/06 17:13:59 petere Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.30 2006/11/21 00:49:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -503,12 +503,12 @@ bg_quickdie(SIGNAL_ARGS) * corrupted, so we don't want to try to clean up our transaction. Just * nail the windows shut and get out of town. * - * Note we do exit(1) not exit(0). This is to force the postmaster into a + * Note we do exit(2) not exit(0). This is to force the postmaster into a * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random * backend. This is necessary precisely because we don't clean up our * shared memory state. */ - exit(1); + exit(2); } /* SIGHUP: set flag to re-read config file at next convenient time */ diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index caf7d9a82d..9d7b6065f4 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.501 2006/11/05 22:42:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.502 2006/11/21 00:49:55 tgl Exp $ * * NOTES * @@ -358,6 +358,10 @@ static void ShmemBackendArrayRemove(pid_t pid); #define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP) #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER) +/* Macros to check exit status of a child process */ +#define EXIT_STATUS_0(st) ((st) == 0) +#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) + /* * Postmaster main entry point @@ -2025,7 +2029,8 @@ reaper(SIGNAL_ARGS) if (StartupPID != 0 && pid == StartupPID) { StartupPID = 0; - if (exitstatus != 0) + /* Note: FATAL exit of startup is treated as catastrophic */ + if (!EXIT_STATUS_0(exitstatus)) { LogChildExit(LOG, _("startup process"), pid, exitstatus); @@ -2078,7 +2083,8 @@ reaper(SIGNAL_ARGS) if (BgWriterPID != 0 && pid == BgWriterPID) { BgWriterPID = 0; - if (exitstatus == 0 && Shutdown > NoShutdown && !FatalError && + if (EXIT_STATUS_0(exitstatus) && + Shutdown > NoShutdown && !FatalError && !DLGetHead(BackendList) && AutoVacPID == 0) { /* @@ -2096,7 +2102,8 @@ reaper(SIGNAL_ARGS) } /* - * Any unexpected exit of the bgwriter is treated as a crash. + * Any unexpected exit of the bgwriter (including FATAL exit) + * is treated as a crash. */ HandleChildCrash(pid, exitstatus, _("background writer process")); @@ -2104,15 +2111,16 @@ reaper(SIGNAL_ARGS) } /* - * Was it the autovacuum process? Normal exit can be ignored; we'll - * start a new one at the next iteration of the postmaster's main - * loop, if necessary. An unexpected exit is treated as a crash. + * Was it the autovacuum process? Normal or FATAL exit can be + * ignored; we'll start a new one at the next iteration of the + * postmaster's main loop, if necessary. Any other exit condition + * is treated as a crash. */ if (AutoVacPID != 0 && pid == AutoVacPID) { AutoVacPID = 0; autovac_stopped(); - if (exitstatus != 0) + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) HandleChildCrash(pid, exitstatus, _("autovacuum process")); continue; @@ -2126,7 +2134,7 @@ reaper(SIGNAL_ARGS) if (PgArchPID != 0 && pid == PgArchPID) { PgArchPID = 0; - if (exitstatus != 0) + if (!EXIT_STATUS_0(exitstatus)) LogChildExit(LOG, _("archiver process"), pid, exitstatus); if (XLogArchivingActive() && @@ -2143,7 +2151,7 @@ reaper(SIGNAL_ARGS) if (PgStatPID != 0 && pid == PgStatPID) { PgStatPID = 0; - if (exitstatus != 0) + if (!EXIT_STATUS_0(exitstatus)) LogChildExit(LOG, _("statistics collector process"), pid, exitstatus); if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown) @@ -2157,7 +2165,7 @@ reaper(SIGNAL_ARGS) SysLoggerPID = 0; /* for safety's sake, launch new logger *first* */ SysLoggerPID = SysLogger_Start(); - if (exitstatus != 0) + if (!EXIT_STATUS_0(exitstatus)) LogChildExit(LOG, _("system logger process"), pid, exitstatus); continue; @@ -2229,12 +2237,12 @@ CleanupBackend(int pid, LogChildExit(DEBUG2, _("server process"), pid, exitstatus); /* - * If a backend dies in an ugly way (i.e. exit status not 0) then we must - * signal all other backends to quickdie. If exit status is zero we - * assume everything is hunky dory and simply remove the backend from the + * If a backend dies in an ugly way then we must signal all other backends + * to quickdie. If exit status is zero (normal) or one (FATAL exit), we + * assume everything is all right and simply remove the backend from the * active backend list. */ - if (exitstatus != 0) + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) { HandleChildCrash(pid, exitstatus, _("server process")); return; diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index d019864fea..8b014c887f 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.516 2006/10/19 19:52:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.517 2006/11/21 00:49:55 tgl Exp $ * * NOTES * this is the "main" module of the postgres backend and @@ -2327,12 +2327,12 @@ quickdie(SIGNAL_ARGS) * corrupted, so we don't want to try to clean up our transaction. Just * nail the windows shut and get out of town. * - * Note we do exit(1) not exit(0). This is to force the postmaster into a + * Note we do exit(2) not exit(0). This is to force the postmaster into a * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random * backend. This is necessary precisely because we don't clean up our * shared memory state. */ - exit(1); + exit(2); } /* @@ -2374,7 +2374,7 @@ die(SIGNAL_ARGS) /* * Timeout or shutdown signal from postmaster during client authentication. - * Simply exit(0). + * Simply exit(1). * * XXX: possible future improvement: try to send a message indicating * why we are disconnecting. Problem is to be sure we don't block while @@ -2383,7 +2383,7 @@ die(SIGNAL_ARGS) void authdie(SIGNAL_ARGS) { - exit(0); + exit(1); } /* diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 61c413b4be..6b6a9c51cf 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -42,7 +42,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.175 2006/10/01 22:08:18 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.176 2006/11/21 00:49:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -421,25 +421,23 @@ errfinish(int dummy,...) * fflush here is just to improve the odds that we get to see the * error message, in case things are so hosed that proc_exit crashes. * Any other code you might be tempted to add here should probably be - * in an on_proc_exit callback instead. + * in an on_proc_exit or on_shmem_exit callback instead. */ fflush(stdout); fflush(stderr); /* - * If proc_exit is already running, we exit with nonzero exit code to - * indicate that something's pretty wrong. We also want to exit with - * nonzero exit code if not running under the postmaster (for example, - * if we are being run from the initdb script, we'd better return an - * error status). + * Do normal process-exit cleanup, then return exit code 1 to indicate + * FATAL termination. The postmaster may or may not consider this + * worthy of panic, depending on which subprocess returns it. */ - proc_exit(proc_exit_inprogress || !IsUnderPostmaster); + proc_exit(1); } if (elevel >= PANIC) { /* - * Serious crash time. Postmaster will observe nonzero process exit + * Serious crash time. Postmaster will observe SIGABRT process exit * status and kill the other backends too. * * XXX: what if we are *in* the postmaster? abort() won't kill our