Adjust elog.c so that elog(FATAL) exits (including cases where ERROR is
promoted to FATAL) end in exit(1), not exit(0). Then change the postmaster to allow exit(1) without a system-wide panic, but not for the startup subprocess or the bgwriter. There were a couple of places that were using exit(1) to deliberately force a system-wide panic; adjust these to be exit(2) instead. This fixes the problem noted back in July that if the startup process exits with elog(ERROR), the postmaster would think everything is hunky-dory and proceed to start up. Alternative solutions, such as trying to run the entire startup process as a critical section, seem less clean, primarily because a fair amount of startup code is shared by all postmaster children in the EXEC_BACKEND case. We'd need an ugly special case somewhere near the head of main.c to make it work if it's the child process's responsibility to determine what happens; and what's the point when the postmaster already treats different children differently?
This commit is contained in:
parent
778bb7b60d
commit
e82d9e6283
|
@ -8,7 +8,7 @@
|
|||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.225 2006/10/04 00:29:49 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.226 2006/11/21 00:49:54 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -421,15 +421,8 @@ BootstrapMain(int argc, char *argv[])
|
|||
case BS_XLOG_STARTUP:
|
||||
bootstrap_signals();
|
||||
StartupXLOG();
|
||||
|
||||
/*
|
||||
* These next two functions don't consider themselves critical,
|
||||
* but we'd best PANIC anyway if they fail.
|
||||
*/
|
||||
START_CRIT_SECTION();
|
||||
LoadFreeSpaceMap();
|
||||
BuildFlatFiles(false);
|
||||
END_CRIT_SECTION();
|
||||
proc_exit(0); /* startup done */
|
||||
|
||||
case BS_XLOG_BGWRITER:
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.29 2006/10/06 17:13:59 petere Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.30 2006/11/21 00:49:55 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -503,12 +503,12 @@ bg_quickdie(SIGNAL_ARGS)
|
|||
* corrupted, so we don't want to try to clean up our transaction. Just
|
||||
* nail the windows shut and get out of town.
|
||||
*
|
||||
* Note we do exit(1) not exit(0). This is to force the postmaster into a
|
||||
* Note we do exit(2) not exit(0). This is to force the postmaster into a
|
||||
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
|
||||
* backend. This is necessary precisely because we don't clean up our
|
||||
* shared memory state.
|
||||
*/
|
||||
exit(1);
|
||||
exit(2);
|
||||
}
|
||||
|
||||
/* SIGHUP: set flag to re-read config file at next convenient time */
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.501 2006/11/05 22:42:09 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.502 2006/11/21 00:49:55 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
*
|
||||
|
@ -358,6 +358,10 @@ static void ShmemBackendArrayRemove(pid_t pid);
|
|||
#define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP)
|
||||
#define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
|
||||
|
||||
/* Macros to check exit status of a child process */
|
||||
#define EXIT_STATUS_0(st) ((st) == 0)
|
||||
#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
|
||||
|
||||
|
||||
/*
|
||||
* Postmaster main entry point
|
||||
|
@ -2025,7 +2029,8 @@ reaper(SIGNAL_ARGS)
|
|||
if (StartupPID != 0 && pid == StartupPID)
|
||||
{
|
||||
StartupPID = 0;
|
||||
if (exitstatus != 0)
|
||||
/* Note: FATAL exit of startup is treated as catastrophic */
|
||||
if (!EXIT_STATUS_0(exitstatus))
|
||||
{
|
||||
LogChildExit(LOG, _("startup process"),
|
||||
pid, exitstatus);
|
||||
|
@ -2078,7 +2083,8 @@ reaper(SIGNAL_ARGS)
|
|||
if (BgWriterPID != 0 && pid == BgWriterPID)
|
||||
{
|
||||
BgWriterPID = 0;
|
||||
if (exitstatus == 0 && Shutdown > NoShutdown && !FatalError &&
|
||||
if (EXIT_STATUS_0(exitstatus) &&
|
||||
Shutdown > NoShutdown && !FatalError &&
|
||||
!DLGetHead(BackendList) && AutoVacPID == 0)
|
||||
{
|
||||
/*
|
||||
|
@ -2096,7 +2102,8 @@ reaper(SIGNAL_ARGS)
|
|||
}
|
||||
|
||||
/*
|
||||
* Any unexpected exit of the bgwriter is treated as a crash.
|
||||
* Any unexpected exit of the bgwriter (including FATAL exit)
|
||||
* is treated as a crash.
|
||||
*/
|
||||
HandleChildCrash(pid, exitstatus,
|
||||
_("background writer process"));
|
||||
|
@ -2104,15 +2111,16 @@ reaper(SIGNAL_ARGS)
|
|||
}
|
||||
|
||||
/*
|
||||
* Was it the autovacuum process? Normal exit can be ignored; we'll
|
||||
* start a new one at the next iteration of the postmaster's main
|
||||
* loop, if necessary. An unexpected exit is treated as a crash.
|
||||
* Was it the autovacuum process? Normal or FATAL exit can be
|
||||
* ignored; we'll start a new one at the next iteration of the
|
||||
* postmaster's main loop, if necessary. Any other exit condition
|
||||
* is treated as a crash.
|
||||
*/
|
||||
if (AutoVacPID != 0 && pid == AutoVacPID)
|
||||
{
|
||||
AutoVacPID = 0;
|
||||
autovac_stopped();
|
||||
if (exitstatus != 0)
|
||||
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
||||
HandleChildCrash(pid, exitstatus,
|
||||
_("autovacuum process"));
|
||||
continue;
|
||||
|
@ -2126,7 +2134,7 @@ reaper(SIGNAL_ARGS)
|
|||
if (PgArchPID != 0 && pid == PgArchPID)
|
||||
{
|
||||
PgArchPID = 0;
|
||||
if (exitstatus != 0)
|
||||
if (!EXIT_STATUS_0(exitstatus))
|
||||
LogChildExit(LOG, _("archiver process"),
|
||||
pid, exitstatus);
|
||||
if (XLogArchivingActive() &&
|
||||
|
@ -2143,7 +2151,7 @@ reaper(SIGNAL_ARGS)
|
|||
if (PgStatPID != 0 && pid == PgStatPID)
|
||||
{
|
||||
PgStatPID = 0;
|
||||
if (exitstatus != 0)
|
||||
if (!EXIT_STATUS_0(exitstatus))
|
||||
LogChildExit(LOG, _("statistics collector process"),
|
||||
pid, exitstatus);
|
||||
if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
|
||||
|
@ -2157,7 +2165,7 @@ reaper(SIGNAL_ARGS)
|
|||
SysLoggerPID = 0;
|
||||
/* for safety's sake, launch new logger *first* */
|
||||
SysLoggerPID = SysLogger_Start();
|
||||
if (exitstatus != 0)
|
||||
if (!EXIT_STATUS_0(exitstatus))
|
||||
LogChildExit(LOG, _("system logger process"),
|
||||
pid, exitstatus);
|
||||
continue;
|
||||
|
@ -2229,12 +2237,12 @@ CleanupBackend(int pid,
|
|||
LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
|
||||
|
||||
/*
|
||||
* If a backend dies in an ugly way (i.e. exit status not 0) then we must
|
||||
* signal all other backends to quickdie. If exit status is zero we
|
||||
* assume everything is hunky dory and simply remove the backend from the
|
||||
* If a backend dies in an ugly way then we must signal all other backends
|
||||
* to quickdie. If exit status is zero (normal) or one (FATAL exit), we
|
||||
* assume everything is all right and simply remove the backend from the
|
||||
* active backend list.
|
||||
*/
|
||||
if (exitstatus != 0)
|
||||
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
||||
{
|
||||
HandleChildCrash(pid, exitstatus, _("server process"));
|
||||
return;
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.516 2006/10/19 19:52:22 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.517 2006/11/21 00:49:55 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* this is the "main" module of the postgres backend and
|
||||
|
@ -2327,12 +2327,12 @@ quickdie(SIGNAL_ARGS)
|
|||
* corrupted, so we don't want to try to clean up our transaction. Just
|
||||
* nail the windows shut and get out of town.
|
||||
*
|
||||
* Note we do exit(1) not exit(0). This is to force the postmaster into a
|
||||
* Note we do exit(2) not exit(0). This is to force the postmaster into a
|
||||
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
|
||||
* backend. This is necessary precisely because we don't clean up our
|
||||
* shared memory state.
|
||||
*/
|
||||
exit(1);
|
||||
exit(2);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2374,7 +2374,7 @@ die(SIGNAL_ARGS)
|
|||
|
||||
/*
|
||||
* Timeout or shutdown signal from postmaster during client authentication.
|
||||
* Simply exit(0).
|
||||
* Simply exit(1).
|
||||
*
|
||||
* XXX: possible future improvement: try to send a message indicating
|
||||
* why we are disconnecting. Problem is to be sure we don't block while
|
||||
|
@ -2383,7 +2383,7 @@ die(SIGNAL_ARGS)
|
|||
void
|
||||
authdie(SIGNAL_ARGS)
|
||||
{
|
||||
exit(0);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.175 2006/10/01 22:08:18 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.176 2006/11/21 00:49:55 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -421,25 +421,23 @@ errfinish(int dummy,...)
|
|||
* fflush here is just to improve the odds that we get to see the
|
||||
* error message, in case things are so hosed that proc_exit crashes.
|
||||
* Any other code you might be tempted to add here should probably be
|
||||
* in an on_proc_exit callback instead.
|
||||
* in an on_proc_exit or on_shmem_exit callback instead.
|
||||
*/
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
|
||||
/*
|
||||
* If proc_exit is already running, we exit with nonzero exit code to
|
||||
* indicate that something's pretty wrong. We also want to exit with
|
||||
* nonzero exit code if not running under the postmaster (for example,
|
||||
* if we are being run from the initdb script, we'd better return an
|
||||
* error status).
|
||||
* Do normal process-exit cleanup, then return exit code 1 to indicate
|
||||
* FATAL termination. The postmaster may or may not consider this
|
||||
* worthy of panic, depending on which subprocess returns it.
|
||||
*/
|
||||
proc_exit(proc_exit_inprogress || !IsUnderPostmaster);
|
||||
proc_exit(1);
|
||||
}
|
||||
|
||||
if (elevel >= PANIC)
|
||||
{
|
||||
/*
|
||||
* Serious crash time. Postmaster will observe nonzero process exit
|
||||
* Serious crash time. Postmaster will observe SIGABRT process exit
|
||||
* status and kill the other backends too.
|
||||
*
|
||||
* XXX: what if we are *in* the postmaster? abort() won't kill our
|
||||
|
|
Loading…
Reference in New Issue