Adjust elog.c so that elog(FATAL) exits (including cases where ERROR is
promoted to FATAL) end in exit(1), not exit(0). Then change the postmaster to allow exit(1) without a system-wide panic, but not for the startup subprocess or the bgwriter. There were a couple of places that were using exit(1) to deliberately force a system-wide panic; adjust these to be exit(2) instead. This fixes the problem noted back in July that if the startup process exits with elog(ERROR), the postmaster would think everything is hunky-dory and proceed to start up. Alternative solutions, such as trying to run the entire startup process as a critical section, seem less clean, primarily because a fair amount of startup code is shared by all postmaster children in the EXEC_BACKEND case. We'd need an ugly special case somewhere near the head of main.c to make it work if it's the child process's responsibility to determine what happens; and what's the point when the postmaster already treats different children differently?
This commit is contained in:
parent
778bb7b60d
commit
e82d9e6283
|
@ -8,7 +8,7 @@
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.225 2006/10/04 00:29:49 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.226 2006/11/21 00:49:54 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -421,15 +421,8 @@ BootstrapMain(int argc, char *argv[])
|
||||||
case BS_XLOG_STARTUP:
|
case BS_XLOG_STARTUP:
|
||||||
bootstrap_signals();
|
bootstrap_signals();
|
||||||
StartupXLOG();
|
StartupXLOG();
|
||||||
|
|
||||||
/*
|
|
||||||
* These next two functions don't consider themselves critical,
|
|
||||||
* but we'd best PANIC anyway if they fail.
|
|
||||||
*/
|
|
||||||
START_CRIT_SECTION();
|
|
||||||
LoadFreeSpaceMap();
|
LoadFreeSpaceMap();
|
||||||
BuildFlatFiles(false);
|
BuildFlatFiles(false);
|
||||||
END_CRIT_SECTION();
|
|
||||||
proc_exit(0); /* startup done */
|
proc_exit(0); /* startup done */
|
||||||
|
|
||||||
case BS_XLOG_BGWRITER:
|
case BS_XLOG_BGWRITER:
|
||||||
|
|
|
@ -37,7 +37,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.29 2006/10/06 17:13:59 petere Exp $
|
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.30 2006/11/21 00:49:55 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -503,12 +503,12 @@ bg_quickdie(SIGNAL_ARGS)
|
||||||
* corrupted, so we don't want to try to clean up our transaction. Just
|
* corrupted, so we don't want to try to clean up our transaction. Just
|
||||||
* nail the windows shut and get out of town.
|
* nail the windows shut and get out of town.
|
||||||
*
|
*
|
||||||
* Note we do exit(1) not exit(0). This is to force the postmaster into a
|
* Note we do exit(2) not exit(0). This is to force the postmaster into a
|
||||||
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
|
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
|
||||||
* backend. This is necessary precisely because we don't clean up our
|
* backend. This is necessary precisely because we don't clean up our
|
||||||
* shared memory state.
|
* shared memory state.
|
||||||
*/
|
*/
|
||||||
exit(1);
|
exit(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* SIGHUP: set flag to re-read config file at next convenient time */
|
/* SIGHUP: set flag to re-read config file at next convenient time */
|
||||||
|
|
|
@ -37,7 +37,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.501 2006/11/05 22:42:09 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.502 2006/11/21 00:49:55 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
*
|
*
|
||||||
|
@ -358,6 +358,10 @@ static void ShmemBackendArrayRemove(pid_t pid);
|
||||||
#define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP)
|
#define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP)
|
||||||
#define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
|
#define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
|
||||||
|
|
||||||
|
/* Macros to check exit status of a child process */
|
||||||
|
#define EXIT_STATUS_0(st) ((st) == 0)
|
||||||
|
#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Postmaster main entry point
|
* Postmaster main entry point
|
||||||
|
@ -2025,7 +2029,8 @@ reaper(SIGNAL_ARGS)
|
||||||
if (StartupPID != 0 && pid == StartupPID)
|
if (StartupPID != 0 && pid == StartupPID)
|
||||||
{
|
{
|
||||||
StartupPID = 0;
|
StartupPID = 0;
|
||||||
if (exitstatus != 0)
|
/* Note: FATAL exit of startup is treated as catastrophic */
|
||||||
|
if (!EXIT_STATUS_0(exitstatus))
|
||||||
{
|
{
|
||||||
LogChildExit(LOG, _("startup process"),
|
LogChildExit(LOG, _("startup process"),
|
||||||
pid, exitstatus);
|
pid, exitstatus);
|
||||||
|
@ -2078,7 +2083,8 @@ reaper(SIGNAL_ARGS)
|
||||||
if (BgWriterPID != 0 && pid == BgWriterPID)
|
if (BgWriterPID != 0 && pid == BgWriterPID)
|
||||||
{
|
{
|
||||||
BgWriterPID = 0;
|
BgWriterPID = 0;
|
||||||
if (exitstatus == 0 && Shutdown > NoShutdown && !FatalError &&
|
if (EXIT_STATUS_0(exitstatus) &&
|
||||||
|
Shutdown > NoShutdown && !FatalError &&
|
||||||
!DLGetHead(BackendList) && AutoVacPID == 0)
|
!DLGetHead(BackendList) && AutoVacPID == 0)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -2096,7 +2102,8 @@ reaper(SIGNAL_ARGS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Any unexpected exit of the bgwriter is treated as a crash.
|
* Any unexpected exit of the bgwriter (including FATAL exit)
|
||||||
|
* is treated as a crash.
|
||||||
*/
|
*/
|
||||||
HandleChildCrash(pid, exitstatus,
|
HandleChildCrash(pid, exitstatus,
|
||||||
_("background writer process"));
|
_("background writer process"));
|
||||||
|
@ -2104,15 +2111,16 @@ reaper(SIGNAL_ARGS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Was it the autovacuum process? Normal exit can be ignored; we'll
|
* Was it the autovacuum process? Normal or FATAL exit can be
|
||||||
* start a new one at the next iteration of the postmaster's main
|
* ignored; we'll start a new one at the next iteration of the
|
||||||
* loop, if necessary. An unexpected exit is treated as a crash.
|
* postmaster's main loop, if necessary. Any other exit condition
|
||||||
|
* is treated as a crash.
|
||||||
*/
|
*/
|
||||||
if (AutoVacPID != 0 && pid == AutoVacPID)
|
if (AutoVacPID != 0 && pid == AutoVacPID)
|
||||||
{
|
{
|
||||||
AutoVacPID = 0;
|
AutoVacPID = 0;
|
||||||
autovac_stopped();
|
autovac_stopped();
|
||||||
if (exitstatus != 0)
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
||||||
HandleChildCrash(pid, exitstatus,
|
HandleChildCrash(pid, exitstatus,
|
||||||
_("autovacuum process"));
|
_("autovacuum process"));
|
||||||
continue;
|
continue;
|
||||||
|
@ -2126,7 +2134,7 @@ reaper(SIGNAL_ARGS)
|
||||||
if (PgArchPID != 0 && pid == PgArchPID)
|
if (PgArchPID != 0 && pid == PgArchPID)
|
||||||
{
|
{
|
||||||
PgArchPID = 0;
|
PgArchPID = 0;
|
||||||
if (exitstatus != 0)
|
if (!EXIT_STATUS_0(exitstatus))
|
||||||
LogChildExit(LOG, _("archiver process"),
|
LogChildExit(LOG, _("archiver process"),
|
||||||
pid, exitstatus);
|
pid, exitstatus);
|
||||||
if (XLogArchivingActive() &&
|
if (XLogArchivingActive() &&
|
||||||
|
@ -2143,7 +2151,7 @@ reaper(SIGNAL_ARGS)
|
||||||
if (PgStatPID != 0 && pid == PgStatPID)
|
if (PgStatPID != 0 && pid == PgStatPID)
|
||||||
{
|
{
|
||||||
PgStatPID = 0;
|
PgStatPID = 0;
|
||||||
if (exitstatus != 0)
|
if (!EXIT_STATUS_0(exitstatus))
|
||||||
LogChildExit(LOG, _("statistics collector process"),
|
LogChildExit(LOG, _("statistics collector process"),
|
||||||
pid, exitstatus);
|
pid, exitstatus);
|
||||||
if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
|
if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
|
||||||
|
@ -2157,7 +2165,7 @@ reaper(SIGNAL_ARGS)
|
||||||
SysLoggerPID = 0;
|
SysLoggerPID = 0;
|
||||||
/* for safety's sake, launch new logger *first* */
|
/* for safety's sake, launch new logger *first* */
|
||||||
SysLoggerPID = SysLogger_Start();
|
SysLoggerPID = SysLogger_Start();
|
||||||
if (exitstatus != 0)
|
if (!EXIT_STATUS_0(exitstatus))
|
||||||
LogChildExit(LOG, _("system logger process"),
|
LogChildExit(LOG, _("system logger process"),
|
||||||
pid, exitstatus);
|
pid, exitstatus);
|
||||||
continue;
|
continue;
|
||||||
|
@ -2229,12 +2237,12 @@ CleanupBackend(int pid,
|
||||||
LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
|
LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If a backend dies in an ugly way (i.e. exit status not 0) then we must
|
* If a backend dies in an ugly way then we must signal all other backends
|
||||||
* signal all other backends to quickdie. If exit status is zero we
|
* to quickdie. If exit status is zero (normal) or one (FATAL exit), we
|
||||||
* assume everything is hunky dory and simply remove the backend from the
|
* assume everything is all right and simply remove the backend from the
|
||||||
* active backend list.
|
* active backend list.
|
||||||
*/
|
*/
|
||||||
if (exitstatus != 0)
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
||||||
{
|
{
|
||||||
HandleChildCrash(pid, exitstatus, _("server process"));
|
HandleChildCrash(pid, exitstatus, _("server process"));
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.516 2006/10/19 19:52:22 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.517 2006/11/21 00:49:55 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* this is the "main" module of the postgres backend and
|
* this is the "main" module of the postgres backend and
|
||||||
|
@ -2327,12 +2327,12 @@ quickdie(SIGNAL_ARGS)
|
||||||
* corrupted, so we don't want to try to clean up our transaction. Just
|
* corrupted, so we don't want to try to clean up our transaction. Just
|
||||||
* nail the windows shut and get out of town.
|
* nail the windows shut and get out of town.
|
||||||
*
|
*
|
||||||
* Note we do exit(1) not exit(0). This is to force the postmaster into a
|
* Note we do exit(2) not exit(0). This is to force the postmaster into a
|
||||||
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
|
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
|
||||||
* backend. This is necessary precisely because we don't clean up our
|
* backend. This is necessary precisely because we don't clean up our
|
||||||
* shared memory state.
|
* shared memory state.
|
||||||
*/
|
*/
|
||||||
exit(1);
|
exit(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2374,7 +2374,7 @@ die(SIGNAL_ARGS)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Timeout or shutdown signal from postmaster during client authentication.
|
* Timeout or shutdown signal from postmaster during client authentication.
|
||||||
* Simply exit(0).
|
* Simply exit(1).
|
||||||
*
|
*
|
||||||
* XXX: possible future improvement: try to send a message indicating
|
* XXX: possible future improvement: try to send a message indicating
|
||||||
* why we are disconnecting. Problem is to be sure we don't block while
|
* why we are disconnecting. Problem is to be sure we don't block while
|
||||||
|
@ -2383,7 +2383,7 @@ die(SIGNAL_ARGS)
|
||||||
void
|
void
|
||||||
authdie(SIGNAL_ARGS)
|
authdie(SIGNAL_ARGS)
|
||||||
{
|
{
|
||||||
exit(0);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.175 2006/10/01 22:08:18 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.176 2006/11/21 00:49:55 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -421,25 +421,23 @@ errfinish(int dummy,...)
|
||||||
* fflush here is just to improve the odds that we get to see the
|
* fflush here is just to improve the odds that we get to see the
|
||||||
* error message, in case things are so hosed that proc_exit crashes.
|
* error message, in case things are so hosed that proc_exit crashes.
|
||||||
* Any other code you might be tempted to add here should probably be
|
* Any other code you might be tempted to add here should probably be
|
||||||
* in an on_proc_exit callback instead.
|
* in an on_proc_exit or on_shmem_exit callback instead.
|
||||||
*/
|
*/
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If proc_exit is already running, we exit with nonzero exit code to
|
* Do normal process-exit cleanup, then return exit code 1 to indicate
|
||||||
* indicate that something's pretty wrong. We also want to exit with
|
* FATAL termination. The postmaster may or may not consider this
|
||||||
* nonzero exit code if not running under the postmaster (for example,
|
* worthy of panic, depending on which subprocess returns it.
|
||||||
* if we are being run from the initdb script, we'd better return an
|
|
||||||
* error status).
|
|
||||||
*/
|
*/
|
||||||
proc_exit(proc_exit_inprogress || !IsUnderPostmaster);
|
proc_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (elevel >= PANIC)
|
if (elevel >= PANIC)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Serious crash time. Postmaster will observe nonzero process exit
|
* Serious crash time. Postmaster will observe SIGABRT process exit
|
||||||
* status and kill the other backends too.
|
* status and kill the other backends too.
|
||||||
*
|
*
|
||||||
* XXX: what if we are *in* the postmaster? abort() won't kill our
|
* XXX: what if we are *in* the postmaster? abort() won't kill our
|
||||||
|
|
Loading…
Reference in New Issue