diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 7dada6b67f..47059d303c 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1362,11 +1362,11 @@ echo -1000 > /proc/self/oom_score_adj This is the Immediate Shutdown mode. - The master postgres process will send a - SIGQUIT to all child processes and exit - immediately, without properly shutting itself down. The child processes - likewise exit immediately upon receiving - SIGQUIT. This will lead to recovery (by + The server will send SIGQUIT to all child + processes and wait for them to terminate. Those that don't terminate + within 5 seconds will be sent SIGKILL by the + master postgres process, which will then terminate + without further waiting. This will lead to recovery (by replaying the WAL log) upon next start-up. This is recommended only in emergencies. diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 87e6062139..64467cb53c 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -275,6 +275,7 @@ static pid_t StartupPID = 0, #define NoShutdown 0 #define SmartShutdown 1 #define FastShutdown 2 +#define ImmediateShutdown 3 static int Shutdown = NoShutdown; @@ -345,6 +346,10 @@ typedef enum static PMState pmState = PM_INIT; +/* Start time of abort processing at immediate shutdown or child crash */ +static time_t AbortStartTime; +#define SIGKILL_CHILDREN_AFTER_SECS 5 + static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */ bool ClientAuthInProgress = false; /* T during new-client @@ -421,6 +426,7 @@ static void RandomSalt(char *md5Salt); static void signal_child(pid_t pid, int signal); static bool SignalSomeChildren(int signal, int targets); static bool SignalUnconnectedWorkers(int signal); +static void TerminateChildren(int signal); #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL) @@ -1427,8 +1433,18 @@ DetermineSleepTime(struct timeval * timeout) 
if (Shutdown > NoShutdown || (!StartWorkerNeeded && !HaveCrashedWorker)) { - timeout->tv_sec = 60; - timeout->tv_usec = 0; + if (AbortStartTime > 0) + { + /* remaining time, but at least 1 second */ + timeout->tv_sec = Max(SIGKILL_CHILDREN_AFTER_SECS - + (time(NULL) - AbortStartTime), 1); + timeout->tv_usec = 0; + } + else + { + timeout->tv_sec = 60; + timeout->tv_usec = 0; + } return; } @@ -1660,6 +1676,28 @@ ServerLoop(void) TouchSocketLockFiles(); last_touch_time = now; } + + /* + * If we already sent SIGQUIT to children and they are slow to shut + * down, it's time to send them SIGKILL. This doesn't happen normally, + * but under certain conditions backends can get stuck while shutting + * down. This is a last measure to get them unwedged. + * + * Note we also do this during recovery from a process crash. + */ + if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) && + AbortStartTime > 0 && now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS) + { + /* We were gentle with them before. Not anymore */ + TerminateChildren(SIGKILL); + AbortStartTime = 0; /* disarm stopwatch so we don't SIGKILL again */ + /* + * Additionally, unless we're recovering from a process crash, it's + * now the time for postmaster to abandon ship. + */ + if (!FatalError) + ExitPostmaster(1); + } } } @@ -2455,30 +2493,27 @@ pmdie(SIGNAL_ARGS) /* * Immediate Shutdown: * - * abort all children with SIGQUIT and exit without attempt to - * properly shut down data base system. + * abort all children with SIGQUIT, wait for them to exit, + * terminate remaining ones with SIGKILL, then exit without + * attempt to properly shut down the data base system. 
*/ + if (Shutdown >= ImmediateShutdown) + break; + Shutdown = ImmediateShutdown; ereport(LOG, (errmsg("received immediate shutdown request"))); - SignalChildren(SIGQUIT); - if (StartupPID != 0) - signal_child(StartupPID, SIGQUIT); - if (BgWriterPID != 0) - signal_child(BgWriterPID, SIGQUIT); - if (CheckpointerPID != 0) - signal_child(CheckpointerPID, SIGQUIT); - if (WalWriterPID != 0) - signal_child(WalWriterPID, SIGQUIT); - if (WalReceiverPID != 0) - signal_child(WalReceiverPID, SIGQUIT); - if (AutoVacPID != 0) - signal_child(AutoVacPID, SIGQUIT); - if (PgArchPID != 0) - signal_child(PgArchPID, SIGQUIT); - if (PgStatPID != 0) - signal_child(PgStatPID, SIGQUIT); - SignalUnconnectedWorkers(SIGQUIT); - ExitPostmaster(0); + + TerminateChildren(SIGQUIT); + pmState = PM_WAIT_BACKENDS; + + /* set stopwatch for them to die */ + AbortStartTime = time(NULL); + + /* + * Now wait for backends to exit. If there are none, + * PostmasterStateMachine will take the next step. + */ + PostmasterStateMachine(); break; } @@ -2952,12 +2987,17 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) dlist_mutable_iter iter; slist_iter siter; Backend *bp; + bool take_action; /* - * Make log entry unless there was a previous crash (if so, nonzero exit - * status is to be expected in SIGQUIT response; don't clutter log) + * We only log messages and send signals if this is the first process crash + * and we're not doing an immediate shutdown; otherwise, we're only here to + * update postmaster's idea of live processes. If we have already signalled + * children, nonzero exit status is to be expected, so don't clutter log. */ - if (!FatalError) + take_action = !FatalError && Shutdown != ImmediateShutdown; + + if (take_action) { LogChildExit(LOG, procname, pid, exitstatus); ereport(LOG, @@ -3003,7 +3043,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) * (-s on command line), then we send SIGSTOP instead, so that we * can get core dumps from all backends by hand. 
*/ - if (!FatalError) + if (take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3055,7 +3095,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) if (bp->bkend_type == BACKEND_TYPE_BGWORKER) continue; - if (!FatalError) + if (take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3069,7 +3109,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) /* Take care of the startup process too */ if (pid == StartupPID) StartupPID = 0; - else if (StartupPID != 0 && !FatalError) + else if (StartupPID != 0 && take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3081,7 +3121,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) /* Take care of the bgwriter too */ if (pid == BgWriterPID) BgWriterPID = 0; - else if (BgWriterPID != 0 && !FatalError) + else if (BgWriterPID != 0 && take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3093,7 +3133,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) /* Take care of the checkpointer too */ if (pid == CheckpointerPID) CheckpointerPID = 0; - else if (CheckpointerPID != 0 && !FatalError) + else if (CheckpointerPID != 0 && take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3105,7 +3145,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) /* Take care of the walwriter too */ if (pid == WalWriterPID) WalWriterPID = 0; - else if (WalWriterPID != 0 && !FatalError) + else if (WalWriterPID != 0 && take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3117,7 +3157,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) /* Take care of the walreceiver too */ if (pid == WalReceiverPID) WalReceiverPID = 0; - else if (WalReceiverPID != 0 && !FatalError) + else if (WalReceiverPID != 0 && take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3129,7 +3169,7 @@ 
HandleChildCrash(int pid, int exitstatus, const char *procname) /* Take care of the autovacuum launcher too */ if (pid == AutoVacPID) AutoVacPID = 0; - else if (AutoVacPID != 0 && !FatalError) + else if (AutoVacPID != 0 && take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3144,7 +3184,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) * simplifies the state-machine logic in the case where a shutdown request * arrives during crash processing.) */ - if (PgArchPID != 0 && !FatalError) + if (PgArchPID != 0 && take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3159,7 +3199,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) * simplifies the state-machine logic in the case where a shutdown request * arrives during crash processing.) */ - if (PgStatPID != 0 && !FatalError) + if (PgStatPID != 0 && take_action) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", @@ -3171,7 +3211,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) /* We do NOT restart the syslogger */ - FatalError = true; + if (Shutdown != ImmediateShutdown) + FatalError = true; + /* We now transit into a state of waiting for children to die */ if (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || @@ -3180,6 +3222,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) pmState == PM_WAIT_READONLY || pmState == PM_SHUTDOWN) pmState = PM_WAIT_BACKENDS; + + /* + * .. and if this doesn't happen quickly enough, now the clock is ticking + * for us to kill them without mercy. A fresh crash restarts the clock. + */ + if (take_action || AbortStartTime == 0) + AbortStartTime = time(NULL); } /* @@ -3316,7 +3365,7 @@ PostmasterStateMachine(void) WalWriterPID == 0 && AutoVacPID == 0) { - if (FatalError) + if (Shutdown >= ImmediateShutdown || FatalError) { /* * Start waiting for dead_end children to die. 
This state @@ -3326,7 +3375,8 @@ PostmasterStateMachine(void) /* * We already SIGQUIT'd the archiver and stats processes, if - * any, when we entered FatalError state. + * any, when we started immediate shutdown or entered + * FatalError state. */ } else @@ -3511,6 +3561,7 @@ signal_child(pid_t pid, int signal) case SIGTERM: case SIGQUIT: case SIGSTOP: + case SIGKILL: if (kill(-pid, signal) < 0) elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal); break; @@ -3597,6 +3648,33 @@ SignalSomeChildren(int signal, int target) return signaled; } +/* + * Send a termination signal to children. This considers all of our child + * processes, except syslogger and dead_end backends. + */ +static void +TerminateChildren(int signal) +{ + SignalChildren(signal); + if (StartupPID != 0) + signal_child(StartupPID, signal); + if (BgWriterPID != 0) + signal_child(BgWriterPID, signal); + if (CheckpointerPID != 0) + signal_child(CheckpointerPID, signal); + if (WalWriterPID != 0) + signal_child(WalWriterPID, signal); + if (WalReceiverPID != 0) + signal_child(WalReceiverPID, signal); + if (AutoVacPID != 0) + signal_child(AutoVacPID, signal); + if (PgArchPID != 0) + signal_child(PgArchPID, signal); + if (PgStatPID != 0) + signal_child(PgStatPID, signal); + SignalUnconnectedWorkers(signal); +} + /* * BackendStartup -- start backend process * diff --git a/src/port/kill.c b/src/port/kill.c index 0a810cd225..d0f23efb17 100644 --- a/src/port/kill.c +++ b/src/port/kill.c @@ -38,6 +38,26 @@ pgkill(int pid, int sig) errno = EINVAL; return -1; } + + /* special case for SIGKILL: just ask the system to terminate the target */ + if (sig == SIGKILL) + { + HANDLE prochandle; + + if ((prochandle = OpenProcess(PROCESS_TERMINATE, FALSE, (DWORD) pid)) == NULL) + { + errno = ESRCH; + return -1; + } + if (!TerminateProcess(prochandle, 255)) + { + _dosmaperr(GetLastError()); + CloseHandle(prochandle); + return -1; + } + CloseHandle(prochandle); + return 0; + } snprintf(pipename, sizeof(pipename), 
"\\\\.\\pipe\\pgsignal_%u", pid); if (CallNamedPipe(pipename, &sigData, 1, &sigRet, 1, &bytes, 1000))