From bf2617981ceb88bdcd3cba58308d68f0c4ea3de2 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera
Date: Sat, 5 Oct 2013 23:24:50 -0300
Subject: [PATCH] Fix various bugs in postmaster SIGKILL processing

Clamp the minimum sleep time during immediate shutdown or crash to a
minimum of zero, not a maximum of one second. The previous code could
result in a negative sleep time, leading to failure in select() calls.

Also, on crash recovery, reset AbortStartTime as soon as SIGKILL is sent
or abort processing has commenced instead of waiting until the startup
process completes. Reset AbortStartTime as soon as SIGKILL is sent,
too, to avoid doing that repeatedly.

Per trouble report from Jeff Janes on
CAMkU=1xd3=wFqZwwuXPWe4BQs3h1seYo8LV9JtSjW5RodoPxMg@mail.gmail.com

Author: MauMau
---
 src/backend/postmaster/postmaster.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 01d2618ebd..9f721b7651 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1422,9 +1422,9 @@ DetermineSleepTime(struct timeval * timeout)
 	{
 		if (AbortStartTime > 0)
 		{
-			/* remaining time, but at least 1 second */
-			timeout->tv_sec = Min(SIGKILL_CHILDREN_AFTER_SECS -
-								  (time(NULL) - AbortStartTime), 1);
+			/* time left to abort; clamp to 0 in case it already expired */
+			timeout->tv_sec = Max(SIGKILL_CHILDREN_AFTER_SECS -
+								  (time(NULL) - AbortStartTime), 0);
 			timeout->tv_usec = 0;
 		}
 		else
@@ -1676,10 +1676,13 @@ ServerLoop(void)
 		 * Note we also do this during recovery from a process crash.
 		 */
 		if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
+			AbortStartTime > 0 &&
 			now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS)
 		{
 			/* We were gentle with them before. Not anymore */
 			TerminateChildren(SIGKILL);
+			/* reset flag so we don't SIGKILL again */
+			AbortStartTime = 0;
 
 			/*
 			 * Additionally, unless we're recovering from a process crash, it's
@@ -2584,7 +2587,7 @@ reaper(SIGNAL_ARGS)
 			 * Startup succeeded, commence normal operations
 			 */
 			FatalError = false;
-			AbortStartTime = 0;
+			Assert(AbortStartTime == 0);
 			ReachedNormalRunning = true;
 			pmState = PM_RUN;
 
@@ -3544,6 +3547,8 @@ PostmasterStateMachine(void)
 		StartupPID = StartupDataBase();
 		Assert(StartupPID != 0);
 		pmState = PM_STARTUP;
+		/* crash recovery started, reset SIGKILL flag */
+		AbortStartTime = 0;
 	}
 }
 
@@ -4737,7 +4742,7 @@ sigusr1_handler(SIGNAL_ARGS)
 	{
 		/* WAL redo has started. We're out of reinitialization. */
 		FatalError = false;
-		AbortStartTime = 0;
+		Assert(AbortStartTime == 0);
 
 		/*
 		 * Crank up the background tasks. It doesn't matter if this fails,