diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 8078a38ed7..64c9722993 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -394,6 +394,27 @@ BackgroundWorkerStopNotifications(pid_t pid)
 	}
 }
 
+/*
+ * Reset the crash time (rw_crashed_at) of every registered background worker.
+ *
+ * Called from PostmasterStateMachine() when reinitializing after a crash:
+ * after a crash-and-restart cycle, workers should be restarted immediately
+ * instead of waiting for bgw_restart_time to elapse.
+ */
+void
+ResetBackgroundWorkerCrashTimes(void)
+{
+	slist_mutable_iter iter;
+
+	slist_foreach_modify(iter, &BackgroundWorkerList)
+	{
+		RegisteredBgWorker *rw;
+
+		rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+		rw->rw_crashed_at = 0;
+	}
+}
+
 #ifdef EXEC_BACKEND
 /*
  * In EXEC_BACKEND mode, workers use this to retrieve their details from
@@ -478,13 +499,14 @@ bgworker_quickdie(SIGNAL_ARGS)
 	on_exit_reset();
 
 	/*
-	 * Note we do exit(0) here, not exit(2) like quickdie. The reason is that
-	 * we don't want to be seen this worker as independently crashed, because
-	 * then postmaster would delay restarting it again afterwards. If some
-	 * idiot DBA manually sends SIGQUIT to a random bgworker, the "dead man
-	 * switch" will ensure that postmaster sees this as a crash.
+	 * Note we do exit(2) not exit(0). This is to force the postmaster into a
+	 * system reset cycle if someone manually sends SIGQUIT to a random
+	 * bgworker. This is necessary precisely because we don't clean up our
+	 * shared memory state. (The "dead man switch" mechanism in pmsignal.c
+	 * should ensure the postmaster sees this as a crash, too, but no harm in
+	 * being doubly sure.)
 	 */
-	exit(0);
+	exit(2);
 }
 
 /*
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 0c6a4271a6..ec1a59d061 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2616,7 +2616,7 @@ reaper(SIGNAL_ARGS)
 			if (PgStatPID == 0)
 				PgStatPID = pgstat_start();
 
-			/* some workers may be scheduled to start now */
+			/* workers may be scheduled to start now */
 			maybe_start_bgworker();
 
 			/* at this point we are really open for business */
@@ -2860,7 +2860,6 @@ CleanupBackgroundWorker(int pid,
 		{
 			if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
 			{
-				rw->rw_crashed_at = GetCurrentTimestamp();
 				HandleChildCrash(pid, exitstatus, namebuf);
 				return true;
 			}
@@ -2871,7 +2870,6 @@ CleanupBackgroundWorker(int pid,
 			 * Uh-oh, the child failed to clean itself up. Treat as a
 			 * crash after all.
 			 */
-			rw->rw_crashed_at = GetCurrentTimestamp();
 			HandleChildCrash(pid, exitstatus, namebuf);
 			return true;
 		}
@@ -3546,6 +3544,9 @@ PostmasterStateMachine(void)
 		ereport(LOG,
 				(errmsg("all server processes terminated; reinitializing")));
 
+		/* allow background workers to immediately restart */
+		ResetBackgroundWorkerCrashTimes();
+
 		shmem_exit(1);
 		reset_shared(PostPortNumber);
 
diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h
index 55401860d8..c2518a6c8d 100644
--- a/src/include/postmaster/bgworker_internals.h
+++ b/src/include/postmaster/bgworker_internals.h
@@ -43,6 +43,7 @@ extern void BackgroundWorkerStateChange(void);
 extern void ForgetBackgroundWorker(slist_mutable_iter *cur);
 extern void ReportBackgroundWorkerPID(RegisteredBgWorker *);
 extern void BackgroundWorkerStopNotifications(pid_t pid);
+extern void ResetBackgroundWorkerCrashTimes(void);
 
 /* Function to start a background worker, called from postmaster.c */
 extern void StartBackgroundWorker(void);