From be7558162acc5578d0b2cf0c8d4c76b6076ce352 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 7 May 2014 17:43:39 -0400 Subject: [PATCH] When a background worker exits with code 0, unregister it. The previous behavior was to restart immediately, which was generally viewed as less useful. Petr Jelinek, with some adjustments by me. --- doc/src/sgml/bgworker.sgml | 14 ++++++++++---- src/backend/postmaster/bgworker.c | 4 ++-- src/backend/postmaster/postmaster.c | 8 +++++++- src/include/postmaster/bgworker.h | 8 ++++---- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/doc/src/sgml/bgworker.sgml b/doc/src/sgml/bgworker.sgml index fd32d6cb0c..d3c8ddb382 100644 --- a/doc/src/sgml/bgworker.sgml +++ b/doc/src/sgml/bgworker.sgml @@ -166,10 +166,16 @@ typedef struct BackgroundWorker - Background workers are expected to be continuously running; if they exit - cleanly, postgres will restart them immediately. Consider doing - interruptible sleep when they have nothing to do; this can be achieved by - calling WaitLatch(). Make sure the + If bgw_restart_time for a background worker is + configured as BGW_NEVER_RESTART, or if it exits with an exit + code of 0 or is terminated by TerminateBackgroundWorker, + it will be automatically unregistered by the postmaster on exit. + Otherwise, it will be restarted after the time period configured via + bgw_restart_time, or immediately if the postmaster + reinitializes the cluster due to a backend failure. Backends which need + to suspend execution only temporarily should use an interruptible sleep + rather than exiting; this can be achieved by calling + WaitLatch(). Make sure the WL_POSTMASTER_DEATH flag is set when calling that function, and verify the return code for a prompt exit in the emergency case that postgres itself has terminated. 
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index 64c9722993..85a3b3a077 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -884,8 +884,8 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker, * running but is no longer. * * In the latter case, the worker may be stopped temporarily (if it is - * configured for automatic restart, or if it exited with code 0) or gone - * for good (if it is configured not to restart and exited with code 1). + * configured for automatic restart and exited non-zero) or gone for + * good (if it exited with code 0 or if it is configured not to restart). */ BgwHandleStatus GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 79d1c506cc..a5d5c2dbcb 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2845,11 +2845,17 @@ CleanupBackgroundWorker(int pid, snprintf(namebuf, MAXPGPATH, "%s: %s", _("worker process"), rw->rw_worker.bgw_name); - /* Delay restarting any bgworker that exits with a nonzero status. */ if (!EXIT_STATUS_0(exitstatus)) + { + /* Record timestamp, so we know when to restart the worker. */ rw->rw_crashed_at = GetCurrentTimestamp(); + } else + { + /* Zero exit status means terminate */ rw->rw_crashed_at = 0; + rw->rw_terminate = true; + } /* * Additionally, for shared-memory-connected workers, just like a diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h index c9550cc887..a3b3d5f1a3 100644 --- a/src/include/postmaster/bgworker.h +++ b/src/include/postmaster/bgworker.h @@ -16,10 +16,10 @@ * that the failure can only be transient (fork failure due to high load, * memory pressure, too many processes, etc); more permanent problems, like * failure to connect to a database, are detected later in the worker and dealt - * with just by having the worker exit normally. 
A worker which exits with a - * return code of 0 will be immediately restarted by the postmaster. A worker - * which exits with a return code of 1 will be restarted after the configured - * restart interval, or never if that interval is set to BGW_NEVER_RESTART. + * with just by having the worker exit normally. A worker which exits with + * a return code of 0 will never be restarted and will be removed from the + * worker list. A worker which exits with a return code of 1 will be restarted after + * the configured restart interval (unless that interval is BGW_NEVER_RESTART). * The TerminateBackgroundWorker() function can be used to terminate a * dynamically registered background worker; the worker will be sent a SIGTERM * and will not be restarted after it exits. Whenever the postmaster knows