diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index 070072299f..3c182c97d4 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -155,7 +155,9 @@ static void free_readfile(char **optlines); static pgpid_t start_postmaster(void); static void read_post_opts(void); -static WaitPMResult wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint); +static WaitPMResult wait_for_postmaster_start(pgpid_t pm_pid, bool do_checkpoint); +static bool wait_for_postmaster_stop(void); +static bool wait_for_postmaster_promote(void); static bool postmaster_is_alive(pid_t pid); #if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE) @@ -594,7 +596,7 @@ start_postmaster(void) * manager checkpoint, it's got nothing to do with database checkpoints!! */ static WaitPMResult -wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint) +wait_for_postmaster_start(pgpid_t pm_pid, bool do_checkpoint) { int i; @@ -704,6 +706,76 @@ wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint) } +/* + * Wait for the postmaster to stop. + * + * Returns true if the postmaster stopped cleanly (i.e., removed its pidfile). + * Returns false if the postmaster dies uncleanly, or if we time out. + */ +static bool +wait_for_postmaster_stop(void) +{ + int cnt; + + for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) + { + pgpid_t pid; + + if ((pid = get_pgpid(false)) == 0) + return true; /* pid file is gone */ + + if (kill((pid_t) pid, 0) != 0) + { + /* + * Postmaster seems to have died. Check the pid file once more to + * avoid a race condition, but give up waiting. + */ + if (get_pgpid(false) == 0) + return true; /* pid file is gone */ + return false; /* postmaster died untimely */ + } + + if (cnt % WAITS_PER_SEC == 0) + print_msg("."); + pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); + } + return false; /* timeout reached */ +} + + +/* + * Wait for the postmaster to promote. + * + * Returns true on success, else false. + * To avoid waiting uselessly, we check for postmaster death here too. + */ +static bool +wait_for_postmaster_promote(void) +{ + int cnt; + + for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) + { + pgpid_t pid; + DBState state; + + if ((pid = get_pgpid(false)) == 0) + return false; /* pid file is gone */ + if (kill((pid_t) pid, 0) != 0) + return false; /* postmaster died */ + + state = get_control_dbstate(); + if (state == DB_IN_PRODUCTION) + return true; /* successful promotion */ + + if (cnt % WAITS_PER_SEC == 0) + print_msg("."); + pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); + } + return false; /* timeout reached */ +} + + #if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE) static void unlimit_core_size(void) @@ -917,7 +989,7 @@ do_start(void) print_msg(_("waiting for server to start...")); - switch (wait_for_postmaster(pm_pid, false)) + switch (wait_for_postmaster_start(pm_pid, false)) { case POSTMASTER_READY: print_msg(_(" done\n")); @@ -952,7 +1024,6 @@ do_start(void) static void do_stop(void) { - int cnt; pgpid_t pid; struct stat statbuf; @@ -1003,19 +1074,7 @@ do_stop(void) print_msg(_("waiting for server to shut down...")); - for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) - { - if ((pid = get_pgpid(false)) != 0) - { - if (cnt % WAITS_PER_SEC == 0) - print_msg("."); - pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); - } - else - break; - } - - if (pid != 0) /* pid file still exists */ + if (!wait_for_postmaster_stop()) { print_msg(_(" failed\n")); @@ -1039,7 +1098,6 @@ do_stop(void) static void do_restart(void) { - int cnt; pgpid_t pid; struct stat statbuf; @@ -1093,20 +1151,7 @@ do_restart(void) print_msg(_("waiting for server to shut down...")); /* always wait for restart */ - - for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) - { - if ((pid = get_pgpid(false)) != 0) - { - if (cnt % WAITS_PER_SEC == 0) - print_msg("."); - pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); - } - else - break; - } - - if (pid != 0) /* pid file still exists */ + if (!wait_for_postmaster_stop()) { print_msg(_(" failed\n")); @@ -1226,21 +1271,8 @@ do_promote(void) if (do_wait) { - DBState state = DB_STARTUP; - int cnt; - print_msg(_("waiting for server to promote...")); - for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) - { - state = get_control_dbstate(); - if (state == DB_IN_PRODUCTION) - break; - - if (cnt % WAITS_PER_SEC == 0) - print_msg("."); - pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); - } - if (state == DB_IN_PRODUCTION) + if (wait_for_postmaster_promote()) { print_msg(_(" done\n")); print_msg(_("server promoted\n")); @@ -1659,7 +1691,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv) if (do_wait) { write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Waiting for server startup...\n")); - if (wait_for_postmaster(postmasterPID, true) != POSTMASTER_READY) + if (wait_for_postmaster_start(postmasterPID, true) != POSTMASTER_READY) { write_eventlog(EVENTLOG_ERROR_TYPE, _("Timed out waiting for server startup\n")); pgwin32_SetServiceStatus(SERVICE_STOPPED); @@ -1680,7 +1712,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv) { /* * status.dwCheckPoint can be incremented by - * wait_for_postmaster(), so it might not start from 0. + * wait_for_postmaster_start(), so it might not start from 0. */ int maxShutdownCheckPoint = status.dwCheckPoint + 12;