diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index 5bb0bc3961..a6f2140e3a 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -159,7 +159,9 @@ static void free_readfile(char **optlines); static pgpid_t start_postmaster(void); static void read_post_opts(void); -static WaitPMResult wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint); +static WaitPMResult wait_for_postmaster_start(pgpid_t pm_pid, bool do_checkpoint); +static bool wait_for_postmaster_stop(void); +static bool wait_for_postmaster_promote(void); static bool postmaster_is_alive(pid_t pid); #if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE) @@ -590,7 +592,7 @@ start_postmaster(void) * manager checkpoint, it's got nothing to do with database checkpoints!! */ static WaitPMResult -wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint) +wait_for_postmaster_start(pgpid_t pm_pid, bool do_checkpoint) { int i; @@ -700,6 +702,76 @@ wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint) } +/* + * Wait for the postmaster to stop. + * + * Returns true if the postmaster stopped cleanly (i.e., removed its pidfile). + * Returns false if the postmaster dies uncleanly, or if we time out. + */ +static bool +wait_for_postmaster_stop(void) +{ + int cnt; + + for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) + { + pgpid_t pid; + + if ((pid = get_pgpid(false)) == 0) + return true; /* pid file is gone */ + + if (kill((pid_t) pid, 0) != 0) + { + /* + * Postmaster seems to have died. Check the pid file once more to + * avoid a race condition, but give up waiting. + */ + if (get_pgpid(false) == 0) + return true; /* pid file is gone */ + return false; /* postmaster died untimely */ + } + + if (cnt % WAITS_PER_SEC == 0) + print_msg("."); + pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); + } + return false; /* timeout reached */ +} + + +/* + * Wait for the postmaster to promote. + * + * Returns true on success, else false. + * To avoid waiting uselessly, we check for postmaster death here too. + */ +static bool +wait_for_postmaster_promote(void) +{ + int cnt; + + for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) + { + pgpid_t pid; + DBState state; + + if ((pid = get_pgpid(false)) == 0) + return false; /* pid file is gone */ + if (kill((pid_t) pid, 0) != 0) + return false; /* postmaster died */ + + state = get_control_dbstate(); + if (state == DB_IN_PRODUCTION) + return true; /* successful promotion */ + + if (cnt % WAITS_PER_SEC == 0) + print_msg("."); + pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); + } + return false; /* timeout reached */ +} + + #if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE) static void unlimit_core_size(void) @@ -915,7 +987,7 @@ do_start(void) print_msg(_("waiting for server to start...")); - switch (wait_for_postmaster(pm_pid, false)) + switch (wait_for_postmaster_start(pm_pid, false)) { case POSTMASTER_READY: print_msg(_(" done\n")); @@ -950,7 +1022,6 @@ do_start(void) static void do_stop(void) { - int cnt; pgpid_t pid; struct stat statbuf; @@ -1001,19 +1072,7 @@ do_stop(void) print_msg(_("waiting for server to shut down...")); - for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) - { - if ((pid = get_pgpid(false)) != 0) - { - if (cnt % WAITS_PER_SEC == 0) - print_msg("."); - pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); - } - else - break; - } - - if (pid != 0) /* pid file still exists */ + if (!wait_for_postmaster_stop()) { print_msg(_(" failed\n")); @@ -1037,7 +1096,6 @@ do_stop(void) static void do_restart(void) { - int cnt; pgpid_t pid; struct stat statbuf; @@ -1091,20 +1149,7 @@ do_restart(void) print_msg(_("waiting for server to shut down...")); /* always wait for restart */ - - for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) - { - if ((pid = get_pgpid(false)) != 0) - { - if (cnt % WAITS_PER_SEC == 0) - print_msg("."); - pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); - } - else - break; - } - - if (pid != 0) /* pid file still exists */ + if (!wait_for_postmaster_stop()) { print_msg(_(" failed\n")); @@ -1229,21 +1274,8 @@ do_promote(void) if (do_wait) { - DBState state = DB_STARTUP; - int cnt; - print_msg(_("waiting for server to promote...")); - for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) - { - state = get_control_dbstate(); - if (state == DB_IN_PRODUCTION) - break; - - if (cnt % WAITS_PER_SEC == 0) - print_msg("."); - pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); - } - if (state == DB_IN_PRODUCTION) + if (wait_for_postmaster_promote()) { print_msg(_(" done\n")); print_msg(_("server promoted\n")); @@ -1688,7 +1720,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv) if (do_wait) { write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Waiting for server startup...\n")); - if (wait_for_postmaster(postmasterPID, true) != POSTMASTER_READY) + if (wait_for_postmaster_start(postmasterPID, true) != POSTMASTER_READY) { write_eventlog(EVENTLOG_ERROR_TYPE, _("Timed out waiting for server startup\n")); pgwin32_SetServiceStatus(SERVICE_STOPPED); @@ -1709,7 +1741,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv) { /* * status.dwCheckPoint can be incremented by - * wait_for_postmaster(), so it might not start from 0. + * wait_for_postmaster_start(), so it might not start from 0. */ int maxShutdownCheckPoint = status.dwCheckPoint + 12;