diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 2a83671b53..80d12b26d7 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1690,6 +1690,11 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i backup: This WAL sender is sending a backup. + + + stopping: This WAL sender is stopping. + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a89d99838a..5d6f8b75b8 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8325,6 +8325,12 @@ ShutdownXLOG(int code, Datum arg) ereport(IsPostmasterEnvironment ? LOG : NOTICE, (errmsg("shutting down"))); + /* + * Wait for WAL senders to be in stopping state. This prevents commands + * from writing new WAL. + */ + WalSndWaitStopping(); + if (RecoveryInProgress()) CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); else diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 4a25ed8f5b..01f1c2805f 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2918,7 +2918,7 @@ reaper(SIGNAL_ARGS) * Waken walsenders for the last time. No regular backends * should be around anymore. */ - SignalChildren(SIGUSR2); + SignalChildren(SIGINT); pmState = PM_SHUTDOWN_2; @@ -3656,7 +3656,9 @@ PostmasterStateMachine(void) /* * If we get here, we are proceeding with normal shutdown. All * the regular children are gone, and it's time to tell the - * checkpointer to do a shutdown checkpoint. + * checkpointer to do a shutdown checkpoint. All WAL senders + * are told to switch to a stopping state so that the shutdown + * checkpoint can go ahead. 
*/ Assert(Shutdown > NoShutdown); /* Start the checkpointer if not running */ @@ -3665,6 +3667,7 @@ PostmasterStateMachine(void) /* And tell it to shut down */ if (CheckpointerPID != 0) { + SignalSomeChildren(SIGUSR2, BACKEND_TYPE_WALSND); signal_child(CheckpointerPID, SIGUSR2); pmState = PM_SHUTDOWN; } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 2a6c8bb62d..45d027803a 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -24,11 +24,14 @@ * are treated as not a crash but approximately normal termination; * the walsender will exit quickly without sending any more XLOG records. * - * If the server is shut down, postmaster sends us SIGUSR2 after all - * regular backends have exited and the shutdown checkpoint has been written. - * This instructs walsender to send any outstanding WAL, including the - * shutdown checkpoint record, wait for it to be replicated to the standby, - * and then exit. + * If the server is shut down, postmaster sends us SIGUSR2 after all regular + * backends have exited. This causes the walsender to switch to the "stopping" + * state. In this state, the walsender will reject any replication command + * that may generate WAL activity. The checkpointer begins the shutdown + * checkpoint once all walsenders are confirmed as stopping. When the shutdown + * checkpoint finishes, the postmaster sends us SIGINT. This instructs + * walsender to send any outstanding WAL, including the shutdown checkpoint + * record, wait for it to be replicated to the standby, and then exit. 
* * * Portions Copyright (c) 2010-2017, PostgreSQL Global Development Group @@ -177,13 +180,14 @@ static bool WalSndCaughtUp = false; /* Flags set by signal handlers for later service in main loop */ static volatile sig_atomic_t got_SIGHUP = false; -static volatile sig_atomic_t walsender_ready_to_stop = false; +static volatile sig_atomic_t got_SIGINT = false; +static volatile sig_atomic_t got_SIGUSR2 = false; /* - * This is set while we are streaming. When not set, SIGUSR2 signal will be + * This is set while we are streaming. When not set, SIGINT signal will be * handled like SIGTERM. When set, the main loop is responsible for checking - * walsender_ready_to_stop and terminating when it's set (after streaming any - * remaining WAL). + * got_SIGINT and terminating when it's set (after streaming any remaining + * WAL). */ static volatile sig_atomic_t replication_active = false; @@ -213,6 +217,7 @@ static struct /* Signal handlers */ static void WalSndSigHupHandler(SIGNAL_ARGS); static void WalSndXLogSendHandler(SIGNAL_ARGS); +static void WalSndSwitchStopping(SIGNAL_ARGS); static void WalSndLastCycleHandler(SIGNAL_ARGS); /* Prototypes for private functions */ @@ -299,11 +304,14 @@ WalSndErrorCleanup(void) ReplicationSlotCleanup(); replication_active = false; - if (walsender_ready_to_stop) + if (got_SIGINT) proc_exit(0); /* Revert back to startup state */ WalSndSetState(WALSNDSTATE_STARTUP); + + if (got_SIGUSR2) + WalSndSetState(WALSNDSTATE_STOPPING); } /* @@ -676,7 +684,7 @@ StartReplication(StartReplicationCmd *cmd) WalSndLoop(XLogSendPhysical); replication_active = false; - if (walsender_ready_to_stop) + if (got_SIGINT) proc_exit(0); WalSndSetState(WALSNDSTATE_STARTUP); @@ -1053,7 +1061,7 @@ StartLogicalReplication(StartReplicationCmd *cmd) { ereport(LOG, (errmsg("terminating walsender process after promotion"))); - walsender_ready_to_stop = true; + got_SIGINT = true; } WalSndSetState(WALSNDSTATE_CATCHUP); @@ -1103,7 +1111,7 @@ 
StartLogicalReplication(StartReplicationCmd *cmd) ReplicationSlotRelease(); replication_active = false; - if (walsender_ready_to_stop) + if (got_SIGINT) proc_exit(0); WalSndSetState(WALSNDSTATE_STARTUP); @@ -1290,6 +1298,14 @@ WalSndWaitForWal(XLogRecPtr loc) else RecentFlushPtr = GetXLogReplayRecPtr(NULL); + /* + * If postmaster asked us to switch to the stopping state, do so. + * Shutdown is in progress and this will allow the checkpointer to + * move on with the shutdown checkpoint. + */ + if (got_SIGUSR2) + WalSndSetState(WALSNDSTATE_STOPPING); + /* * If postmaster asked us to stop, don't wait here anymore. This will * cause the xlogreader to return without reading a full record, which @@ -1299,7 +1315,7 @@ WalSndWaitForWal(XLogRecPtr loc) * RecentFlushPtr, so we can send all remaining data before shutting * down. */ - if (walsender_ready_to_stop) + if (got_SIGINT) break; /* @@ -1373,6 +1389,22 @@ exec_replication_command(const char *cmd_string) MemoryContext cmd_context; MemoryContext old_context; + /* + * If WAL sender has been told that shutdown is getting close, switch its + * status accordingly to handle the next replication commands correctly. + */ + if (got_SIGUSR2) + WalSndSetState(WALSNDSTATE_STOPPING); + + /* + * Throw error if in stopping mode. We need to prevent commands that could + * generate WAL while the shutdown checkpoint is being written. To be + * safe, we just prohibit all new commands. + */ + if (MyWalSnd->state == WALSNDSTATE_STOPPING) + ereport(ERROR, + (errmsg("cannot execute new commands while WAL sender is in stopping mode"))); + /* * CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot until the next * command arrives. Clean up the old stuff if there's anything. @@ -2095,13 +2127,20 @@ WalSndLoop(WalSndSendDataCallback send_data) } /* - * When SIGUSR2 arrives, we send any outstanding logs up to the + * At the reception of SIGUSR2, switch the WAL sender to the stopping + * state. 
+ */ + if (got_SIGUSR2) + WalSndSetState(WALSNDSTATE_STOPPING); + + /* + * When SIGINT arrives, we send any outstanding logs up to the * shutdown checkpoint record (i.e., the latest record), wait for * them to be replicated to the standby, and exit. This may be a * normal termination at shutdown, or a promotion, the walsender * is not sure which. */ - if (walsender_ready_to_stop) + if (got_SIGINT) WalSndDone(send_data); } @@ -2841,7 +2880,23 @@ WalSndXLogSendHandler(SIGNAL_ARGS) errno = save_errno; } -/* SIGUSR2: set flag to do a last cycle and shut down afterwards */ +/* SIGUSR2: set flag to switch to stopping state */ +static void +WalSndSwitchStopping(SIGNAL_ARGS) +{ + int save_errno = errno; + + got_SIGUSR2 = true; + SetLatch(MyLatch); + + errno = save_errno; +} + +/* + * SIGINT: set flag to do a last cycle and shut down afterwards. The WAL + * sender should already have been switched to WALSNDSTATE_STOPPING at + * this point. + */ static void WalSndLastCycleHandler(SIGNAL_ARGS) { @@ -2856,7 +2911,7 @@ WalSndLastCycleHandler(SIGNAL_ARGS) if (!replication_active) kill(MyProcPid, SIGTERM); - walsender_ready_to_stop = true; + got_SIGINT = true; SetLatch(MyLatch); errno = save_errno; @@ -2869,14 +2924,14 @@ WalSndSignals(void) /* Set up signal handlers */ pqsignal(SIGHUP, WalSndSigHupHandler); /* set flag to read config * file */ - pqsignal(SIGINT, SIG_IGN); /* not used */ + pqsignal(SIGINT, WalSndLastCycleHandler); /* request a last cycle and + * shutdown */ pqsignal(SIGTERM, die); /* request shutdown */ pqsignal(SIGQUIT, quickdie); /* hard crash time */ InitializeTimeouts(); /* establishes SIGALRM handler */ pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, WalSndXLogSendHandler); /* request WAL sending */ - pqsignal(SIGUSR2, WalSndLastCycleHandler); /* request a last cycle and - * shutdown */ + pqsignal(SIGUSR2, WalSndSwitchStopping); /* switch to stopping state */ /* Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); @@ 
-2954,6 +3009,50 @@ WalSndWakeup(void) } } +/* + * Wait until all the WAL senders have reached the stopping state. This is + * used by the checkpointer to control when shutdown checkpoints can + * safely begin. + */ +void +WalSndWaitStopping(void) +{ + for (;;) + { + int i; + bool all_stopped = true; + + for (i = 0; i < max_wal_senders; i++) + { + WalSndState state; + WalSnd *walsnd = &WalSndCtl->walsnds[i]; + + SpinLockAcquire(&walsnd->mutex); + + if (walsnd->pid == 0) + { + SpinLockRelease(&walsnd->mutex); + continue; + } + + state = walsnd->state; + SpinLockRelease(&walsnd->mutex); + + if (state != WALSNDSTATE_STOPPING) + { + all_stopped = false; + break; + } + } + + /* safe to leave if confirmation is done for all WAL senders */ + if (all_stopped) + return; + + pg_usleep(10000L); /* wait for 10 msec */ + } +} + /* Set state for current walsender (only called in walsender) */ void WalSndSetState(WalSndState state) @@ -2987,6 +3086,8 @@ WalSndGetStateString(WalSndState state) return "catchup"; case WALSNDSTATE_STREAMING: return "streaming"; + case WALSNDSTATE_STOPPING: + return "stopping"; } return "UNKNOWN"; } diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h index 2ca903872e..99f12377e0 100644 --- a/src/include/replication/walsender.h +++ b/src/include/replication/walsender.h @@ -44,6 +44,7 @@ extern void WalSndSignals(void); extern Size WalSndShmemSize(void); extern void WalSndShmemInit(void); extern void WalSndWakeup(void); +extern void WalSndWaitStopping(void); extern void WalSndRqstFileReload(void); /* diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h index 2c59056cef..36311e124c 100644 --- a/src/include/replication/walsender_private.h +++ b/src/include/replication/walsender_private.h @@ -24,7 +24,8 @@ typedef enum WalSndState WALSNDSTATE_STARTUP = 0, WALSNDSTATE_BACKUP, WALSNDSTATE_CATCHUP, - WALSNDSTATE_STREAMING + WALSNDSTATE_STREAMING, + WALSNDSTATE_STOPPING } 
WalSndState; /*