Use WaitEventSet API for postmaster's event loop.

Switch to a design similar to regular backends, instead of the previous
arrangement where signal handlers did non-trivial state management and
called fork().  The main changes are:

* The postmaster now has its own local latch to wait on.  (For now, we
  don't want other backends setting its latch directly, but that could
  probably be made to work with more research on robustness.)

* The existing signal handlers are cut in two: a handle_pm_XXX() part
  that just sets pending_pm_XXX flags and the latch, and a
  process_pm_XXX() part that runs later when the latch is seen.

* Signal handlers are now installed with the regular pqsignal()
  function rather than the special pqsignal_pm() function; historical
  portability concerns about the effect of SA_RESTART on select() are no
  longer relevant, and we don't need to block signals anymore.

Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKG%2BZ-HpOj1JsO9eWUP%2Bar7npSVinsC_npxSy%2BjdOMsx%3DGg%40mail.gmail.com
This commit is contained in:
Thomas Munro 2023-01-12 12:34:23 +13:00
parent d93d68aeea
commit 7389aad636
8 changed files with 271 additions and 230 deletions

View File

@ -683,8 +683,7 @@ Setup_AF_UNIX(const char *sock_path)
* server port. Set port->sock to the FD of the new connection.
*
* ASSUME: that this doesn't need to be non-blocking because
* the Postmaster uses select() to tell when the socket is ready for
* accept().
* the Postmaster waits for the socket to be ready to accept().
*
* RETURNS: STATUS_OK or STATUS_ERROR
*/

View File

@ -97,43 +97,3 @@ pqinitmask(void)
sigdelset(&StartupBlockSig, SIGALRM);
#endif
}
/*
* Set up a postmaster signal handler for signal "signo"
*
* Returns the previous handler.
*
* This is used only in the postmaster, which has its own odd approach to
* signal handling. For signals with handlers, we block all signals for the
* duration of signal handler execution. We also do not set the SA_RESTART
* flag; this should be safe given the tiny range of code in which the
* postmaster ever unblocks signals.
*
* pqinitmask() must have been invoked previously.
*/
pqsigfunc
pqsignal_pm(int signo, pqsigfunc func)
{
struct sigaction act,
oact;
act.sa_handler = func;
if (func == SIG_IGN || func == SIG_DFL)
{
/* in these cases, act the same as pqsignal() */
sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART;
}
else
{
act.sa_mask = BlockSig;
act.sa_flags = 0;
}
#ifdef SA_NOCLDSTOP
if (signo == SIGCHLD)
act.sa_flags |= SA_NOCLDSTOP;
#endif
if (sigaction(signo, &act, &oact) < 0)
return SIG_ERR;
return oact.sa_handler;
}

View File

@ -12,24 +12,28 @@
#include "postgres.h"
#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include "libpq/pqsignal.h"
#include "postmaster/fork_process.h"
#ifndef WIN32
/*
* Wrapper for fork(). Return values are the same as those for fork():
* -1 if the fork failed, 0 in the child process, and the PID of the
* child in the parent process.
* child in the parent process. Signals are blocked while forking, so
* the child must unblock.
*/
pid_t
fork_process(void)
{
pid_t result;
const char *oomfilename;
sigset_t save_mask;
#ifdef LINUX_PROFILE
struct itimerval prof_itimer;
@ -51,6 +55,13 @@ fork_process(void)
getitimer(ITIMER_PROF, &prof_itimer);
#endif
/*
* We start postmaster children with signals blocked. This allows them to
* install their own handlers before unblocking, to avoid races where they
* might run the postmaster's handler and miss an important control signal.
* With more analysis this could potentially be relaxed.
*/
sigprocmask(SIG_SETMASK, &BlockSig, &save_mask);
result = fork();
if (result == 0)
{
@ -103,6 +114,11 @@ fork_process(void)
/* do post-fork initialization for random number generation */
pg_strong_random_init();
}
else
{
/* in parent, restore signal mask */
sigprocmask(SIG_SETMASK, &save_mask, NULL);
}
return result;
}

View File

@ -70,7 +70,6 @@
#include <time.h>
#include <sys/wait.h>
#include <ctype.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <fcntl.h>
@ -362,6 +361,17 @@ static volatile sig_atomic_t WalReceiverRequested = false;
static volatile bool StartWorkerNeeded = true;
static volatile bool HaveCrashedWorker = false;
/* set when signals arrive */
static volatile sig_atomic_t pending_pm_pmsignal;
static volatile sig_atomic_t pending_pm_child_exit;
static volatile sig_atomic_t pending_pm_reload_request;
static volatile sig_atomic_t pending_pm_shutdown_request;
static volatile sig_atomic_t pending_pm_fast_shutdown_request;
static volatile sig_atomic_t pending_pm_immediate_shutdown_request;
/* event multiplexing object */
static WaitEventSet *pm_wait_set;
#ifdef USE_SSL
/* Set when and if SSL has been initialized properly */
static bool LoadedSSL = false;
@ -380,10 +390,14 @@ static void getInstallationPaths(const char *argv0);
static void checkControlFile(void);
static Port *ConnCreate(int serverFd);
static void ConnFree(Port *port);
static void SIGHUP_handler(SIGNAL_ARGS);
static void pmdie(SIGNAL_ARGS);
static void reaper(SIGNAL_ARGS);
static void sigusr1_handler(SIGNAL_ARGS);
static void handle_pm_pmsignal_signal(SIGNAL_ARGS);
static void handle_pm_child_exit_signal(SIGNAL_ARGS);
static void handle_pm_reload_request_signal(SIGNAL_ARGS);
static void handle_pm_shutdown_request_signal(SIGNAL_ARGS);
static void process_pm_pmsignal(void);
static void process_pm_child_exit(void);
static void process_pm_reload_request(void);
static void process_pm_shutdown_request(void);
static void process_startup_packet_die(SIGNAL_ARGS);
static void dummy_handler(SIGNAL_ARGS);
static void StartupPacketTimeoutHandler(void);
@ -401,7 +415,6 @@ static int BackendStartup(Port *port);
static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
static void processCancelRequest(Port *port, void *pkt);
static int initMasks(fd_set *rmask);
static void report_fork_failure_to_client(Port *port, int errnum);
static CAC_state canAcceptConnections(int backend_type);
static bool RandomCancelKey(int32 *cancel_key);
@ -609,26 +622,6 @@ PostmasterMain(int argc, char *argv[])
/*
* Set up signal handlers for the postmaster process.
*
* In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
* is used by all child processes and client processes). That has a
* couple of special behaviors:
*
* 1. We tell sigaction() to block all signals for the duration of the
* signal handler. This is faster than our old approach of
* blocking/unblocking explicitly in the signal handler, and it should also
* prevent excessive stack consumption if signals arrive quickly.
*
* 2. We do not set the SA_RESTART flag. This is because signals will be
* blocked at all times except when ServerLoop is waiting for something to
* happen, and during that window, we want signals to exit the select(2)
* wait so that ServerLoop can respond if anything interesting happened.
* On some platforms, signals marked SA_RESTART would not cause the
* select() wait to end.
*
* Child processes will generally want SA_RESTART, so pqsignal() sets that
* flag. We expect children to set up their own handlers before
* unblocking signals.
*
* CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c,
* bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
@ -638,26 +631,19 @@ PostmasterMain(int argc, char *argv[])
pqinitmask();
PG_SETMASK(&BlockSig);
pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
* children do same */
pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
pqsignal(SIGHUP, handle_pm_reload_request_signal);
pqsignal(SIGINT, handle_pm_shutdown_request_signal);
pqsignal(SIGQUIT, handle_pm_shutdown_request_signal);
pqsignal(SIGTERM, handle_pm_shutdown_request_signal);
pqsignal(SIGALRM, SIG_IGN); /* ignored */
pqsignal(SIGPIPE, SIG_IGN); /* ignored */
pqsignal(SIGUSR1, handle_pm_pmsignal_signal);
pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
pqsignal(SIGCHLD, handle_pm_child_exit_signal);
#ifdef SIGURG
/*
* Ignore SIGURG for now. Child processes may change this (see
* InitializeLatchSupport), but they will not receive any such signals
* until they wait on a latch.
*/
pqsignal_pm(SIGURG, SIG_IGN); /* ignored */
#endif
/* This may configure SIGURG, depending on platform. */
InitializeLatchSupport();
InitProcessLocalLatch();
/*
* No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
@ -667,17 +653,20 @@ PostmasterMain(int argc, char *argv[])
* child processes should just allow the inherited settings to stand.
*/
#ifdef SIGTTIN
pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
pqsignal(SIGTTIN, SIG_IGN); /* ignored */
#endif
#ifdef SIGTTOU
pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
pqsignal(SIGTTOU, SIG_IGN); /* ignored */
#endif
/* ignore SIGXFSZ, so that ulimit violations work like disk full */
#ifdef SIGXFSZ
pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
#endif
/* Begin accepting signals. */
PG_SETMASK(&UnBlockSig);
/*
* Options setup
*/
@ -1698,105 +1687,107 @@ DetermineSleepTime(struct timeval *timeout)
}
}
/*
* Activate or deactivate notifications of server socket events. Since we
* don't currently have a way to remove events from an existing WaitEventSet,
* we'll just destroy and recreate the whole thing. This is called during
* shutdown so we can wait for backends to exit without accepting new
* connections, and during crash reinitialization when we need to start
* listening for new connections again. The WaitEventSet will be freed in fork
* children by ClosePostmasterPorts().
*/
static void
ConfigurePostmasterWaitSet(bool accept_connections)
{
int nsockets;
if (pm_wait_set)
FreeWaitEventSet(pm_wait_set);
pm_wait_set = NULL;
/* How many server sockets do we need to wait for? */
nsockets = 0;
if (accept_connections)
{
while (nsockets < MAXLISTEN &&
ListenSocket[nsockets] != PGINVALID_SOCKET)
++nsockets;
}
pm_wait_set = CreateWaitEventSet(CurrentMemoryContext, 1 + nsockets);
AddWaitEventToSet(pm_wait_set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
NULL);
if (accept_connections)
{
for (int i = 0; i < nsockets; i++)
AddWaitEventToSet(pm_wait_set, WL_SOCKET_ACCEPT, ListenSocket[i],
NULL, NULL);
}
}
/*
* Main idle loop of postmaster
*
* NB: Needs to be called with signals blocked
*/
static int
ServerLoop(void)
{
fd_set readmask;
int nSockets;
time_t last_lockfile_recheck_time,
last_touch_time;
WaitEvent events[MAXLISTEN];
int nevents;
ConfigurePostmasterWaitSet(true);
last_lockfile_recheck_time = last_touch_time = time(NULL);
nSockets = initMasks(&readmask);
for (;;)
{
fd_set rmask;
int selres;
time_t now;
struct timeval timeout;
DetermineSleepTime(&timeout);
nevents = WaitEventSetWait(pm_wait_set,
timeout.tv_sec * 1000 + timeout.tv_usec / 1000,
events,
lengthof(events),
0 /* postmaster posts no wait_events */ );
/*
* Wait for a connection request to arrive.
*
* We block all signals except while sleeping. That makes it safe for
* signal handlers, which again block all signals while executing, to
* do nontrivial work.
*
* If we are in PM_WAIT_DEAD_END state, then we don't want to accept
* any new connections, so we don't call select(), and just sleep.
* Latch set by signal handler, or new connection pending on any of
* our sockets? If the latter, fork a child process to deal with it.
*/
memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
if (pmState == PM_WAIT_DEAD_END)
for (int i = 0; i < nevents; i++)
{
PG_SETMASK(&UnBlockSig);
pg_usleep(100000L); /* 100 msec seems reasonable */
selres = 0;
PG_SETMASK(&BlockSig);
}
else
{
/* must set timeout each time; some OSes change it! */
struct timeval timeout;
/* Needs to run with blocked signals! */
DetermineSleepTime(&timeout);
PG_SETMASK(&UnBlockSig);
selres = select(nSockets, &rmask, NULL, NULL, &timeout);
PG_SETMASK(&BlockSig);
}
/* Now check the select() result */
if (selres < 0)
{
if (errno != EINTR && errno != EWOULDBLOCK)
if (events[i].events & WL_LATCH_SET)
{
ereport(LOG,
(errcode_for_socket_access(),
errmsg("select() failed in postmaster: %m")));
return STATUS_ERROR;
ResetLatch(MyLatch);
/* Process work requested via signal handlers. */
if (pending_pm_shutdown_request)
process_pm_shutdown_request();
if (pending_pm_child_exit)
process_pm_child_exit();
if (pending_pm_reload_request)
process_pm_reload_request();
if (pending_pm_pmsignal)
process_pm_pmsignal();
}
}
/*
* New connection pending on any of our sockets? If so, fork a child
* process to deal with it.
*/
if (selres > 0)
{
int i;
for (i = 0; i < MAXLISTEN; i++)
else if (events[i].events & WL_SOCKET_ACCEPT)
{
if (ListenSocket[i] == PGINVALID_SOCKET)
break;
if (FD_ISSET(ListenSocket[i], &rmask))
Port *port;
port = ConnCreate(events[i].fd);
if (port)
{
Port *port;
BackendStartup(port);
port = ConnCreate(ListenSocket[i]);
if (port)
{
BackendStartup(port);
/*
* We no longer need the open socket or port structure
* in this process
*/
StreamClose(port->sock);
ConnFree(port);
}
/*
* We no longer need the open socket or port structure in
* this process
*/
StreamClose(port->sock);
ConnFree(port);
}
}
}
@ -1939,34 +1930,6 @@ ServerLoop(void)
}
}
/*
* Initialise the masks for select() for the ports we are listening on.
* Return the number of sockets to listen on.
*/
static int
initMasks(fd_set *rmask)
{
int maxsock = -1;
int i;
FD_ZERO(rmask);
for (i = 0; i < MAXLISTEN; i++)
{
int fd = ListenSocket[i];
if (fd == PGINVALID_SOCKET)
break;
FD_SET(fd, rmask);
if (fd > maxsock)
maxsock = fd;
}
return maxsock + 1;
}
/*
* Read a client's startup packet and do something according to it.
*
@ -2609,6 +2572,13 @@ ClosePostmasterPorts(bool am_syslogger)
{
int i;
/* Release resources held by the postmaster's WaitEventSet. */
if (pm_wait_set)
{
FreeWaitEventSetAfterFork(pm_wait_set);
pm_wait_set = NULL;
}
#ifndef WIN32
/*
@ -2707,15 +2677,46 @@ InitProcessGlobals(void)
#endif
}
/*
* SIGHUP -- reread config files, and tell children to do same
* Child processes use SIGUSR1 to notify us of 'pmsignals'. pg_ctl uses
* SIGUSR1 to ask postmaster to check for logrotate and promote files.
*/
static void
SIGHUP_handler(SIGNAL_ARGS)
handle_pm_pmsignal_signal(SIGNAL_ARGS)
{
int save_errno = errno;
pending_pm_pmsignal = true;
SetLatch(MyLatch);
errno = save_errno;
}
/*
* pg_ctl uses SIGHUP to request a reload of the configuration files.
*/
static void
handle_pm_reload_request_signal(SIGNAL_ARGS)
{
int save_errno = errno;
pending_pm_reload_request = true;
SetLatch(MyLatch);
errno = save_errno;
}
/*
* Re-read config files, and tell children to do same.
*/
static void
process_pm_reload_request(void)
{
pending_pm_reload_request = false;
ereport(DEBUG2,
(errmsg_internal("postmaster received reload request signal")));
if (Shutdown <= SmartShutdown)
{
ereport(LOG,
@ -2771,26 +2772,72 @@ SIGHUP_handler(SIGNAL_ARGS)
write_nondefault_variables(PGC_SIGHUP);
#endif
}
errno = save_errno;
}
/*
* pmdie -- signal handler for processing various postmaster signals.
* pg_ctl uses SIGTERM, SIGINT and SIGQUIT to request different types of
* shutdown.
*/
static void
pmdie(SIGNAL_ARGS)
handle_pm_shutdown_request_signal(SIGNAL_ARGS)
{
int save_errno = errno;
ereport(DEBUG2,
(errmsg_internal("postmaster received signal %d",
postgres_signal_arg)));
switch (postgres_signal_arg)
{
case SIGTERM:
/* smart is implied if the other two flags aren't set */
pending_pm_shutdown_request = true;
break;
case SIGINT:
pending_pm_fast_shutdown_request = true;
pending_pm_shutdown_request = true;
break;
case SIGQUIT:
pending_pm_immediate_shutdown_request = true;
pending_pm_shutdown_request = true;
break;
}
SetLatch(MyLatch);
errno = save_errno;
}
/*
* Process shutdown request.
*/
static void
process_pm_shutdown_request(void)
{
int mode;
ereport(DEBUG2,
(errmsg_internal("postmaster received shutdown request signal")));
pending_pm_shutdown_request = false;
/*
* If more than one shutdown request signal arrived since the last server
* loop, take the one that is the most immediate. That matches the
* priority that would apply if we processed them one by one in any order.
*/
if (pending_pm_immediate_shutdown_request)
{
pending_pm_immediate_shutdown_request = false;
pending_pm_fast_shutdown_request = false;
mode = ImmediateShutdown;
}
else if (pending_pm_fast_shutdown_request)
{
pending_pm_fast_shutdown_request = false;
mode = FastShutdown;
}
else
mode = SmartShutdown;
switch (mode)
{
case SmartShutdown:
/*
* Smart Shutdown:
@ -2830,7 +2877,7 @@ pmdie(SIGNAL_ARGS)
PostmasterStateMachine();
break;
case SIGINT:
case FastShutdown:
/*
* Fast Shutdown:
@ -2871,7 +2918,7 @@ pmdie(SIGNAL_ARGS)
PostmasterStateMachine();
break;
case SIGQUIT:
case ImmediateShutdown:
/*
* Immediate Shutdown:
@ -2908,20 +2955,30 @@ pmdie(SIGNAL_ARGS)
PostmasterStateMachine();
break;
}
}
static void
handle_pm_child_exit_signal(SIGNAL_ARGS)
{
int save_errno = errno;
pending_pm_child_exit = true;
SetLatch(MyLatch);
errno = save_errno;
}
/*
* Reaper -- signal handler to cleanup after a child process dies.
* Cleanup after a child process dies.
*/
static void
reaper(SIGNAL_ARGS)
process_pm_child_exit(void)
{
int save_errno = errno;
int pid; /* process id of dead child process */
int exitstatus; /* its exit status */
pending_pm_child_exit = false;
ereport(DEBUG4,
(errmsg_internal("reaping dead processes")));
@ -3213,8 +3270,6 @@ reaper(SIGNAL_ARGS)
* or actions to make.
*/
PostmasterStateMachine();
errno = save_errno;
}
/*
@ -3642,8 +3697,9 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
/*
* Advance the postmaster's state machine and take actions as appropriate
*
* This is common code for pmdie(), reaper() and sigusr1_handler(), which
* receive the signals that might mean we need to change state.
* This is common code for process_pm_shutdown_request(),
* process_pm_child_exit() and process_pm_pmsignal(), which process the signals
* that might mean we need to change state.
*/
static void
PostmasterStateMachine(void)
@ -3796,6 +3852,9 @@ PostmasterStateMachine(void)
if (pmState == PM_WAIT_DEAD_END)
{
/* Don't allow any new socket connection events. */
ConfigurePostmasterWaitSet(false);
/*
* PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
* (ie, no dead_end children remain), and the archiver is gone too.
@ -3905,6 +3964,9 @@ PostmasterStateMachine(void)
pmState = PM_STARTUP;
/* crash recovery started, reset SIGKILL flag */
AbortStartTime = 0;
/* start accepting server socket connection events again */
ConfigurePostmasterWaitSet(true);
}
}
@ -5013,12 +5075,16 @@ ExitPostmaster(int status)
}
/*
* sigusr1_handler - handle signal conditions from child processes
* Handle pmsignal conditions representing requests from backends,
* and check for promote and logrotate requests from pg_ctl.
*/
static void
sigusr1_handler(SIGNAL_ARGS)
process_pm_pmsignal(void)
{
int save_errno = errno;
pending_pm_pmsignal = false;
ereport(DEBUG2,
(errmsg_internal("postmaster received pmsignal signal")));
/*
* RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
@ -5134,7 +5200,7 @@ sigusr1_handler(SIGNAL_ARGS)
/*
* Try to advance postmaster's state machine, if a child requests it.
*
* Be careful about the order of this action relative to sigusr1_handler's
* Be careful about the order of this action relative to this function's
* other actions. Generally, this should be after other actions, in case
* they have effects PostmasterStateMachine would need to know about.
* However, we should do it before the CheckPromoteSignal step, which
@ -5159,8 +5225,6 @@ sigusr1_handler(SIGNAL_ARGS)
*/
signal_child(StartupPID, SIGUSR2);
}
errno = save_errno;
}
/*

View File

@ -24,7 +24,6 @@
#include <signal.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/time.h>

View File

@ -135,8 +135,7 @@ InitPostmasterChild(void)
/* Initialize process-local latch support */
InitializeLatchSupport();
MyLatch = &LocalLatchData;
InitLatch(MyLatch);
InitProcessLocalLatch();
InitializeLatchWaitSet();
/*
@ -189,8 +188,7 @@ InitStandaloneProcess(const char *argv0)
/* Initialize process-local latch support */
InitializeLatchSupport();
MyLatch = &LocalLatchData;
InitLatch(MyLatch);
InitProcessLocalLatch();
InitializeLatchWaitSet();
/*
@ -232,6 +230,13 @@ SwitchToSharedLatch(void)
SetLatch(MyLatch);
}
void
InitProcessLocalLatch(void)
{
MyLatch = &LocalLatchData;
InitLatch(MyLatch);
}
void
SwitchBackToLocalLatch(void)
{

View File

@ -53,7 +53,4 @@ extern PGDLLIMPORT sigset_t StartupBlockSig;
extern void pqinitmask(void);
/* pqsigfunc is declared in src/include/port.h */
extern pqsigfunc pqsignal_pm(int signo, pqsigfunc func);
#endif /* PQSIGNAL_H */

View File

@ -310,6 +310,7 @@ extern PGDLLIMPORT char *DatabasePath;
/* now in utils/init/miscinit.c */
extern void InitPostmasterChild(void);
extern void InitStandaloneProcess(const char *argv0);
extern void InitProcessLocalLatch(void);
extern void SwitchToSharedLatch(void);
extern void SwitchBackToLocalLatch(void);