In the postmaster, rely on the signal infrastructure to block signals.

POSIX sigaction(2) can be told to block a set of signals while a
signal handler executes.  Make use of that instead of manually
blocking and unblocking signals in the postmaster's signal handlers.
This should save a few cycles, but more importantly it prevents
recursive invocation of signal handlers when many signals arrive in
close succession.  (Assuming that the platform's signal infrastructure
is designed to avoid consuming stack space in that case, but this is
demonstrably true at least on Linux.)  The existing code has been seen
to recurse to the point of stack overflow, either in the postmaster
or in a forked-off child.

Back-patch of commit 9abb2bfc0.  At the time, we'd only seen excess
postmaster stack consumption in the buildfarm; but we now have a
user report of it, and that commit has aged enough to have a fair
amount of confidence that it doesn't break anything.

This still doesn't change anything about the way that it works on
Windows.  Perhaps someone else would like to fix that?

Per bug #16673 from David Geier.  Back-patch to 9.6.  Although
the problem exists in principle before that, we've only seen it
actually materialize in connection with heavy use of parallel
workers, so it doesn't seem necessary to do anything in 9.5;
and the relevant code is different there, too.

Discussion: https://postgr.es/m/16673-d278c604f8e34ec0@postgresql.org
Discussion: https://postgr.es/m/14878.1570820201@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2020-10-15 12:50:57 -04:00
parent 72b1574090
commit 8b53dbada4
5 changed files with 116 additions and 57 deletions

View File

@ -95,3 +95,52 @@ pqinitmask(void)
sigdelset(&StartupBlockSig, SIGALRM);
#endif
}
/*
* Set up a postmaster signal handler for signal "signo"
*
* Returns the previous handler.
*
* This is used only in the postmaster, which has its own odd approach to
* signal handling. For signals with handlers, we block all signals for the
* duration of signal handler execution. We also do not set the SA_RESTART
* flag; this should be safe given the tiny range of code in which the
* postmaster ever unblocks signals.
*
* pqinitmask() must have been invoked previously.
*
* On Windows, this function is just an alias for pqsignal()
* (and note that it's calling the code in src/backend/port/win32/signal.c,
* not src/port/pqsignal.c). On that platform, the postmaster's signal
* handlers still have to block signals for themselves.
*/
pqsigfunc
pqsignal_pm(int signo, pqsigfunc func)
{
#ifndef WIN32
struct sigaction act,
oact;
act.sa_handler = func;
if (func == SIG_IGN || func == SIG_DFL)
{
/* in these cases, act the same as pqsignal() */
sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART;
}
else
{
act.sa_mask = BlockSig;
act.sa_flags = 0;
}
#ifdef SA_NOCLDSTOP
if (signo == SIGCHLD)
act.sa_flags |= SA_NOCLDSTOP;
#endif
if (sigaction(signo, &act, &oact) < 0)
return SIG_ERR;
return oact.sa_handler;
#else /* WIN32 */
return pqsignal(signo, func);
#endif
}

View File

@ -618,14 +618,25 @@ PostmasterMain(int argc, char *argv[])
/*
* Set up signal handlers for the postmaster process.
*
* In the postmaster, we want to install non-ignored handlers *without*
* SA_RESTART. This is because they'll be blocked at all times except
* when ServerLoop is waiting for something to happen, and during that
* window, we want signals to exit the select(2) wait so that ServerLoop
* can respond if anything interesting happened. On some platforms,
* signals marked SA_RESTART would not cause the select() wait to end.
* Child processes will generally want SA_RESTART, but we expect them to
* set up their own handlers before unblocking signals.
* In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
* is used by all child processes and client processes). That has a
* couple of special behaviors:
*
* 1. Except on Windows, we tell sigaction() to block all signals for the
* duration of the signal handler. This is faster than our old approach
* of blocking/unblocking explicitly in the signal handler, and it should
* also prevent excessive stack consumption if signals arrive quickly.
*
* 2. We do not set the SA_RESTART flag. This is because signals will be
* blocked at all times except when ServerLoop is waiting for something to
* happen, and during that window, we want signals to exit the select(2)
* wait so that ServerLoop can respond if anything interesting happened.
* On some platforms, signals marked SA_RESTART would not cause the
* select() wait to end.
*
* Child processes will generally want SA_RESTART, so pqsignal() sets that
* flag. We expect children to set up their own handlers before
* unblocking signals.
*
* CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c,
@ -637,18 +648,16 @@ PostmasterMain(int argc, char *argv[])
pqinitmask();
PG_SETMASK(&BlockSig);
pqsignal_no_restart(SIGHUP, SIGHUP_handler); /* reread config file and
* have children do same */
pqsignal_no_restart(SIGINT, pmdie); /* send SIGTERM and shut down */
pqsignal_no_restart(SIGQUIT, pmdie); /* send SIGQUIT and die */
pqsignal_no_restart(SIGTERM, pmdie); /* wait for children and shut down */
pqsignal(SIGALRM, SIG_IGN); /* ignored */
pqsignal(SIGPIPE, SIG_IGN); /* ignored */
pqsignal_no_restart(SIGUSR1, sigusr1_handler); /* message from child
* process */
pqsignal_no_restart(SIGUSR2, dummy_handler); /* unused, reserve for
* children */
pqsignal_no_restart(SIGCHLD, reaper); /* handle child termination */
pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
* children do same */
pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
/*
* No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
@ -658,15 +667,15 @@ PostmasterMain(int argc, char *argv[])
* child processes should just allow the inherited settings to stand.
*/
#ifdef SIGTTIN
pqsignal(SIGTTIN, SIG_IGN); /* ignored */
pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
#endif
#ifdef SIGTTOU
pqsignal(SIGTTOU, SIG_IGN); /* ignored */
pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
#endif
/* ignore SIGXFSZ, so that ulimit violations work like disk full */
#ifdef SIGXFSZ
pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
#endif
/*
@ -2655,7 +2664,13 @@ SIGHUP_handler(SIGNAL_ARGS)
{
int save_errno = errno;
/*
* We rely on the signal mechanism to have blocked all signals ... except
* on Windows, which lacks sigaction(), so we have to do it manually.
*/
#ifdef WIN32
PG_SETMASK(&BlockSig);
#endif
if (Shutdown <= SmartShutdown)
{
@ -2715,7 +2730,9 @@ SIGHUP_handler(SIGNAL_ARGS)
#endif
}
#ifdef WIN32
PG_SETMASK(&UnBlockSig);
#endif
errno = save_errno;
}
@ -2729,7 +2746,13 @@ pmdie(SIGNAL_ARGS)
{
int save_errno = errno;
/*
* We rely on the signal mechanism to have blocked all signals ... except
* on Windows, which lacks sigaction(), so we have to do it manually.
*/
#ifdef WIN32
PG_SETMASK(&BlockSig);
#endif
ereport(DEBUG2,
(errmsg_internal("postmaster received signal %d",
@ -2858,7 +2881,9 @@ pmdie(SIGNAL_ARGS)
break;
}
#ifdef WIN32
PG_SETMASK(&UnBlockSig);
#endif
errno = save_errno;
}
@ -2873,7 +2898,13 @@ reaper(SIGNAL_ARGS)
int pid; /* process id of dead child process */
int exitstatus; /* its exit status */
/*
* We rely on the signal mechanism to have blocked all signals ... except
* on Windows, which lacks sigaction(), so we have to do it manually.
*/
#ifdef WIN32
PG_SETMASK(&BlockSig);
#endif
ereport(DEBUG4,
(errmsg_internal("reaping dead processes")));
@ -3176,7 +3207,9 @@ reaper(SIGNAL_ARGS)
PostmasterStateMachine();
/* Done with signal handler */
#ifdef WIN32
PG_SETMASK(&UnBlockSig);
#endif
errno = save_errno;
}
@ -5110,7 +5143,13 @@ sigusr1_handler(SIGNAL_ARGS)
{
int save_errno = errno;
/*
* We rely on the signal mechanism to have blocked all signals ... except
* on Windows, which lacks sigaction(), so we have to do it manually.
*/
#ifdef WIN32
PG_SETMASK(&BlockSig);
#endif
/* Process background worker state change. */
if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
@ -5270,7 +5309,9 @@ sigusr1_handler(SIGNAL_ARGS)
signal_child(StartupPID, SIGUSR2);
}
#ifdef WIN32
PG_SETMASK(&UnBlockSig);
#endif
errno = save_errno;
}

View File

@ -36,4 +36,7 @@ extern sigset_t UnBlockSig,
extern void pqinitmask(void);
/* pqsigfunc is declared in src/include/port.h */
extern pqsigfunc pqsignal_pm(int signo, pqsigfunc func);
#endif /* PQSIGNAL_H */

View File

@ -524,11 +524,6 @@ extern int pg_mkdir_p(char *path, int omode);
/* port/pqsignal.c */
typedef void (*pqsigfunc) (int signo);
extern pqsigfunc pqsignal(int signo, pqsigfunc func);
#ifndef WIN32
extern pqsigfunc pqsignal_no_restart(int signo, pqsigfunc func);
#else
#define pqsignal_no_restart(signo, func) pqsignal(signo, func)
#endif
/* port/quotes.c */
extern char *escape_single_quotes_ascii(const char *src);

View File

@ -58,33 +58,4 @@ pqsignal(int signo, pqsigfunc func)
#endif
}
/*
* Set up a signal handler, without SA_RESTART, for signal "signo"
*
* Returns the previous handler.
*
* On Windows, this would be identical to pqsignal(), so don't bother.
*/
#ifndef WIN32
pqsigfunc
pqsignal_no_restart(int signo, pqsigfunc func)
{
struct sigaction act,
oact;
act.sa_handler = func;
sigemptyset(&act.sa_mask);
act.sa_flags = 0;
#ifdef SA_NOCLDSTOP
if (signo == SIGCHLD)
act.sa_flags |= SA_NOCLDSTOP;
#endif
if (sigaction(signo, &act, &oact) < 0)
return SIG_ERR;
return oact.sa_handler;
}
#endif /* !WIN32 */
#endif /* !defined(WIN32) || defined(FRONTEND) */