From 7634bd4f6d38bdef1fe442df5c2e0da73f1f90f4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 11 Sep 2020 16:01:28 -0400 Subject: [PATCH] Accept SIGQUIT during error recovery in auxiliary processes. The bgwriter, checkpointer, walwriter, and walreceiver processes claimed to allow SIGQUIT "at all times". In reality SIGQUIT would get re-blocked during error recovery, because we didn't update the actual signal mask immediately, so sigsetjmp() would save and reinstate a mask that includes SIGQUIT. This appears to be simply a coding oversight. There's never a good reason to hold off SIGQUIT in these processes, because it's going to just call _exit(2) which should be safe enough, especially since the postmaster is going to tear down shared memory afterwards. Hence, stick in PG_SETMASK() calls to install the modified BlockSig mask immediately. Also try to improve the comments around sigsetjmp blocks. Most of them were just referencing postgres.c, which is misleading because actually postgres.c manages the signals differently. No back-patch, since there's no evidence that this is causing any problems in the field. Discussion: https://postgr.es/m/CALDaNm1d1hHPZUg3xU4XjtWBOLCrA+-2cJcLpw-cePZ=GgDVfA@mail.gmail.com --- src/backend/postmaster/autovacuum.c | 16 +++++++++++++++- src/backend/postmaster/bgwriter.c | 18 ++++++++++++++++-- src/backend/postmaster/checkpointer.c | 18 ++++++++++++++++-- src/backend/postmaster/walwriter.c | 18 ++++++++++++++++-- src/backend/replication/walreceiver.c | 3 ++- 5 files changed, 65 insertions(+), 8 deletions(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 1b8cd7bacd..19ba26b914 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -495,6 +495,12 @@ AutoVacLauncherMain(int argc, char *argv[]) * If an exception is encountered, processing resumes here. * * This code is a stripped down version of PostgresMain error recovery. + * + * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask + * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus, + * signals will be blocked until we complete error recovery. It might + * seem that this policy makes the HOLD_INTERRUPTS() call redundant, but + * it is not since InterruptPending might be set already. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { @@ -1550,7 +1556,15 @@ AutoVacWorkerMain(int argc, char *argv[]) /* * If an exception is encountered, processing resumes here. * - * See notes in postgres.c about the design of this coding. + * Unlike most auxiliary processes, we don't attempt to continue + * processing after an error; we just clean up and exit. The autovac + * launcher is responsible for spawning another worker later. + * + * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask + * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus, + * signals will be blocked until we exit. It might seem that this policy + * makes the HOLD_INTERRUPTS() call redundant, but it is not since + * InterruptPending might be set already. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 069e27e427..c96568149f 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -115,8 +115,9 @@ BackgroundWriterMain(void) */ pqsignal(SIGCHLD, SIG_DFL); - /* We allow SIGQUIT (quickdie) at all times */ + /* We allow SIGQUIT (SignalHandlerForCrashExit) at all times */ sigdelset(&BlockSig, SIGQUIT); + PG_SETMASK(&BlockSig); /* * We just started, assume there has been either a shutdown or @@ -140,7 +141,20 @@ BackgroundWriterMain(void) /* * If an exception is encountered, processing resumes here. * - * See notes in postgres.c about the design of this coding. + * You might wonder why this isn't coded as an infinite loop around a + * PG_TRY construct. The reason is that this is the bottom of the + * exception stack, and so with PG_TRY there would be no exception handler + * in force at all during the CATCH part. By leaving the outermost setjmp + * always active, we have at least some chance of recovering from an error + * during error recovery. (If we get into an infinite loop thereby, it + * will soon be stopped by overflow of elog.c's internal state stack.) + * + * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask + * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus, + * signals other than SIGQUIT will be blocked until we complete error + * recovery. It might seem that this policy makes the HOLD_INTERRUPTS() + * call redundant, but it is not since InterruptPending might be set + * already. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 624a3238b8..45f5deca72 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -209,8 +209,9 @@ CheckpointerMain(void) */ pqsignal(SIGCHLD, SIG_DFL); - /* We allow SIGQUIT (quickdie) at all times */ + /* We allow SIGQUIT (SignalHandlerForCrashExit) at all times */ sigdelset(&BlockSig, SIGQUIT); + PG_SETMASK(&BlockSig); /* * Initialize so that first time-driven event happens at the correct time. @@ -231,7 +232,20 @@ CheckpointerMain(void) /* * If an exception is encountered, processing resumes here. * - * See notes in postgres.c about the design of this coding. + * You might wonder why this isn't coded as an infinite loop around a + * PG_TRY construct. The reason is that this is the bottom of the + * exception stack, and so with PG_TRY there would be no exception handler + * in force at all during the CATCH part. By leaving the outermost setjmp + * always active, we have at least some chance of recovering from an error + * during error recovery. (If we get into an infinite loop thereby, it + * will soon be stopped by overflow of elog.c's internal state stack.) + * + * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask + * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus, + * signals other than SIGQUIT will be blocked until we complete error + * recovery. It might seem that this policy makes the HOLD_INTERRUPTS() + * call redundant, but it is not since InterruptPending might be set + * already. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index 45a2757969..358c0916ac 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -112,8 +112,9 @@ WalWriterMain(void) */ pqsignal(SIGCHLD, SIG_DFL); - /* We allow SIGQUIT (quickdie) at all times */ + /* We allow SIGQUIT (SignalHandlerForCrashExit) at all times */ sigdelset(&BlockSig, SIGQUIT); + PG_SETMASK(&BlockSig); /* * Create a memory context that we will do all our work in. We do this so @@ -129,7 +130,20 @@ WalWriterMain(void) /* * If an exception is encountered, processing resumes here. * - * This code is heavily based on bgwriter.c, q.v. + * You might wonder why this isn't coded as an infinite loop around a + * PG_TRY construct. The reason is that this is the bottom of the + * exception stack, and so with PG_TRY there would be no exception handler + * in force at all during the CATCH part. By leaving the outermost setjmp + * always active, we have at least some chance of recovering from an error + * during error recovery. (If we get into an infinite loop thereby, it + * will soon be stopped by overflow of elog.c's internal state stack.) + * + * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask + * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus, + * signals other than SIGQUIT will be blocked until we complete error + * recovery. It might seem that this policy makes the HOLD_INTERRUPTS() + * call redundant, but it is not since InterruptPending might be set + * already. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 7c11e1ab44..b180598507 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -279,8 +279,9 @@ WalReceiverMain(void) /* Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); - /* We allow SIGQUIT (quickdie) at all times */ + /* We allow SIGQUIT (SignalHandlerForCrashExit) at all times */ sigdelset(&BlockSig, SIGQUIT); + PG_SETMASK(&BlockSig); /* Load the libpq-specific functions */ load_file("libpqwalreceiver", false);