mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-30 23:02:03 +02:00
Quick exit on log stream child exit in pg_basebackup
If the log streaming child process (thread on Windows) dies during backup then the whole backup will be aborted at the end of the backup. Instead, trap ungraceful termination of the log streaming child and exit early. This also adds a TAP test for simulating this by terminating the responsible backend. Reviewed-by: Michael Paquier <michael@paquier.xyz> Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Reviewed-by: Magnus Hagander <magnus@hagander.net> Discussion: https://postgr.es/m/0F69E282-97F9-4DB7-8D6D-F927AA6340C8@yesql.se Discussion: https://postgr.es/m/VI1PR83MB0189818B82C19059CB62E26199A89@VI1PR83MB0189.EURPRD83.prod.outlook.com
This commit is contained in:
parent
c7d7e12039
commit
0475a97f74
@ -174,6 +174,8 @@ static int bgpipe[2] = {-1, -1};
|
||||
/* Handle to child process */
|
||||
static pid_t bgchild = -1;
|
||||
static bool in_log_streamer = false;
|
||||
/* Flag to indicate if child process exited unexpectedly */
|
||||
static volatile sig_atomic_t bgchild_exited = false;
|
||||
|
||||
/* End position for xlog streaming, empty string if unknown yet */
|
||||
static XLogRecPtr xlogendptr;
|
||||
@ -277,6 +279,18 @@ disconnect_atexit(void)
|
||||
}
|
||||
|
||||
#ifndef WIN32
|
||||
/*
|
||||
* If the bgchild exits prematurely and raises a SIGCHLD signal, we can abort
|
||||
* processing rather than wait until the backup has finished and error out at
|
||||
* that time. On Windows, we use a background thread which can communicate
|
||||
* without the need for a signal handler.
|
||||
*/
|
||||
static void
|
||||
sigchld_handler(SIGNAL_ARGS)
|
||||
{
|
||||
bgchild_exited = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* On windows, our background thread dies along with the process. But on
|
||||
* Unix, if we have started a subprocess, we want to kill it off so it
|
||||
@ -285,7 +299,7 @@ disconnect_atexit(void)
|
||||
static void
|
||||
kill_bgchild_atexit(void)
|
||||
{
|
||||
if (bgchild > 0)
|
||||
if (bgchild > 0 && !bgchild_exited)
|
||||
kill(bgchild, SIGTERM);
|
||||
}
|
||||
#endif
|
||||
@ -572,17 +586,28 @@ LogStreamerMain(logstreamer_param *param)
|
||||
stream.do_sync);
|
||||
|
||||
if (!ReceiveXlogStream(param->bgconn, &stream))
|
||||
|
||||
{
|
||||
/*
|
||||
* Any errors will already have been reported in the function process,
|
||||
* but we need to tell the parent that we didn't shutdown in a nice
|
||||
* way.
|
||||
*/
|
||||
#ifdef WIN32
|
||||
/*
|
||||
* In order to signal the main thread of an ungraceful exit we
|
||||
* set the same flag that we use on Unix to signal SIGCHLD.
|
||||
*/
|
||||
bgchild_exited = true;
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!stream.walmethod->finish())
|
||||
{
|
||||
pg_log_error("could not finish writing WAL files: %m");
|
||||
#ifdef WIN32
|
||||
bgchild_exited = true;
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -1134,6 +1159,12 @@ ReceiveCopyData(PGconn *conn, WriteDataCallback callback,
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (bgchild_exited)
|
||||
{
|
||||
pg_log_error("background process terminated unexpectedly");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
(*callback) (r, copybuf, callback_data);
|
||||
|
||||
PQfreemem(copybuf);
|
||||
@ -2882,6 +2913,18 @@ main(int argc, char **argv)
|
||||
}
|
||||
atexit(disconnect_atexit);
|
||||
|
||||
#ifndef WIN32
|
||||
/*
|
||||
* Trap SIGCHLD to be able to handle the WAL stream process exiting. There
|
||||
* is no SIGCHLD on Windows, there we rely on the background thread setting
|
||||
* the signal variable on unexpected but graceful exit. If the WAL stream
|
||||
* thread crashes on Windows it will bring down the entire process as it's
|
||||
* a thread, so there is nothing to catch should that happen. A crash on
|
||||
* UNIX will be caught by the signal handler.
|
||||
*/
|
||||
pqsignal(SIGCHLD, sigchld_handler);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Set umask so that directories/files are created with the same
|
||||
* permissions as directories/files in the source data directory.
|
||||
|
@ -776,4 +776,38 @@ SKIP:
|
||||
rmtree("$tempdir/backup_gzip3");
|
||||
}
|
||||
|
||||
# Test background stream process terminating before the basebackup has
|
||||
# finished, the main process should exit gracefully with an error message on
|
||||
# stderr. To reduce the risk of timing related issues we invoke the base
|
||||
# backup with rate throttling enabled.
|
||||
$node->safe_psql('postgres',
|
||||
q{CREATE TABLE t AS SELECT a FROM generate_series(1,10000) AS a;});
|
||||
|
||||
my $sigchld_bb_timeout = IPC::Run::timer(60);
|
||||
my ($sigchld_bb_stdin, $sigchld_bb_stdout, $sigchld_bb_stderr) = ('', '', '');
|
||||
my $sigchld_bb = IPC::Run::start(
|
||||
[
|
||||
@pg_basebackup_defs, '--wal-method=stream', '-D', "$tempdir/sigchld",
|
||||
'--max-rate=32', '-d', $node->connstr('postgres')
|
||||
],
|
||||
'<',
|
||||
\$sigchld_bb_stdin,
|
||||
'>',
|
||||
\$sigchld_bb_stdout,
|
||||
'2>',
|
||||
\$sigchld_bb_stderr,
|
||||
$sigchld_bb_timeout);
|
||||
|
||||
is($node->poll_query_until('postgres',
|
||||
"SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE " .
|
||||
"application_name = '010_pg_basebackup.pl' AND wait_event = 'WalSenderMain' " .
|
||||
"AND backend_type = 'walsender' AND query ~ 'START_REPLICATION'"),
|
||||
"1",
|
||||
"Walsender killed");
|
||||
|
||||
ok(pump_until($sigchld_bb, $sigchld_bb_timeout, \$sigchld_bb_stderr,
|
||||
qr/background process terminated unexpectedly/),
|
||||
'background process exit message');
|
||||
$sigchld_bb->finish();
|
||||
|
||||
done_testing();
|
||||
|
Loading…
Reference in New Issue
Block a user