diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 8603cf3f94..863ac31c6b 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -9671,6 +9671,23 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' + + remove_temp_files_after_crash (boolean) + + remove_temp_files_after_crash configuration parameter + + + + + When set to on, which is the default, + PostgreSQL will automatically remove + temporary files after a backend crash. If disabled, the files will be + retained and may be used for debugging, for example. Repeated crashes + may however result in accumulation of useless files. + + + + data_sync_retry (boolean) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index e8af05c04e..fc9c769bc6 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -242,6 +242,7 @@ bool Db_user_namespace = false; bool enable_bonjour = false; char *bonjour_name; bool restart_after_crash = true; +bool remove_temp_files_after_crash = true; /* PIDs of special child processes; 0 when not running */ static pid_t StartupPID = 0, @@ -3976,6 +3977,10 @@ PostmasterStateMachine(void) ereport(LOG, (errmsg("all server processes terminated; reinitializing"))); + /* remove leftover temporary files after a crash */ + if (remove_temp_files_after_crash) + RemovePgTempFiles(); + /* allow background workers to immediately restart */ ResetBackgroundWorkerCrashTimes(); diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index b58502837a..110ba31517 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -3024,11 +3024,13 @@ CleanupTempFiles(bool isCommit, bool isProcExit) * remove any leftover files created by OpenTemporaryFile and any leftover * temporary relation files created by mdcreate. * - * NOTE: we could, but don't, call this during a post-backend-crash restart - * cycle. The argument for not doing it is that someone might want to examine - * the temp files for debugging purposes. This does however mean that - * OpenTemporaryFile had better allow for collision with an existing temp - * file name. + * During post-backend-crash restart cycle, this routine is called when + * remove_temp_files_after_crash GUC is enabled. Multiple crashes while + * queries are using temp files could result in useless storage usage that can + * only be reclaimed by a service restart. The argument against enabling it is + * that someone might want to examine the temporary files for debugging + * purposes. This does however mean that OpenTemporaryFile had better allow for + * collision with an existing temp file name. * * NOTE: this function and its subroutines generally report syscall failures * with ereport(LOG) and keep going. Removing temp files is not so critical diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index b263e3493b..5a3ca5b765 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1371,6 +1371,15 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"remove_temp_files_after_crash", PGC_SIGHUP, ERROR_HANDLING_OPTIONS, + gettext_noop("Remove temporary files after backend crash."), + NULL + }, + &remove_temp_files_after_crash, + true, + NULL, NULL, NULL + }, { {"log_duration", PGC_SUSET, LOGGING_WHAT, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 6647f8fd6e..3ff507d5f6 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -758,6 +758,8 @@ #exit_on_error = off # terminate session on any error? #restart_after_crash = on # reinitialize after backend crash? +#remove_temp_files_after_crash = on # remove temporary files after + # backend crash? #data_sync_retry = off # retry or panic on failure to fsync # data? # (change requires restart) diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index cfa59c4dc0..0efdd7c232 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -29,6 +29,7 @@ extern bool log_hostname; extern bool enable_bonjour; extern char *bonjour_name; extern bool restart_after_crash; +extern bool remove_temp_files_after_crash; #ifdef WIN32 extern HANDLE PostmasterHandle; diff --git a/src/test/recovery/t/022_crash_temp_files.pl b/src/test/recovery/t/022_crash_temp_files.pl new file mode 100644 index 0000000000..c37b227770 --- /dev/null +++ b/src/test/recovery/t/022_crash_temp_files.pl @@ -0,0 +1,192 @@ +# Test remove of temporary files after a crash. +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; +use Config; +use Time::HiRes qw(usleep); + +plan tests => 9; + + +# To avoid hanging while expecting some specific input from a psql +# instance being driven by us, add a timeout high enough that it +# should never trigger even on very slow machines, unless something +# is really wrong. +my $psql_timeout = IPC::Run::timer(60); + +my $node = get_new_node('node_crash'); +$node->init(); +$node->start(); + +# By default, PostgresNode doesn't restart after crash +# Reduce work_mem to generate temporary file with a few number of rows +$node->safe_psql( + 'postgres', + q[ALTER SYSTEM SET remove_temp_files_after_crash = on; + ALTER SYSTEM SET log_connections = 1; + ALTER SYSTEM SET work_mem = '64kB'; + ALTER SYSTEM SET restart_after_crash = on; + SELECT pg_reload_conf();]); + +# create table, insert rows +$node->safe_psql( + 'postgres', + q[CREATE TABLE tab_crash (a text); + INSERT INTO tab_crash (a) SELECT gen_random_uuid() FROM generate_series(1, 500);]); + +# Run psql, keeping session alive, so we have an alive backend to kill. +my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +my $killme = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', + $node->connstr('postgres') + ], + '<', + \$killme_stdin, + '>', + \$killme_stdout, + '2>', + \$killme_stderr, + $psql_timeout); + +# Get backend pid +$killme_stdin .= q[ +SELECT pg_backend_pid(); +]; +ok(pump_until($killme, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m), + 'acquired pid for SIGKILL'); +my $pid = $killme_stdout; +chomp($pid); +$killme_stdout = ''; +$killme_stderr = ''; + +# Run the query that generates a temporary file and that will be killed before +# it finishes. Since the query that generates the temporary file does not +# return before the connection is killed, use a SELECT before to trigger +# pump_until. +$killme_stdin .= q[ +BEGIN; +SELECT $$in-progress-before-sigkill$$; +WITH foo AS (SELECT a FROM tab_crash ORDER BY a) SELECT a, pg_sleep(1) FROM foo; +]; +ok(pump_until($killme, \$killme_stdout, qr/in-progress-before-sigkill/m), + 'select in-progress-before-sigkill'); +$killme_stdout = ''; +$killme_stderr = ''; + +# Wait some time so the temporary file is generated by SELECT +usleep(10_000); + +# Kill with SIGKILL +my $ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid); +is($ret, 0, 'killed process with KILL'); + +# Close psql session +$killme->finish; + +# Wait till server restarts +$node->poll_query_until('postgres', 'SELECT 1', '1'); + +# Check for temporary files +is($node->safe_psql( + 'postgres', + 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'), + qq(0), 'no temporary files'); + +# +# Test old behavior (don't remove temporary files after crash) +# +$node->safe_psql( + 'postgres', + q[ALTER SYSTEM SET remove_temp_files_after_crash = off; + SELECT pg_reload_conf();]); + +# Restart psql session +($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +$killme->run(); + +# Get backend pid +$killme_stdin .= q[ +SELECT pg_backend_pid(); +]; +ok(pump_until($killme, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m), + 'acquired pid for SIGKILL'); +$pid = $killme_stdout; +chomp($pid); +$killme_stdout = ''; +$killme_stderr = ''; + +# Run the query that generates a temporary file and that will be killed before +# it finishes. Since the query that generates the temporary file does not +# return before the connection is killed, use a SELECT before to trigger +# pump_until. +$killme_stdin .= q[ +BEGIN; +SELECT $$in-progress-before-sigkill$$; +WITH foo AS (SELECT a FROM tab_crash ORDER BY a) SELECT a, pg_sleep(1) FROM foo; +]; +ok(pump_until($killme, \$killme_stdout, qr/in-progress-before-sigkill/m), + 'select in-progress-before-sigkill'); +$killme_stdout = ''; +$killme_stderr = ''; + +# Wait some time so the temporary file is generated by SELECT +usleep(10_000); + +# Kill with SIGKILL +$ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid); +is($ret, 0, 'killed process with KILL'); + +# Close psql session +$killme->finish; + +# Wait till server restarts +$node->poll_query_until('postgres', 'SELECT 1', '1'); + +# Check for temporary files -- should be there +is($node->safe_psql( + 'postgres', + 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'), + qq(1), 'one temporary file'); + +# Restart should remove the temporary files +$node->restart(); + +# Check the temporary files -- should be gone +is($node->safe_psql( + 'postgres', + 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'), + qq(0), 'temporary file was removed'); + +$node->stop(); + +# Pump until string is matched, or timeout occurs +sub pump_until +{ + my ($proc, $stream, $untl) = @_; + $proc->pump_nb(); + while (1) + { + last if $$stream =~ /$untl/; + if ($psql_timeout->is_expired) + { + diag("aborting wait: program timed out"); + diag("stream contents: >>", $$stream, "<<"); + diag("pattern searched for: ", $untl); + + return 0; + } + if (not $proc->pumpable()) + { + diag("aborting wait: program died"); + diag("stream contents: >>", $$stream, "<<"); + diag("pattern searched for: ", $untl); + + return 0; + } + $proc->pump(); + } + return 1; +}