diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 05cb6aa022..f181c4d144 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -217,8 +217,9 @@ static TimeLineID receiveTLI = 0;
 static bool lastFullPageWrites;
 
 /*
- * Local copy of SharedRecoveryInProgress variable. True actually means "not
- * known, need to check the shared state".
+ * Local copy of the state tracked by SharedRecoveryState in shared memory.
+ * It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
+ * means "not known, need to check the shared state".
  */
 static bool LocalRecoveryInProgress = true;
 
@@ -643,10 +644,10 @@ typedef struct XLogCtlData
 	TimeLineID	PrevTimeLineID;
 
 	/*
-	 * SharedRecoveryInProgress indicates if we're still in crash or archive
+	 * SharedRecoveryState indicates if we're still in crash or archive
 	 * recovery. Protected by info_lck.
 	 */
-	bool		SharedRecoveryInProgress;
+	RecoveryState SharedRecoveryState;
 
 	/*
 	 * SharedHotStandbyActive indicates if we're still in crash or archive
@@ -4357,6 +4358,16 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
 					updateMinRecoveryPoint = true;
 
 					UpdateControlFile();
+
+					/*
+					 * We update SharedRecoveryState while holding
+					 * ControlFileLock so that both states stay consistent
+					 * in shared memory.
+					 */
+					SpinLockAcquire(&XLogCtl->info_lck);
+					XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
+					SpinLockRelease(&XLogCtl->info_lck);
+
 					LWLockRelease(ControlFileLock);
 
 					CheckRecoveryConsistency();
@@ -5069,7 +5080,7 @@ XLOGShmemInit(void)
 	 * in additional info.)
 	 */
 	XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
-	XLogCtl->SharedRecoveryInProgress = true;
+	XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
 	XLogCtl->SharedHotStandbyActive = false;
 	XLogCtl->WalWriterSleeping = false;
 
@@ -6758,7 +6769,13 @@ StartupXLOG(void)
 		 */
 		dbstate_at_startup = ControlFile->state;
 		if (InArchiveRecovery)
+		{
 			ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
+
+			SpinLockAcquire(&XLogCtl->info_lck);
+			XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
+			SpinLockRelease(&XLogCtl->info_lck);
+		}
 		else
 		{
 			ereport(LOG,
@@ -6771,6 +6788,10 @@ StartupXLOG(void)
 							ControlFile->checkPointCopy.ThisTimeLineID,
 							recoveryTargetTLI)));
 			ControlFile->state = DB_IN_CRASH_RECOVERY;
+
+			SpinLockAcquire(&XLogCtl->info_lck);
+			XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
+			SpinLockRelease(&XLogCtl->info_lck);
 		}
 		ControlFile->checkPoint = checkPointLoc;
 		ControlFile->checkPointCopy = checkPoint;
@@ -7785,7 +7806,7 @@ StartupXLOG(void)
 	ControlFile->time = (pg_time_t) time(NULL);
 
 	SpinLockAcquire(&XLogCtl->info_lck);
-	XLogCtl->SharedRecoveryInProgress = false;
+	XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE;
 	SpinLockRelease(&XLogCtl->info_lck);
 
 	UpdateControlFile();
@@ -7931,7 +7952,7 @@ RecoveryInProgress(void)
 		 */
 		volatile XLogCtlData *xlogctl = XLogCtl;
 
-		LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
+		LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
 
 		/*
 		 * Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -7943,8 +7964,8 @@ RecoveryInProgress(void)
 		{
 			/*
 			 * If we just exited recovery, make sure we read TimeLineID and
-			 * RedoRecPtr after SharedRecoveryInProgress (for machines with
-			 * weak memory ordering).
+			 * RedoRecPtr after SharedRecoveryState (for machines with weak
+			 * memory ordering).
 			 */
 			pg_memory_barrier();
 			InitXLOGAccess();
@@ -7960,6 +7981,24 @@ RecoveryInProgress(void)
 	}
 }
 
+/*
+ * Returns the current recovery state from shared memory.
+ *
+ * The returned state is kept consistent with the contents of the control
+ * file. See details about the possible values of RecoveryState in xlog.h.
+ */
+RecoveryState
+GetRecoveryState(void)
+{
+	RecoveryState retval;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	retval = XLogCtl->SharedRecoveryState;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return retval;
+}
+
 /*
  * Is HotStandby active yet? This is only important in special backends
  * since normal backends won't ever be able to connect until this returns
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index e14bcf8ea6..b3e5338766 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -617,18 +617,25 @@ XLogArchiveCheckDone(const char *xlog)
 {
 	char		archiveStatusPath[MAXPGPATH];
 	struct stat stat_buf;
-	bool		inRecovery = RecoveryInProgress();
+
+	/* The file is always deletable if archive_mode is "off". */
+	if (!XLogArchivingActive())
+		return true;
 
 	/*
-	 * The file is always deletable if archive_mode is "off". On standbys
-	 * archiving is disabled if archive_mode is "on", and enabled with
-	 * "always". On a primary, archiving is enabled if archive_mode is "on"
-	 * or "always".
+	 * During archive recovery, the file is deletable if archive_mode is not
+	 * "always".
 	 */
-	if (!((XLogArchivingActive() && !inRecovery) ||
-		  (XLogArchivingAlways() && inRecovery)))
+	if (!XLogArchivingAlways() &&
+		GetRecoveryState() == RECOVERY_STATE_ARCHIVE)
 		return true;
 
+	/*
+	 * If we reach this point, we are either a primary with archive_mode
+	 * set to "on" or "always", or a standby with archive_mode set to
+	 * "always".
+	 */
+
 	/* First check for .done --- this means archiver is done with it */
 	StatusFilePath(archiveStatusPath, xlog, ".done");
 	if (stat(archiveStatusPath, &stat_buf) == 0)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..d6ab348cb6 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -164,6 +164,14 @@ typedef enum WalLevel
 	WAL_LEVEL_LOGICAL
 } WalLevel;
 
+/* Recovery states */
+typedef enum RecoveryState
+{
+	RECOVERY_STATE_CRASH = 0,	/* crash recovery */
+	RECOVERY_STATE_ARCHIVE,		/* archive recovery */
+	RECOVERY_STATE_DONE			/* currently in production */
+} RecoveryState;
+
 extern PGDLLIMPORT int wal_level;
 
 /* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -277,6 +285,7 @@ extern const char *xlog_identify(uint8 info);
 extern void issue_xlog_fsync(int fd, XLogSegNo segno);
 
 extern bool RecoveryInProgress(void);
+extern RecoveryState GetRecoveryState(void);
 extern bool HotStandbyActive(void);
 extern bool HotStandbyActiveInReplay(void);
 extern bool XLogInsertAllowed(void);
diff --git a/src/test/recovery/t/020_archive_status.pl b/src/test/recovery/t/020_archive_status.pl
new file mode 100644
index 0000000000..397352f058
--- /dev/null
+++ b/src/test/recovery/t/020_archive_status.pl
@@ -0,0 +1,214 @@
+#
+# Tests related to WAL archiving and recovery.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 16;
+use Config;
+
+my $primary = get_new_node('master');
+$primary->init(
+	has_archiving    => 1,
+	allows_streaming => 1);
+$primary->append_conf('postgresql.conf', 'autovacuum = off');
+$primary->start;
+my $primary_data = $primary->data_dir;
+
+# Temporarily use an archive_command value to make the archiver fail,
+# while keeping archiving enabled. Note that we cannot use a command
+# that does not exist, as in this case the archiver process would just
+# exit without reporting the failure to pg_stat_archiver. This also
+# cannot use a plain "false", as that is not portable on Windows. So,
+# instead, as a portable solution, use an archive command based on a
+# command known to work but that will fail: copy with an incorrect source path.
+my $incorrect_command =
+  $TestLib::windows_os
+  ? qq{copy "%p_does_not_exist" "%f_does_not_exist"}
+  : qq{cp "%p_does_not_exist" "%f_does_not_exist"};
+$primary->safe_psql(
+	'postgres', qq{
+	ALTER SYSTEM SET archive_command TO '$incorrect_command';
+	SELECT pg_reload_conf();
+});
+
+# Save the WAL segment currently in use and switch to a new segment.
+# This will be used to track the activity of the archiver.
+my $segment_name_1 = $primary->safe_psql('postgres',
+	q{SELECT pg_walfile_name(pg_current_wal_lsn())});
+my $segment_path_1       = "pg_wal/archive_status/$segment_name_1";
+my $segment_path_1_ready = "$segment_path_1.ready";
+my $segment_path_1_done  = "$segment_path_1.done";
+$primary->safe_psql(
+	'postgres', q{
+	CREATE TABLE mine AS SELECT generate_series(1,10) AS x;
+	SELECT pg_switch_wal();
+	CHECKPOINT;
+});
+
+# Wait for an archive failure.
+$primary->poll_query_until('postgres',
+	q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't')
+  or die "Timed out while waiting for archiving to fail";
+ok( -f "$primary_data/$segment_path_1_ready",
+	".ready file exists for WAL segment $segment_name_1 waiting to be archived"
+);
+ok( !-f "$primary_data/$segment_path_1_done",
+	".done file does not exist for WAL segment $segment_name_1 waiting to be archived"
+);
+
+is( $primary->safe_psql(
+		'postgres', q{
+		SELECT archived_count, last_failed_wal
+		FROM pg_stat_archiver
+	}),
+	"0|$segment_name_1",
+	"pg_stat_archiver failed to archive $segment_name_1");
+
+# Crash the cluster for the next test, which checks that non-archived
+# WAL segments are not removed.
+$primary->stop('immediate');
+
+# The recovery tests for archiving with a standby partially check the
+# recovery behavior when restoring a backup taken using a snapshot
+# with no pg_start/stop_backup. In this situation, the recovered
+# standby should first enter crash recovery, then switch to regular
+# archive recovery. Note that the base backup is taken here while
+# archive_command is still failing; this is required by the
+# assumptions of the standby tests below.
+$primary->backup_fs_cold('backup');
+
+$primary->start;
+ok( -f "$primary_data/$segment_path_1_ready",
+	".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary"
+);
+
+# Allow WAL archiving again and wait for a success.
+$primary->safe_psql(
+	'postgres', q{
+	ALTER SYSTEM RESET archive_command;
+	SELECT pg_reload_conf();
+});
+
+$primary->poll_query_until('postgres',
+	q{SELECT archived_count FROM pg_stat_archiver}, '1')
+  or die "Timed out while waiting for archiving to finish";
+
+ok(!-f "$primary_data/$segment_path_1_ready",
+	".ready file for archived WAL segment $segment_name_1 removed");
+
+ok(-f "$primary_data/$segment_path_1_done",
+	".done file for archived WAL segment $segment_name_1 exists");
+
+is( $primary->safe_psql(
+		'postgres', q{ SELECT last_archived_wal FROM pg_stat_archiver }),
+	$segment_name_1,
+	"archive success reported in pg_stat_archiver for WAL segment $segment_name_1"
+);
+
+# Create some WAL activity and a new checkpoint so that the next standby
+# can create a restartpoint. As this standby starts in crash recovery
+# because of the cold backup taken previously, it needs a clean
+# restartpoint to deal with the existing status files.
+my $segment_name_2 = $primary->safe_psql('postgres',
+	q{SELECT pg_walfile_name(pg_current_wal_lsn())});
+my $segment_path_2       = "pg_wal/archive_status/$segment_name_2";
+my $segment_path_2_ready = "$segment_path_2.ready";
+my $segment_path_2_done  = "$segment_path_2.done";
+$primary->safe_psql(
+	'postgres', q{
+	INSERT INTO mine SELECT generate_series(10,20) AS x;
+	SELECT pg_switch_wal();
+	CHECKPOINT;
+});
+
+$primary->poll_query_until('postgres',
+	q{ SELECT last_archived_wal FROM pg_stat_archiver },
+	$segment_name_2)
+  or die "Timed out while waiting for archiving to finish";
+
+# Test standby with archive_mode = on.
+my $standby1 = get_new_node('standby');
+$standby1->init_from_backup($primary, 'backup', has_restoring => 1);
+$standby1->append_conf('postgresql.conf', "archive_mode = on");
+my $standby1_data = $standby1->data_dir;
+$standby1->start;
+$standby1->safe_psql('postgres', q{CHECKPOINT});
+
+# Recovery with archive_mode=on does not keep .ready signal files inherited
+# from the backup. Note that this WAL segment existed in the backup.
+ok( !-f "$standby1_data/$segment_path_1_ready",
+	".ready file for WAL segment $segment_name_1 present in backup got removed with archive_mode=on on standby"
+);
+
+# Recovery with archive_mode=on should not create .ready files.
+# Note that this segment did not exist in the backup.
+ok( !-f "$standby1_data/$segment_path_2_ready",
+	".ready file for WAL segment $segment_name_2 not created with archive_mode=on on standby"
+);
+
+# Recovery with archive_mode = on creates .done files.
+ok( -f "$standby1_data/$segment_path_2_done",
+	".done file for WAL segment $segment_name_2 created when archive_mode=on on standby"
+);
+
+# Test recovery with archive_mode = always, which should always keep
+# .ready files if archiving is enabled. Here we want the archive
+# command to keep failing so that the .ready files persist. Note that
+# this node inherits the failing archive command from the cold backup
+# taken previously, which causes archiving failures.
+my $standby2 = get_new_node('standby2');
+$standby2->init_from_backup($primary, 'backup', has_restoring => 1);
+$standby2->append_conf('postgresql.conf', 'archive_mode = always');
+my $standby2_data = $standby2->data_dir;
+$standby2->start;
+
+$standby2->safe_psql('postgres', q{CHECKPOINT});
+
+ok( -f "$standby2_data/$segment_path_1_ready",
+	".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby"
+);
+
+ok( -f "$standby2_data/$segment_path_2_ready",
+	".ready file for WAL segment $segment_name_2 created with archive_mode=always on standby"
+);
+
+# Reset statistics of the archiver for the next checks.
+$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')});
+
+# Now crash the cluster to check that crash recovery does not
+# remove non-archived WAL segments on a standby where archiving
+# is enabled.
+$standby2->stop('immediate');
+$standby2->start;
+
+ok( -f "$standby2_data/$segment_path_1_ready",
+	"WAL segment still ready to archive after crash recovery on standby with archive_mode=always"
+);
+
+# Allow WAL archiving again, and wait for the segments to be archived.
+$standby2->safe_psql(
+	'postgres', q{
+	ALTER SYSTEM RESET archive_command;
+	SELECT pg_reload_conf();
+});
+$standby2->poll_query_until('postgres',
+	q{SELECT last_archived_wal FROM pg_stat_archiver},
+	$segment_name_2)
+  or die "Timed out while waiting for archiving to finish";
+
+is( $standby2->safe_psql(
+		'postgres', q{SELECT archived_count FROM pg_stat_archiver}),
+	'2',
+	"correct number of WAL segments archived from standby");
+
+ok( !-f "$standby2_data/$segment_path_1_ready"
+	  && !-f "$standby2_data/$segment_path_2_ready",
+	".ready files removed after archive success with archive_mode=always on standby"
+);
+
+ok( -f "$standby2_data/$segment_path_1_done"
+	  && -f "$standby2_data/$segment_path_2_done",
+	".done files created after archive success with archive_mode=always on standby"
+);
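
For illustration only, and not part of the patch: a minimal sketch of how a caller could combine the new GetRecoveryState() API with the existing XLogArchivingActive()/XLogArchivingAlways() macros, mirroring the updated XLogArchiveCheckDone() logic above. The helper name segment_needs_archiving is hypothetical.

/*
 * Illustration only (hypothetical helper, not part of the patch): decide
 * whether a WAL segment must wait for the archiver, using the new
 * RecoveryState API in the same way as XLogArchiveCheckDone() above.
 */
#include "postgres.h"
#include "access/xlog.h"

static bool
segment_needs_archiving(void)
{
	/* With archive_mode = "off", nothing ever waits for the archiver. */
	if (!XLogArchivingActive())
		return false;

	/*
	 * During archive recovery (i.e. on a standby), only archive_mode =
	 * "always" keeps segments around until they are archived.
	 */
	if (GetRecoveryState() == RECOVERY_STATE_ARCHIVE)
		return XLogArchivingAlways();

	/*
	 * Crash recovery or normal operation: archive_mode = "on" and "always"
	 * both mean the segment must wait for a .done file before removal.
	 */
	return true;
}

The point of distinguishing RECOVERY_STATE_CRASH from RECOVERY_STATE_ARCHIVE is visible here: during crash recovery the helper still returns true, so .ready files created before a crash survive until the archiver has handled them, which is exactly what the new TAP test checks on both the primary and the standby with archive_mode=always.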