diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 91e99d059f..932c32a5a1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -12403,11 +12403,19 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						 * pg_wal by now.  Use XLOG_FROM_STREAM so that source
 						 * info is set correctly and XLogReceiptTime isn't
 						 * changed.
+						 *
+						 * NB: We must set readTimeLineHistory based on
+						 * recoveryTargetTLI, not receiveTLI. Normally they'll
+						 * be the same, but if recovery_target_timeline is
+						 * 'latest' and archiving is configured, then it's
+						 * possible that we managed to retrieve one or more
+						 * new timeline history files from the archive,
+						 * updating recoveryTargetTLI.
 						 */
 						if (readFile < 0)
 						{
 							if (!expectedTLEs)
-								expectedTLEs = readTimeLineHistory(receiveTLI);
+								expectedTLEs = readTimeLineHistory(recoveryTargetTLI);
 							readFile = XLogFileRead(readSegNo, PANIC,
 													receiveTLI,
 													XLOG_FROM_STREAM, false);
diff --git a/src/test/recovery/t/025_stuck_on_old_timeline.pl b/src/test/recovery/t/025_stuck_on_old_timeline.pl
new file mode 100644
index 0000000000..0d96bb3c15
--- /dev/null
+++ b/src/test/recovery/t/025_stuck_on_old_timeline.pl
@@ -0,0 +1,98 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Testing streaming replication where standby is promoted and a new cascading
+# standby (without WAL) is connected to the promoted standby. Both archiving
+# and streaming are enabled, but only the history file is available from the
+# archive, so the WAL files all have to be streamed. Test that the cascading
+# standby can follow the new primary (promoted standby).
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use FindBin;
+use Test::More tests => 1;
+
+# Initialize primary node
+my $node_primary = get_new_node('primary');
+
+# Set up an archive command that will copy the history file but not the WAL
+# files. No real archive command should behave this way; the point is to
+# simulate a race condition where the new cascading standby starts up after
+# the timeline history file reaches the archive but before any of the WAL files
+# get there.
+$node_primary->init(allows_streaming => 1, has_archiving => 1);
+my $perlbin = $^X;
+# On Windows, double every backslash in the path to the perl binary before
+# embedding it in archive_command.
+$perlbin =~ s{\\}{\\\\}g if ($TestLib::windows_os);
+my $archivedir_primary = $node_primary->archive_dir;
+$node_primary->append_conf('postgresql.conf', qq(
+archive_command = '$perlbin "$FindBin::RealBin/cp_history_files" "%p" "$archivedir_primary/%f"'
+));
+$node_primary->start;
+
+# Take backup from primary
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Create streaming standby linking to primary
+my $node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+	allows_streaming => 1, has_streaming => 1, has_archiving => 1);
+$node_standby->start;
+
+# Take backup of standby, use -Xnone so that pg_wal is empty.
+$node_standby->backup($backup_name, backup_options => ['-Xnone']);
+
+# Create cascading standby but don't start it yet.
+# Must set up both streaming and archiving.
+my $node_cascade = get_new_node('cascade');
+$node_cascade->init_from_backup($node_standby, $backup_name,
+	has_streaming => 1);
+$node_cascade->enable_restoring($node_primary);
+$node_cascade->append_conf('postgresql.conf', qq(
+recovery_target_timeline='latest'
+));
+
+# Promote the standby.
+$node_standby->promote;
+
+# Wait for promotion to complete
+$node_standby->poll_query_until('postgres',
+	"SELECT NOT pg_is_in_recovery();")
+  or die "Timed out while waiting for promotion";
+
+# Find next WAL segment to be archived
+my $walfile_to_be_archived = $node_standby->safe_psql('postgres',
+	"SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node_standby->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Wait until the WAL segment has been archived.
+# Since the history file gets created on promotion and is archived before any
+# WAL segment, this is enough to guarantee that the history file was
+# archived.
+my $archive_wait_query =
+  "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;";
+$node_standby->poll_query_until('postgres', $archive_wait_query)
+  or die "Timed out while waiting for WAL segment to be archived";
+
+# Start cascade node
+$node_cascade->start;
+
+# Create some content on promoted standby and check its presence on the
+# cascading standby.
+$node_standby->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT 1 AS a");
+
+# Wait for the replication to catch up
+$node_standby->wait_for_catchup($node_cascade, 'replay',
+	$node_standby->lsn('insert'));
+
+# Check that cascading standby has the new content
+my $result =
+  $node_cascade->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+# Emit the value as a TAP comment (note) instead of printing raw text.
+note "cascade: $result";
+is($result, 1, 'check streamed content on cascade standby');
diff --git a/src/test/recovery/t/cp_history_files b/src/test/recovery/t/cp_history_files
new file mode 100644
index 0000000000..cfeea41e5b
--- /dev/null
+++ b/src/test/recovery/t/cp_history_files
@@ -0,0 +1,12 @@
+#!/usr/bin/perl
+
+# Copy the source path to the target path, but only when the source path
+# matches /history/; for any other path, exit successfully without copying.
+use strict;
+use warnings;
+use File::Copy;
+
+die "wrong number of arguments" if @ARGV != 2;
+my ($source, $target) = @ARGV;
+exit if $source !~ /history/;
+copy($source, $target) or die "couldn't copy $source to $target: $!";