From 582fbffb0ccc79de76a459df670b86d109d37ca5 Mon Sep 17 00:00:00 2001
From: Noah Misch
Date: Mon, 7 Sep 2015 19:01:00 -0400
Subject: [PATCH] In the pg_rewind test suite, receive WAL fully before promoting.

If a transaction never reaches the standby, later tests find unexpected
cluster state. A "tail-copy: query result matches" test failure has
been the usual symptom. Among the buildfarm members having run this
test suite, most have exhibited that symptom at least once. Back-patch
to 9.5, where pg_rewind was introduced.

Michael Paquier, reported by Christoph Berg.
---
 src/bin/pg_rewind/RewindTest.pm | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/bin/pg_rewind/RewindTest.pm b/src/bin/pg_rewind/RewindTest.pm
index 22e5cae45d..a4c17371dc 100644
--- a/src/bin/pg_rewind/RewindTest.pm
+++ b/src/bin/pg_rewind/RewindTest.pm
@@ -222,12 +222,8 @@ recovery_target_timeline='latest'
 		'-l', "$log_path/standby.log",
 		'-o', "-p $port_standby", 'start');
 
-	# Wait until the standby has caught up with the primary, by polling
-	# pg_stat_replication.
-	my $caughtup_query =
-"SELECT pg_current_xlog_location() = replay_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
-	poll_query_until($caughtup_query, $connstr_master)
-	  or die "Timed out while waiting for standby to catch up";
+	# The standby may have WAL to apply before it matches the primary. That
+	# is fine, because no test examines the standby before promotion.
 }
 
 sub promote_standby
@@ -235,6 +231,12 @@ sub promote_standby
 	#### Now run the test-specific parts to run after standby has been started
 	# up standby
 
+	# Wait for the standby to receive and write all WAL.
+	my $wal_received_query =
+"SELECT pg_current_xlog_location() = write_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
+	poll_query_until($wal_received_query, $connstr_master)
+	  or die "Timed out while waiting for standby to receive and write WAL";
+
 	# Now promote slave and insert some new data on master, this will put
 	# the master out-of-sync with the standby. Wait until the standby is
 	# out of recovery mode, and is ready to accept read-write connections.