From b7ad093d50e13aadfffb1662f53cd16a1c59e09d Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Wed, 1 Sep 2021 10:00:12 +0530 Subject: [PATCH] Fix the random test failure in 001_rep_changes. The check to test whether the subscription workers were restarting after a change in the subscription was failing. The reason was that the test was assuming the walsender started before it reaches the 'streaming' state and the walsender was exiting due to an error before that. Now, the walsender was erroring out before reaching the 'streaming' state because it tries to acquire the slot before the previous walsender has exited. In passing, improve the die messages so that it is easier to investigate the failures in the future if any. Reported-by: Michael Paquier, as per buildfarm Author: Ajin Cherian Reviewed-by: Masahiko Sawada, Amit Kapila Backpatch-through: 10, where this test was introduced Discussion: https://postgr.es/m/YRnhFxa9bo73wfpV@paquier.xyz --- src/test/subscription/t/001_rep_changes.pl | 26 ++++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl index dee5f5c30a..7dd69caacb 100644 --- a/src/test/subscription/t/001_rep_changes.pl +++ b/src/test/subscription/t/001_rep_changes.pl @@ -414,28 +414,30 @@ is( $result, qq(11.11|baz|1 'update works with dropped subscriber column'); # check that change of connection string and/or publication list causes -# restart of subscription workers. Not all of these are registered as tests -# as we need to poll for a change but the test suite will fail none the less -# when something goes wrong. +# restart of subscription workers. We check the state along with +# application_name to ensure that the walsender is (re)started. +# +# Not all of these are registered as tests as we need to poll for a change +# but the test suite will fail none the less when something goes wrong. my $oldpid = $node_publisher->safe_psql('postgres', - "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub';" + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" ); $node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr sslmode=disable'" ); $node_publisher->poll_query_until('postgres', - "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub';" -) or die "Timed out while waiting for apply to restart"; + "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" +) or die "Timed out while waiting for apply to restart after changing CONNECTION"; $oldpid = $node_publisher->safe_psql('postgres', - "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub';" + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" ); $node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only WITH (copy_data = false)" ); $node_publisher->poll_query_until('postgres', - "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub';" -) or die "Timed out while waiting for apply to restart"; + "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" +) or die "Timed out while waiting for apply to restart after changing PUBLICATION"; $node_publisher->safe_psql('postgres', "INSERT INTO tab_ins SELECT generate_series(1001,1100)"); @@ -483,13 +485,13 @@ is($result, qq(21|0|100), 'check replicated insert after alter publication'); # check restart on rename $oldpid = $node_publisher->safe_psql('postgres', - "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub';" + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" ); $node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tap_sub RENAME TO tap_sub_renamed"); $node_publisher->poll_query_until('postgres', - "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub_renamed';" -) or die "Timed out while waiting for apply to restart"; + "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub_renamed' AND state = 'streaming';" +) or die "Timed out while waiting for apply to restart after renaming SUBSCRIPTION"; # check all the cleanup $node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub_renamed");