From b51985d8a0e1a7487d17559cbeefc87892c4d916 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Wed, 1 Sep 2021 09:16:35 +0530 Subject: [PATCH] Fix the random test failure in 001_rep_changes. The check to test whether the subscription workers were restarting after a change in the subscription was failing. The reason was that the test was assuming the walsender started before it reaches the 'streaming' state and the walsender was exiting due to an error before that. Now, the walsender was erroring out before reaching the 'streaming' state because it tries to acquire the slot before the previous walsender has exited. In passing, improve the die messages so that it is easier to investigate the failures in the future if any. Reported-by: Michael Paquier, as per buildfarm Author: Ajin Cherian Reviewed-by: Masahiko Sawada, Amit Kapila Backpatch-through: 10, where this test was introduced Discussion: https://postgr.es/m/YRnhFxa9bo73wfpV@paquier.xyz --- src/test/subscription/t/001_rep_changes.pl | 26 ++++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl index 311cca8a21..c3be32eaad 100644 --- a/src/test/subscription/t/001_rep_changes.pl +++ b/src/test/subscription/t/001_rep_changes.pl @@ -292,28 +292,30 @@ is( $result, qq(11.11|baz|1 'update works with dropped subscriber column'); # check that change of connection string and/or publication list causes -# restart of subscription workers. Not all of these are registered as tests -# as we need to poll for a change but the test suite will fail none the less -# when something goes wrong. +# restart of subscription workers. We check the state along with +# application_name to ensure that the walsender is (re)started. +# +# Not all of these are registered as tests as we need to poll for a change +# but the test suite will fail none the less when something goes wrong. my $oldpid = $node_publisher->safe_psql('postgres', - "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub';" + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" ); $node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr sslmode=disable'" ); $node_publisher->poll_query_until('postgres', - "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub';" -) or die "Timed out while waiting for apply to restart"; + "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" +) or die "Timed out while waiting for apply to restart after changing CONNECTION"; $oldpid = $node_publisher->safe_psql('postgres', - "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub';" + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" ); $node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only WITH (copy_data = false)" ); $node_publisher->poll_query_until('postgres', - "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub';" -) or die "Timed out while waiting for apply to restart"; + "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" +) or die "Timed out while waiting for apply to restart after changing PUBLICATION"; $node_publisher->safe_psql('postgres', "INSERT INTO tab_ins SELECT generate_series(1001,1100)"); @@ -361,13 +363,13 @@ is($result, qq(21|0|100), 'check replicated insert after alter publication'); # check restart on rename $oldpid = $node_publisher->safe_psql('postgres', - "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub';" + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" ); $node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tap_sub RENAME TO tap_sub_renamed"); $node_publisher->poll_query_until('postgres', - "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub_renamed';" -) or die "Timed out while waiting for apply to restart"; + "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = 'tap_sub_renamed' AND state = 'streaming';" +) or die "Timed out while waiting for apply to restart after renaming SUBSCRIPTION"; # check all the cleanup $node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub_renamed");