Fix test race between primary XLOG_RUNNING_XACTS and standby logical slot.
Before the previous commit, the test could hang until LOG_SNAPSHOT_INTERVAL_MS (15s), until checkpoint_timeout (300s), or indefinitely. An indefinite hang was awfully improbable. It entailed the test reaching checkpoint_timeout before the DecodingContextFindStartpoint() of a CREATE SUBSCRIPTION, yet after the preceding WAL record. Back-patch to v16, which introduced the test. Bertrand Drouvot, reported by Noah Misch. Discussion: https://postgr.es/m/20240211010227.a2.nmisch@google.com
This commit is contained in:
parent
4791f87f34
commit
0e162810df
|
@ -3181,6 +3181,36 @@ $SIG{TERM} = $SIG{INT} = sub {
|
||||||
|
|
||||||
=pod
|
=pod
|
||||||
|
|
||||||
|
=item $node->log_standby_snapshot(self, standby, slot_name)
|
||||||
|
|
||||||
|
Log a standby snapshot on the primary once the slot's restart_lsn is determined on
|
||||||
|
the standby.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub log_standby_snapshot
{
	my ($self, $standby, $slot_name) = @_;

	# The standby only starts looking for an xl_running_xacts WAL record
	# from the slot's restart_lsn onwards, and only once that restart_lsn
	# has been determined.  So the first step is to poll the standby until
	# the named slot reports a non-NULL restart_lsn.
	my $restart_lsn_known = $standby->poll_query_until(
		'postgres', qq[
		SELECT restart_lsn IS NOT NULL
		FROM pg_catalog.pg_replication_slots WHERE slot_name = '$slot_name'
	]);

	# Give up if the poll did not succeed.
	die "timed out waiting for logical slot to calculate its restart_lsn"
	  unless $restart_lsn_known;

	# Second step: have this node (the primary) produce the
	# xl_running_xacts record that the standby is waiting for.
	return $self->safe_psql('postgres',
		'SELECT pg_log_standby_snapshot()');
}
|
||||||
|
|
||||||
|
=pod
|
||||||
|
|
||||||
=item $node->create_logical_slot_on_standby(self, primary, slot_name, dbname)
|
=item $node->create_logical_slot_on_standby(self, primary, slot_name, dbname)
|
||||||
|
|
||||||
Create logical replication slot on given standby
|
Create logical replication slot on given standby
|
||||||
|
@ -3206,21 +3236,9 @@ sub create_logical_slot_on_standby
|
||||||
'2>',
|
'2>',
|
||||||
\$stderr);
|
\$stderr);
|
||||||
|
|
||||||
# Once the slot's restart_lsn is determined, the standby looks for
|
# Arrange for the xl_running_xacts record for which pg_recvlogical is
|
||||||
# xl_running_xacts WAL record from the restart_lsn onwards. First wait
|
|
||||||
# until the slot restart_lsn is determined.
|
|
||||||
|
|
||||||
$self->poll_query_until(
|
|
||||||
'postgres', qq[
|
|
||||||
SELECT restart_lsn IS NOT NULL
|
|
||||||
FROM pg_catalog.pg_replication_slots WHERE slot_name = '$slot_name'
|
|
||||||
])
|
|
||||||
or die
|
|
||||||
"timed out waiting for logical slot to calculate its restart_lsn";
|
|
||||||
|
|
||||||
# Then arrange for the xl_running_xacts record for which pg_recvlogical is
|
|
||||||
# waiting.
|
# waiting.
|
||||||
$primary->safe_psql('postgres', 'SELECT pg_log_standby_snapshot()');
|
$primary->log_standby_snapshot($self, $slot_name);
|
||||||
|
|
||||||
$handle->finish();
|
$handle->finish();
|
||||||
|
|
||||||
|
|
|
@ -465,8 +465,8 @@ $psql_subscriber{subscriber_stdin} .= "\n";
|
||||||
|
|
||||||
$psql_subscriber{run}->pump_nb();
|
$psql_subscriber{run}->pump_nb();
|
||||||
|
|
||||||
# Speed up the subscription creation
|
# Log the standby snapshot to speed up the subscription creation
|
||||||
$node_primary->safe_psql('postgres', "SELECT pg_log_standby_snapshot()");
|
$node_primary->log_standby_snapshot($node_standby, 'tap_sub');
|
||||||
|
|
||||||
# Explicitly shut down psql instance gracefully - to avoid hangs
|
# Explicitly shut down psql instance gracefully - to avoid hangs
|
||||||
# or worse on windows
|
# or worse on windows
|
||||||
|
|
Loading…
Reference in New Issue