
# Copyright (c) 2024, PostgreSQL Global Development Group

use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

##################################################
# Creating a subscription with failover enabled must also switch on the
# failover property of the corresponding slot on the publisher.
##################################################

# Publisher node.  The extra auth option makes sure pg_hba.conf allows
# connections from repl_role; that is only needed on Windows machines that
# don't use UNIX sockets.
my $publisher = PostgreSQL::Test::Cluster->new('publisher');
$publisher->init(
	allows_streaming => 'logical',
	auth_extra => [ '--create-role', 'repl_role' ]);

# Keep autovacuum off.  A stats update could otherwise generate a new XID,
# which might be replicated to the standby at some random point and leave the
# slots on the primary lagging behind the standby during slot sync.
$publisher->append_conf('postgresql.conf', 'autovacuum = off');
$publisher->start;

$publisher->safe_psql('postgres',
	"CREATE PUBLICATION regress_mypub FOR ALL TABLES;");

# Connection string used by the subscriptions to reach the publisher.
my $publisher_connstr = $publisher->connstr;
$publisher_connstr .= ' dbname=postgres';

# Subscriber node.
my $subscriber1 = PostgreSQL::Test::Cluster->new('subscriber1');
$subscriber1->init;
$subscriber1->start;

# Remember the publisher's clock just before the logical failover slot is
# created.  (Further down, this publisher node is referred to as the primary.)
my $slot_creation_time_on_primary = $publisher->safe_psql(
	'postgres', qq[
	SELECT current_timestamp;
]);

# Create a (disabled) subscription that has failover turned on.
$subscriber1->safe_psql('postgres',
	"CREATE SUBSCRIPTION regress_mysub1 CONNECTION '$publisher_connstr' PUBLICATION regress_mypub WITH (slot_name = lsub1_slot, copy_data = false, failover = true, enabled = false);"
);

# The slot on the publisher must now report failover = true.
{
	my $failover_flag = $publisher->safe_psql('postgres',
		q{SELECT failover from pg_replication_slots WHERE slot_name = 'lsub1_slot';}
	);
	is($failover_flag, "t",
		'logical slot has failover true on the publisher');
}

##################################################
# Changing the failover property of a subscription must update the failover
# property of the corresponding slot on the publisher.
##################################################

{
	# Reads the failover flag of the subscription's slot on the publisher.
	my $failover_sql =
	  q{SELECT failover from pg_replication_slots WHERE slot_name = 'lsub1_slot';};

	# Each step sets the subscription's failover option and then confirms
	# that the slot reports the matching flag: first off, then on again.
	my @steps = (
		[ 'false', "f", 'logical slot has failover false on the publisher' ],
		[ 'true', "t", 'logical slot has failover true on the publisher' ]);

	foreach my $step (@steps)
	{
		my ($setting, $expected_flag, $test_name) = @$step;

		$subscriber1->safe_psql('postgres',
			"ALTER SUBSCRIPTION regress_mysub1 SET (failover = $setting)");

		is($publisher->safe_psql('postgres', $failover_sql),
			$expected_flag, $test_name);
	}
}

##################################################
# Test that the failover option cannot be changed for enabled subscriptions.
##################################################

# Enable subscription
$subscriber1->safe_psql('postgres',
	"ALTER SUBSCRIPTION regress_mysub1 ENABLE");

# Try to disable failover while the subscription is enabled.  psql() is used
# rather than safe_psql() because the command is expected to fail; only the
# error text is inspected.
my ($result, $stdout, $stderr) = $subscriber1->psql('postgres',
	"ALTER SUBSCRIPTION regress_mysub1 SET (failover = false)");

# The server separates "ERROR:" from the message text with two spaces, so
# match any run of whitespace there.  like() also prints the actual stderr
# when the match fails.
like(
	$stderr,
	qr/ERROR:\s+cannot set failover for enabled subscription/,
	"altering failover is not allowed for enabled subscription");

##################################################
# Test that pg_sync_replication_slots() cannot be executed on a non-standby
# server.
##################################################

# The call is expected to fail, so use psql() and examine stderr.
($result, $stdout, $stderr) =
  $publisher->psql('postgres', "SELECT pg_sync_replication_slots();");

# Tolerate any amount of whitespace after "ERROR:" (the server emits two
# spaces); like() reports the actual stderr on mismatch.
like(
	$stderr,
	qr/ERROR:\s+replication slots can only be synchronized to a standby server/,
	"cannot sync slots on a non-standby server");

##################################################
# Test logical failover slots corresponding to different plugins can be
# synced to the standby.
#
# Configure standby1 to replicate and synchronize logical slots configured
# for failover on the primary
#
#              failover slot lsub1_slot   | output_plugin: pgoutput
#              failover slot lsub2_slot   | output_plugin: test_decoding
# primary --->                            |
#              physical slot sb1_slot --->| ----> standby1 (connected via streaming replication)
#                                         | lsub1_slot, lsub2_slot (synced_slot)
##################################################

# From here on the publisher node is addressed as the primary.
my $primary = $publisher;
my $backup_name = 'backup';
$primary->backup($backup_name);

# Create a standby
my $standby1 = PostgreSQL::Test::Cluster->new('standby1');
$standby1->init_from_backup(
	$primary, $backup_name,
	has_streaming => 1,
	has_restoring => 1);

# Increase the log_min_messages setting to DEBUG2 on both the standby and
# primary to debug test failures, if any.
my $connstr_1 = $primary->connstr;
$standby1->append_conf(
	'postgresql.conf', qq(
hot_standby_feedback = on
primary_slot_name = 'sb1_slot'
primary_conninfo = '$connstr_1 dbname=postgres'
log_min_messages = 'debug2'
));

$primary->append_conf('postgresql.conf', "log_min_messages = 'debug2'");
$primary->reload;

# Drop the subscription to prevent further advancement of the restart_lsn for
# the lsub1_slot.
$subscriber1->safe_psql('postgres', "DROP SUBSCRIPTION regress_mysub1;");

# Create the slots the rest of this section relies on:
#  - lsub1_slot is re-created so that its restart_lsn has moved to a recent
#    WAL position;
#  - lsub2_slot is a second failover slot using a different output plugin;
#  - sb1_slot is the physical slot standby1 streams from.
# safe_psql() is used so that a failed slot creation aborts the test right
# here instead of surfacing later as a confusing sync failure.
foreach my $create_slot_sql (
	q{SELECT pg_create_logical_replication_slot('lsub1_slot', 'pgoutput', false, false, true);},
	q{SELECT pg_create_logical_replication_slot('lsub2_slot', 'test_decoding', false, false, true);},
	q{SELECT pg_create_physical_replication_slot('sb1_slot');})
{
	$primary->safe_psql('postgres', $create_slot_sql);
}

# Start the standby so that slot syncing can begin
$standby1->start;

# Capture the inactive_since of the slot from the primary. Note that the slot
# will be inactive since the corresponding subscription was dropped.
my $inactive_since_on_primary =
  $primary->validate_slot_inactive_since('lsub1_slot',
	$slot_creation_time_on_primary);

# Wait for the standby to catch up so that the standby is not lagging behind
# the failover slots.
$primary->wait_for_replay_catchup($standby1);

# Synchronize the primary server slots to the standby.
$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");

# Confirm that the logical failover slots are created on the standby and are
# flagged as 'synced'
is( $standby1->safe_psql(
		'postgres',
		q{SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN ('lsub1_slot', 'lsub2_slot') AND synced AND NOT temporary;}
	),
	"t",
	'logical slots have synced as true on standby');

# Capture the inactive_since of the synced slot on the standby
my $inactive_since_on_standby =
  $standby1->validate_slot_inactive_since('lsub1_slot',
	$slot_creation_time_on_primary);

# Synced slot on the standby must get its own inactive_since
is( $standby1->safe_psql(
		'postgres',
		"SELECT '$inactive_since_on_primary'::timestamptz < '$inactive_since_on_standby'::timestamptz;"
	),
	"t",
	'synchronized slot has got its own inactive_since');

##################################################
# Test that the synchronized slot will be dropped if the corresponding remote
# slot on the primary server has been dropped.
##################################################

# Drop the remote slot.  safe_psql() makes a failed drop fatal; otherwise the
# check below would fail without showing the real cause.
$primary->safe_psql('postgres',
	"SELECT pg_drop_replication_slot('lsub2_slot');");

# Re-run the synchronization so the standby notices the slot is gone.
$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");

is( $standby1->safe_psql(
		'postgres',
		q{SELECT count(*) = 0 FROM pg_replication_slots WHERE slot_name = 'lsub2_slot';}
	),
	"t",
	'synchronized slot has been dropped');

##################################################
# Test that if the synchronized slot is invalidated while the remote slot is
# still valid, the slot will be dropped and re-created on the standby by
# executing pg_sync_replication_slots() again.
##################################################

# Configure the max_slot_wal_keep_size so that the synced slot can be
# invalidated due to wal removal.
$standby1->append_conf('postgresql.conf', 'max_slot_wal_keep_size = 64kB');
$standby1->reload;

# Generate some activity and switch WAL file on the primary.  The checkpoint
# goes through safe_psql() so that a failure is not silently ignored.
$primary->advance_wal(1);
$primary->safe_psql('postgres', "CHECKPOINT");
$primary->wait_for_replay_catchup($standby1);

# Request a checkpoint on the standby to trigger the WAL file(s) removal
$standby1->safe_psql('postgres', "CHECKPOINT");

# Check if the synced slot is invalidated
is( $standby1->safe_psql(
		'postgres',
		q{SELECT invalidation_reason = 'wal_removed' FROM pg_replication_slots WHERE slot_name = 'lsub1_slot';}
	),
	"t",
	'synchronized slot has been invalidated');

# Reset max_slot_wal_keep_size to avoid further wal removal
$standby1->append_conf('postgresql.conf', 'max_slot_wal_keep_size = -1');
$standby1->reload;

# Capture the time before the logical failover slot is created on the primary.
# ($primary and $publisher refer to the same node; $primary is used here to
# stay consistent with the rest of this section.)
$slot_creation_time_on_primary = $primary->safe_psql(
	'postgres', qq[
	SELECT current_timestamp;
]);

# To ensure that restart_lsn has moved to a recent WAL position, we re-create
# the lsub1_slot.
$primary->safe_psql(
	'postgres', qq[
	SELECT pg_drop_replication_slot('lsub1_slot');
	SELECT pg_create_logical_replication_slot('lsub1_slot', 'pgoutput', false, false, true);
]);

# Capture the inactive_since of the slot from the primary. Note that the slot
# will be inactive since the corresponding subscription was dropped.
$inactive_since_on_primary =
  $primary->validate_slot_inactive_since('lsub1_slot',
	$slot_creation_time_on_primary);

# Wait for the standby to catch up so that the standby is not lagging behind
# the failover slots.
$primary->wait_for_replay_catchup($standby1);

# Remember the current end of the standby's log so that only messages written
# after this point are searched below.
my $log_offset = -s $standby1->logfile;

# Synchronize the primary server slots to the standby.
$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");

# Confirm that the invalidated slot has been dropped.
$standby1->wait_for_log(
	qr/dropped replication slot "lsub1_slot" of dbid [0-9]+/,
	$log_offset);

# Confirm that the logical slot has been re-created on the standby and is
# flagged as 'synced'
is( $standby1->safe_psql(
		'postgres',
		q{SELECT invalidation_reason IS NULL AND synced AND NOT temporary FROM pg_replication_slots WHERE slot_name = 'lsub1_slot';}
	),
	"t",
	'logical slot is re-synced');

# Set log_min_messages back to 'warning' on both nodes, undoing the debug2
# setting applied earlier.
$primary->append_conf('postgresql.conf', "log_min_messages = 'warning'");
$primary->reload;

$standby1->append_conf('postgresql.conf', "log_min_messages = 'warning'");
$standby1->reload;

##################################################
# Test that a synchronized slot can not be decoded, altered or dropped by the
# user
##################################################

# All three commands below are expected to fail, so they go through psql()
# and only stderr is inspected.  The patterns accept any whitespace after
# "ERROR:" (the server emits two spaces), and like() prints the actual stderr
# if a pattern does not match.

# Attempting to perform logical decoding on a synced slot should result in an
# error
($result, $stdout, $stderr) = $standby1->psql('postgres',
	"select * from pg_logical_slot_get_changes('lsub1_slot', NULL, NULL);");
like(
	$stderr,
	qr/ERROR:\s+cannot use replication slot "lsub1_slot" for logical decoding/,
	"logical decoding is not allowed on synced slot");

# Attempting to alter a synced slot should result in an error.  The command
# is a replication-protocol command, hence the replication connection.
($result, $stdout, $stderr) = $standby1->psql(
	'postgres',
	qq[ALTER_REPLICATION_SLOT lsub1_slot (failover);],
	replication => 'database');
like(
	$stderr,
	qr/ERROR:\s+cannot alter replication slot "lsub1_slot"/,
	"synced slot on standby cannot be altered");

# Attempting to drop a synced slot should result in an error
($result, $stdout, $stderr) = $standby1->psql('postgres',
	"SELECT pg_drop_replication_slot('lsub1_slot');");
like(
	$stderr,
	qr/ERROR:\s+cannot drop replication slot "lsub1_slot"/,
	"synced slot on standby cannot be dropped");

##################################################
# Test that we cannot synchronize slots if dbname is not specified in the
# primary_conninfo.
##################################################

# Override primary_conninfo with a value that lacks dbname.
$standby1->append_conf('postgresql.conf', "primary_conninfo = '$connstr_1'");
$standby1->reload;

# The sync attempt is expected to fail; inspect stderr.
($result, $stdout, $stderr) =
  $standby1->psql('postgres', "SELECT pg_sync_replication_slots();");

# Accept any whitespace after "ERROR:" (the server emits two spaces); like()
# shows the actual stderr on mismatch.
like(
	$stderr,
	qr/ERROR:\s+slot synchronization requires dbname to be specified in primary_conninfo/,
	"cannot sync slots if dbname is not specified in primary_conninfo");

# Add the dbname back to the primary_conninfo for further tests
$standby1->append_conf('postgresql.conf',
	"primary_conninfo = '$connstr_1 dbname=postgres'");
$standby1->reload;

##################################################
# Test that we cannot synchronize slots to a cascading standby server.
##################################################

# Create a cascading standby from a backup of standby1
$backup_name = 'backup2';
$standby1->backup($backup_name);

my $cascading_standby = PostgreSQL::Test::Cluster->new('cascading_standby');
$cascading_standby->init_from_backup(
	$standby1, $backup_name,
	has_streaming => 1,
	has_restoring => 1);

# Point the cascading standby at standby1 rather than at the primary.
my $cascading_connstr = $standby1->connstr;
$cascading_standby->append_conf(
	'postgresql.conf', qq(
hot_standby_feedback = on
primary_slot_name = 'cascading_sb_slot'
primary_conninfo = '$cascading_connstr dbname=postgres'
));

# Create the physical slot named in primary_slot_name above.  safe_psql()
# makes a failed creation fatal instead of letting it pass unnoticed.
$standby1->safe_psql('postgres',
	q{SELECT pg_create_physical_replication_slot('cascading_sb_slot');});

$cascading_standby->start;

# The sync attempt is expected to fail; inspect stderr.
($result, $stdout, $stderr) =
  $cascading_standby->psql('postgres', "SELECT pg_sync_replication_slots();");

# Accept any whitespace after "ERROR:" (the server emits two spaces); like()
# shows the actual stderr on mismatch.
like(
	$stderr,
	qr/ERROR:\s+cannot synchronize replication slots from a standby server/,
	"cannot sync slots to a cascading standby server");

$cascading_standby->stop;

##################################################
# Set up a failover slot whose restart_lsn points at a position where a
# transaction is running.  A later test uses it to verify that, after
# promotion, a synced slot can reach the consistent snapshot state starting
# from the restart_lsn without losing any data that otherwise would have been
# received from the primary.
##################################################

$primary->safe_psql('postgres',
	"SELECT pg_create_logical_replication_slot('snap_test_slot', 'test_decoding', false, false, true);"
);

# Let the standby catch up first, so that it is not lagging behind the
# failover slots, then sync the new slot over.
$primary->wait_for_replay_catchup($standby1);
$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");

# Generate two xl_running_xacts records.  Decoding the first one only
# serializes the snapshot and does not advance the restart_lsn to the latest
# position: while a transaction is running, the restart_lsn can only move to
# a position before that transaction.  Decoding the second record is what
# lets the restart_lsn advance to the position of the last serialized
# snapshot (the first record).
$primary->safe_psql(
	'postgres', qq(
	BEGIN;
	SELECT txid_current();
	SELECT pg_log_standby_snapshot();
	COMMIT;
	BEGIN;
	SELECT txid_current();
	SELECT pg_log_standby_snapshot();
	COMMIT;
));

# Move the restart_lsn to the position of the first xl_running_xacts record
# written above.  The bgwriter may write xl_running_xacts records
# concurrently, which could advance the position to an unexpected point, but
# that would be a rare scenario and doesn't affect the test results.
$primary->safe_psql('postgres',
	"SELECT pg_replication_slot_advance('snap_test_slot', pg_current_wal_lsn());"
);

# Again make sure the standby is not lagging behind the failover slots.
$primary->wait_for_replay_catchup($standby1);

# Emit a message that is meant to be consumed on the standby, through the
# synced slot, after promotion.  See the test that promotes standby1 to
# primary.
$primary->safe_psql('postgres',
	"SELECT pg_logical_emit_message(false, 'test', 'test');");

# Fetch the primary's confirmed_flush_lsn for snap_test_slot ...
my $confirmed_flush_lsn = $primary->safe_psql('postgres',
	"SELECT confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'snap_test_slot';"
);

$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");

# ... and verify that the synced slot on the standby reaches the same value.
ok( $standby1->poll_query_until(
		'postgres',
		"SELECT '$confirmed_flush_lsn' = confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'snap_test_slot' AND synced AND NOT temporary;"
	),
	'confirmed_flush_lsn of slot snap_test_slot synced to standby');

##################################################
# Test to confirm that the slot synchronization is protected from malicious
# users.
##################################################

# Everything below depends on this database, so fail immediately if it cannot
# be created.
$primary->safe_psql('postgres', "CREATE DATABASE slotsync_test_db");
$primary->wait_for_replay_catchup($standby1);

$standby1->stop;

# On the primary server, create '=' operator in another schema mapped to
# inequality function and redirect the queries to use new operator by setting
# search_path. The new '=' operator is created with leftarg as 'bigint' and
# right arg as 'int' to redirect 'count(*) = 1' in slot sync's query to use
# new '=' operator.
#
# The script is single-quoted (q{}) on purpose: it contains $$ and $1/$2,
# which must reach the server untouched.
$primary->safe_psql(
	'slotsync_test_db', q{

CREATE ROLE repl_role REPLICATION LOGIN;
CREATE SCHEMA myschema;

CREATE FUNCTION myschema.myintne(bigint, int) RETURNS bool as $$
		BEGIN
		  RETURN $1 <> $2;
		END;
	  $$ LANGUAGE plpgsql immutable;

CREATE OPERATOR myschema.= (
	  leftarg    = bigint,
	  rightarg   = int,
	  procedure  = myschema.myintne);

ALTER DATABASE slotsync_test_db SET SEARCH_PATH TO myschema,pg_catalog;
GRANT USAGE on SCHEMA myschema TO repl_role;
});

# Start the standby with changed primary_conninfo.
$standby1->append_conf('postgresql.conf',
	"primary_conninfo = '$connstr_1 dbname=slotsync_test_db user=repl_role'");
$standby1->start;

# Run the synchronization function. If the sync flow was not prepared
# to handle such attacks, it would have failed during the validation
# of the primary_slot_name itself resulting in
# ERROR:  slot synchronization requires valid primary_slot_name
$standby1->safe_psql('slotsync_test_db',
	"SELECT pg_sync_replication_slots();");

# Reset the dbname and user in primary_conninfo to the earlier values.
$standby1->append_conf('postgresql.conf',
	"primary_conninfo = '$connstr_1 dbname=postgres'");
$standby1->reload;

# Drop the newly created database.  This is cleanup only, so it is left as a
# best-effort psql() call whose result is not checked.
$primary->psql('postgres', q{DROP DATABASE slotsync_test_db;});

##################################################
# Test to confirm that the slot sync worker exits when a required GUC is
# invalid and gets started again once the GUCs are valid.
##################################################

{
	# Record the current end of standby1's log in $log_offset, then append
	# the given setting to its postgresql.conf and reload the configuration.
	my $apply_standby1_setting = sub {
		my ($setting) = @_;

		$log_offset = -s $standby1->logfile;
		$standby1->append_conf('postgresql.conf', $setting);
		$standby1->reload;
		return;
	};

	# Enable the slot sync worker and confirm that it is able to start.
	$apply_standby1_setting->(qq(sync_replication_slots = on));
	$standby1->wait_for_log(qr/slot sync worker started/, $log_offset);

	# Disable another GUC required for slot sync.  The worker should
	# acknowledge the parameter change and then log a message about the
	# wrong configuration.
	$apply_standby1_setting->(qq(hot_standby_feedback = off));
	$standby1->wait_for_log(
		qr/slot sync worker will restart because of a parameter change/,
		$log_offset);
	$standby1->wait_for_log(
		qr/slot synchronization requires hot_standby_feedback to be enabled/,
		$log_offset);

	# Re-enable the required GUC; the worker should be able to start now.
	$apply_standby1_setting->("hot_standby_feedback = on");
	$standby1->wait_for_log(qr/slot sync worker started/, $log_offset);
}

##################################################
# Test to confirm that confirmed_flush_lsn of the logical slot on the primary
# is synced to the standby via the slot sync worker.
##################################################

# Insert data on the primary
$primary->safe_psql(
	'postgres', qq[
	CREATE TABLE tab_int (a int PRIMARY KEY);
	INSERT INTO tab_int SELECT generate_series(1, 10);
]);

# Subscribe to the new table data and wait for it to arrive.  The
# subscription reuses the existing lsub1_slot (create_slot = false).
$subscriber1->safe_psql(
	'postgres', qq[
	CREATE TABLE tab_int (a int PRIMARY KEY);
	CREATE SUBSCRIPTION regress_mysub1 CONNECTION '$publisher_connstr' PUBLICATION regress_mypub WITH (slot_name = lsub1_slot, failover = true, create_slot = false);
]);

$subscriber1->wait_for_subscription_sync;

# Do not allow any further advancement of the confirmed_flush_lsn for the
# lsub1_slot.
$subscriber1->safe_psql('postgres',
	"ALTER SUBSCRIPTION regress_mysub1 DISABLE");

# Wait for the replication slot to become inactive on the publisher.  If the
# poll times out the LSN captured below could still move, so give up here
# rather than continue with an unreliable value.
$primary->poll_query_until(
	'postgres',
	"SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE slot_name = 'lsub1_slot' AND active='f'",
	1) or die "Timed out while waiting for lsub1_slot to become inactive";

# Get the confirmed_flush_lsn for the logical slot lsub1_slot on the primary
my $primary_flush_lsn = $primary->safe_psql('postgres',
	"SELECT confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot';"
);

# Confirm that confirmed_flush_lsn of lsub1_slot slot is synced to the standby
ok( $standby1->poll_query_until(
		'postgres',
		"SELECT '$primary_flush_lsn' = confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot' AND synced AND NOT temporary;"
	),
	'confirmed_flush_lsn of slot lsub1_slot synced to standby');

##################################################
# Test that logical failover replication slots wait for the specified
# physical replication slots to receive the changes first. It uses the
# following set up:
#
#               (physical standbys)
#               | ----> standby1 (primary_slot_name = sb1_slot)
#               | ----> standby2 (primary_slot_name = sb2_slot)
# primary ----- |
#               (logical replication)
#               | ----> subscriber1 (failover = true, slot_name = lsub1_slot)
#               | ----> subscriber2 (failover = false, slot_name = lsub2_slot)
#
# standby_slot_names = 'sb1_slot'
#
# The setup is configured in such a way that the logical slot of subscriber1 is
# enabled for failover, and thus the subscriber1 will wait for the physical
# slot of standby1(sb1_slot) to catch up before receiving the decoded changes.
##################################################

$backup_name = 'backup3';

# standby2 streams from this slot, so its creation must not fail silently.
$primary->safe_psql('postgres',
	q{SELECT pg_create_physical_replication_slot('sb2_slot');});

$primary->backup($backup_name);

# Create another standby
my $standby2 = PostgreSQL::Test::Cluster->new('standby2');
$standby2->init_from_backup(
	$primary, $backup_name,
	has_streaming => 1,
	has_restoring => 1);
$standby2->append_conf(
	'postgresql.conf', qq(
primary_slot_name = 'sb2_slot'
));
$standby2->start;
$primary->wait_for_replay_catchup($standby2);

# Configure primary to disallow any logical slots that have enabled failover
# from getting ahead of the specified physical replication slot (sb1_slot).
$primary->append_conf(
	'postgresql.conf', qq(
standby_slot_names = 'sb1_slot'
));
$primary->reload;

# Create another subscriber node without enabling failover, wait for sync to
# complete
my $subscriber2 = PostgreSQL::Test::Cluster->new('subscriber2');
$subscriber2->init;
$subscriber2->start;
$subscriber2->safe_psql(
	'postgres', qq[
	CREATE TABLE tab_int (a int PRIMARY KEY);
	CREATE SUBSCRIPTION regress_mysub2 CONNECTION '$publisher_connstr' PUBLICATION regress_mypub WITH (slot_name = lsub2_slot);
]);

$subscriber2->wait_for_subscription_sync;

$subscriber1->safe_psql('postgres',
	"ALTER SUBSCRIPTION regress_mysub1 ENABLE");

# Remember the current end of the primary's log so that only messages written
# from here on are searched below.
my $offset = -s $primary->logfile;

# Stop the standby associated with the specified physical replication slot
# (sb1_slot) so that the logical replication slot (lsub1_slot) won't receive
# changes until the standby comes up.
$standby1->stop;

# Create some data on the primary
my $primary_row_count = 20;
$primary->safe_psql('postgres',
	"INSERT INTO tab_int SELECT generate_series(11, $primary_row_count);");

# Wait until the standby2 that's still running gets the data from the primary
$primary->wait_for_replay_catchup($standby2);
$result = $standby2->safe_psql('postgres',
	"SELECT count(*) = $primary_row_count FROM tab_int;");
is($result, 't', "standby2 gets data from primary");

# Wait for regress_mysub2 to get the data from the primary. This subscription
# was not enabled for failover so it gets the data without waiting for any
# standbys.
$primary->wait_for_catchup('regress_mysub2');
$result = $subscriber2->safe_psql('postgres',
	"SELECT count(*) = $primary_row_count FROM tab_int;");
is($result, 't', "subscriber2 gets data from primary");

# Wait until the primary server logs a warning indicating that it is waiting
# for the sb1_slot to catch up.
$primary->wait_for_log(
	qr/replication slot \"sb1_slot\" specified in parameter standby_slot_names does not have active_pid/,
	$offset);

# The regress_mysub1 was enabled for failover so it doesn't get the data from
# primary and keeps waiting for the standby specified in standby_slot_names
# (sb1_slot aka standby1).
$result = $subscriber1->safe_psql('postgres',
	"SELECT count(*) <> $primary_row_count FROM tab_int;");
is($result, 't',
	"subscriber1 doesn't get data from primary until standby1 acknowledges changes"
);

# Start the standby specified in standby_slot_names (sb1_slot aka standby1) and
# wait for it to catch up with the primary.
$standby1->start;
$primary->wait_for_replay_catchup($standby1);
$result = $standby1->safe_psql('postgres',
	"SELECT count(*) = $primary_row_count FROM tab_int;");
is($result, 't', "standby1 gets data from primary");

# Now that the standby specified in standby_slot_names is up and running, the
# primary can send the decoded changes to the subscription enabled for failover
# (i.e. regress_mysub1). While the standby was down, regress_mysub1 didn't
# receive any data from the primary. i.e. the primary didn't allow it to go
# ahead of standby.
$primary->wait_for_catchup('regress_mysub1');
$result = $subscriber1->safe_psql('postgres',
	"SELECT count(*) = $primary_row_count FROM tab_int;");
is($result, 't',
	"subscriber1 gets data from primary after standby1 acknowledges changes");

##################################################
# Verify that when using pg_logical_slot_get_changes to consume changes from a
# logical failover slot, it will also wait for the slots specified in
# standby_slot_names to catch up.
##################################################

# Empty the table and let regress_mysub1 receive that change, then stop the
# standby associated with the specified physical replication slot so that the
# logical replication slot won't receive changes until the standby slot's
# restart_lsn is advanced or the slot is removed from the standby_slot_names
# list.
$primary->safe_psql('postgres', "TRUNCATE tab_int;");
$primary->wait_for_catchup('regress_mysub1');
$standby1->stop;

# Disable the regress_mysub1 to prevent the logical walsender from generating
# more warnings.
$subscriber1->safe_psql('postgres',
	"ALTER SUBSCRIPTION regress_mysub1 DISABLE");

# Wait for the replication slot to become inactive on the publisher.  Give up
# if that never happens: the log check below relies on the walsender for
# lsub1_slot no longer producing warnings.
$primary->poll_query_until(
	'postgres',
	"SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE slot_name = 'lsub1_slot' AND active = 'f'",
	1) or die "Timed out while waiting for lsub1_slot to become inactive";

# Create a logical 'test_decoding' replication slot with failover enabled
$primary->safe_psql('postgres',
	"SELECT pg_create_logical_replication_slot('test_slot', 'test_decoding', false, false, true);"
);

my $back_q = $primary->background_psql(
	'postgres',
	on_error_stop => 0,
	timeout => $PostgreSQL::Test::Utils::timeout_default);

# pg_logical_slot_get_changes will be blocked until the standby catches up,
# hence it needs to be executed in a background session.  The \echo marker
# lets query_until() return as soon as the script has been sent, without
# waiting for the blocked SELECT to finish.
$offset = -s $primary->logfile;
$back_q->query_until(
	qr/logical_slot_get_changes/, q(
   \echo logical_slot_get_changes
   SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);
));

# Wait until the primary server logs a warning indicating that it is waiting
# for the sb1_slot to catch up.
$primary->wait_for_log(
	qr/replication slot \"sb1_slot\" specified in parameter standby_slot_names does not have active_pid/,
	$offset);

# Remove the standby from the standby_slot_names list and reload the
# configuration.
$primary->adjust_conf('postgresql.conf', 'standby_slot_names', "''");
$primary->reload;

# Since there are no slots in standby_slot_names, the function
# pg_logical_slot_get_changes should now return, and the session can be
# stopped.
$back_q->quit;

$primary->safe_psql('postgres',
	"SELECT pg_drop_replication_slot('test_slot');");

# Add the physical slot (sb1_slot) back to the standby_slot_names for further
# tests.
$primary->adjust_conf('postgresql.conf', 'standby_slot_names', "'sb1_slot'");
$primary->reload;

# Enable the regress_mysub1 for further tests
$subscriber1->safe_psql('postgres',
	"ALTER SUBSCRIPTION regress_mysub1 ENABLE");

##################################################
# Test that logical replication keeps waiting for a user-created physical slot
# that is inactive, and only proceeds once that slot has been removed from
# standby_slot_names.
##################################################

# Note the current size of the primary's log so that wait_for_log below starts
# looking from this point.
$offset = -s $primary->logfile;

# Generate some rows on the primary.
$primary_row_count = 10;
$primary->safe_psql(
	'postgres',
	qq{INSERT INTO tab_int SELECT generate_series(1, $primary_row_count);});

# Block until the primary has logged the warning saying that sb1_slot, which
# is listed in standby_slot_names, has no active_pid, i.e. the primary is
# waiting for that slot to catch up.
$primary->wait_for_log(
	qr/replication slot \"sb1_slot\" specified in parameter standby_slot_names does not have active_pid/,
	$offset);
|
|
|
|
# As long as the slot named in standby_slot_names (sb1_slot) is inactive, the
# regress_mysub1 subscription must not receive the rows from the primary, so
# tab_int on the subscriber is expected to be empty.
$result = $subscriber1->safe_psql(
	'postgres',
	q{SELECT count(*) = 0 FROM tab_int;});
is( $result,
	't',
	q{subscriber1 doesn't get data as the sb1_slot doesn't catch up});
|
|
|
|
# Empty the standby_slot_names setting, so the standby's slot is no longer
# listed, and have the primary reload its configuration.
$primary->adjust_conf('postgresql.conf', 'standby_slot_names', q{''});
$primary->reload;

# Now that standby_slot_names lists no slots, the primary should send the
# decoded changes to the subscription. Wait for regress_mysub1 to catch up and
# then compare the subscriber's row count with what was inserted.
$primary->wait_for_catchup('regress_mysub1');
$result = $subscriber1->safe_psql(
	'postgres',
	qq{SELECT count(*) = $primary_row_count FROM tab_int;});
is( $result,
	't',
	'subscriber1 gets data from primary after standby1 is removed from the standby_slot_names list'
);
|
|
|
|
# Put the physical slot sb1_slot back into standby_slot_names and reload, so
# that the tests that follow run with it configured.
$primary->adjust_conf('postgresql.conf', 'standby_slot_names', q{'sb1_slot'});
$primary->reload;
|
|
|
|
##################################################
# Promote standby1 so that it becomes the primary, then verify that:
# a) the slots 'lsub1_slot' and 'snap_test_slot' still exist on the new primary
# b) logical replication for regress_mysub1 resumes successfully after the
#    failover
# c) changes can be consumed from the synced slot 'snap_test_slot'
##################################################

# Start standby1 and wait until it has replayed up to the primary.
$standby1->start;
$primary->wait_for_replay_catchup($standby1);

# Record the current timestamp on standby1 just before promoting it.
my $promotion_time_on_primary = $standby1->safe_psql(
	'postgres',
	qq{
SELECT current_timestamp;
});

$standby1->promote;
|
|
|
|
# Fetch inactive_since for the synced slot now that the promotion has
# happened. The slot is expected to have been given its inactive_since as part
# of the promotion. This must be checked before the slot is enabled on the new
# primary below; otherwise the slot becomes active, which sets inactive_since
# to NULL.
my $inactive_since_on_new_primary =
  $standby1->validate_slot_inactive_since('lsub1_slot',
	$promotion_time_on_primary);

# The value obtained on the new primary must be later than the one held in
# $inactive_since_on_primary (captured earlier in this file).
{
	my $got_newer_value = $standby1->safe_psql(
		'postgres',
		qq{SELECT '$inactive_since_on_new_primary'::timestamptz > '$inactive_since_on_primary'::timestamptz}
	);
	is( $got_newer_value,
		't',
		'synchronized slot has got its own inactive_since on the new primary after promotion'
	);
}
|
|
|
|
# Point the subscription at the new primary by replacing its connection
# string.
my $standby1_conninfo = $standby1->connstr;
$standby1_conninfo .= ' dbname=postgres';
$subscriber1->safe_psql(
	'postgres',
	qq{ALTER SUBSCRIPTION regress_mysub1 CONNECTION '$standby1_conninfo';});

# Both 'lsub1_slot' and 'snap_test_slot' must still be present on the new
# primary, flagged as synced and not temporary.
{
	my $both_slots_kept = $standby1->safe_psql(
		'postgres',
		q[SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN ('lsub1_slot', 'snap_test_slot') AND synced AND NOT temporary;]
	);
	is($both_slots_kept, 't', 'synced slot retained on the new primary');
}
|
|
|
|
# Add ten more rows (11..20), this time on the new primary, and wait for the
# subscription to catch up with it.
$standby1->safe_psql(
	'postgres',
	q{INSERT INTO tab_int SELECT generate_series(11, 20);});
$standby1->wait_for_catchup('regress_mysub1');

# The subscriber should now hold all 20 rows of tab_int.
{
	my $subscriber_rows =
	  $subscriber1->safe_psql('postgres', 'SELECT count(*) FROM tab_int;');
	is($subscriber_rows, '20', 'data replicated from the new primary');
}
|
|
|
|
# Consume the data from the snap_test_slot. The synced slot should reach a
# consistent point by restoring the snapshot at the restart_lsn serialized
# during slot synchronization.
#
# Only the decoded rows whose text contains "message" are counted. Note that
# "~" is a POSIX regular-expression match, not a glob: the pattern is the
# plain word 'message' (a trailing '*' would merely make the final "e"
# optional and repeatable).
$result = $standby1->safe_psql('postgres',
	"SELECT count(*) FROM pg_logical_slot_get_changes('snap_test_slot', NULL, NULL) WHERE data ~ 'message';"
);

# Exactly one such row is expected from the slot.
is($result, '1', "data can be consumed using snap_test_slot");

done_testing();
|