From 374464c3e96fe3d55dc82a8417b034de14222a0b Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Thu, 30 Jan 2020 11:15:35 +0900
Subject: [PATCH] Fix slot data persistency when advancing physical
 replication slots

Advancing a physical replication slot with pg_replication_slot_advance()
did not mark the slot as dirty if any advancing was done, preventing the
follow-up checkpoint from flushing the slot data to disk.  This caused
the advancing to be lost even on clean restarts.  This does not happen
for logical slots, as any advancing already marks the slot as dirty.
Per discussion, the original feature has been implemented so that, in
the event of a crash, the slot may move backwards to a past LSN.  This
property is kept, and more documentation is added about it.

This commit adds new TAP tests to check that the advancing of physical
and logical slots persists across clean restarts.

Author: Alexey Kondratov, Michael Paquier
Reviewed-by: Andres Freund, Kyotaro Horiguchi, Craig Ringer
Discussion: https://postgr.es/m/059cc53a-8b14-653a-a24d-5f867503b0ee@postgrespro.ru
Backpatch-through: 11
---
 doc/src/sgml/func.sgml                      |  7 ++++--
 src/backend/replication/slotfuncs.c         | 23 +++++++++---------
 src/test/recovery/t/001_stream_rep.pl       | 27 ++++++++++++++++++++-
 src/test/recovery/t/006_logical_decoding.pl | 26 +++++++++++++++++++-
 4 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index a9d0e989dc..3fd67e606f 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -19437,8 +19437,11 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
         Advances the current confirmed position of a replication slot named
         <parameter>slot_name</parameter>. The slot will not be moved backwards,
-        and it will not be moved beyond the current insert location. Returns
-        name of the slot and real position to which it was advanced to.
+        and it will not be moved beyond the current insert location. Returns
+        the name of the slot and the real position to which it was advanced.
+        The information of the updated slot is written out at the next
+        checkpoint if any advancing is done. In the event of a crash, the
+        slot may return to an earlier position.
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index 9bfeca2055..ffcce853c3 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -335,6 +335,14 @@ pg_physical_replication_slot_advance(XLogRecPtr moveto)
 		MyReplicationSlot->data.restart_lsn = moveto;
 		SpinLockRelease(&MyReplicationSlot->mutex);
 		retlsn = moveto;
+
+		/*
+		 * Dirty the slot so it is written out at the next checkpoint.
+		 * Note that the LSN position advanced to may still be lost in the
+		 * event of a crash, but this makes the data consistent after a
+		 * clean shutdown.
+		 */
+		ReplicationSlotMarkDirty();
 	}
 
 	return retlsn;
@@ -439,9 +447,9 @@ pg_logical_replication_slot_advance(XLogRecPtr moveto)
 			 * keep track of their progress, so we should make more of an
 			 * effort to save it for them.
 			 *
-			 * Dirty the slot so it's written out at the next checkpoint.
-			 * We'll still lose its position on crash, as documented, but it's
-			 * better than always losing the position even on clean restart.
+			 * Dirty the slot so it is written out at the next checkpoint.
+			 * The LSN position advanced to may still be lost on a crash,
+			 * but this makes the data consistent after a clean shutdown.
 			 */
 			ReplicationSlotMarkDirty();
 		}
@@ -538,15 +546,6 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS)
 	values[0] = NameGetDatum(&MyReplicationSlot->data.name);
 	nulls[0] = false;
 
-	/* Update the on disk state when lsn was updated. */
-	if (XLogRecPtrIsInvalid(endlsn))
-	{
-		ReplicationSlotMarkDirty();
-		ReplicationSlotsComputeRequiredXmin(false);
-		ReplicationSlotsComputeRequiredLSN();
-		ReplicationSlotSave();
-	}
-
 	ReplicationSlotRelease();
 
 	/* Return the reached position. */
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index d2f11c36ba..acc349dec6 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 34;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -334,3 +334,28 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+# Test physical slot advancing and its durability. Create a new slot on
+# the primary, not used by any of the standbys. This reserves WAL at creation.
+my $phys_slot = 'phys_slot';
+$node_master->safe_psql('postgres',
+	"SELECT pg_create_physical_replication_slot('$phys_slot', true);");
+$node_master->psql('postgres', "
+	CREATE TABLE tab_phys_slot (a int);
+	INSERT INTO tab_phys_slot VALUES (generate_series(1,10));");
+my $current_lsn = $node_master->safe_psql('postgres',
+	"SELECT pg_current_wal_lsn();");
+chomp($current_lsn);
+my $psql_rc = $node_master->psql('postgres',
+	"SELECT pg_replication_slot_advance('$phys_slot', '$current_lsn'::pg_lsn);");
+is($psql_rc, '0', 'slot advancing with physical slot');
+my $phys_restart_lsn_pre = $node_master->safe_psql('postgres',
+	"SELECT restart_lsn FROM pg_replication_slots WHERE slot_name = '$phys_slot';");
+chomp($phys_restart_lsn_pre);
+# Slot advance should persist across clean restarts.
+$node_master->restart;
+my $phys_restart_lsn_post = $node_master->safe_psql('postgres',
+	"SELECT restart_lsn FROM pg_replication_slots WHERE slot_name = '$phys_slot';");
+chomp($phys_restart_lsn_post);
+ok(($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0,
+	"physical slot advance persists across restarts");
diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl
index c23cc4dda7..721d54fc4e 100644
--- a/src/test/recovery/t/006_logical_decoding.pl
+++ b/src/test/recovery/t/006_logical_decoding.pl
@@ -7,7 +7,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 10;
+use Test::More tests => 12;
 use Config;
 
 # Initialize master node
@@ -135,5 +135,29 @@ is($node_master->psql('postgres', 'DROP DATABASE otherdb'),
 is($node_master->slot('otherdb_slot')->{'slot_name'}, undef,
 	'logical slot was actually dropped with DB');
 
+# Test logical slot advancing and its durability.
+my $logical_slot = 'logical_slot';
+$node_master->safe_psql('postgres',
+	"SELECT pg_create_logical_replication_slot('$logical_slot', 'test_decoding', false);");
+$node_master->psql('postgres', "
+	CREATE TABLE tab_logical_slot (a int);
+	INSERT INTO tab_logical_slot VALUES (generate_series(1,10));");
+my $current_lsn = $node_master->safe_psql('postgres',
+	"SELECT pg_current_wal_lsn();");
+chomp($current_lsn);
+my $psql_rc = $node_master->psql('postgres',
+	"SELECT pg_replication_slot_advance('$logical_slot', '$current_lsn'::pg_lsn);");
+is($psql_rc, '0', 'slot advancing with logical slot');
+my $logical_restart_lsn_pre = $node_master->safe_psql('postgres',
+	"SELECT restart_lsn FROM pg_replication_slots WHERE slot_name = '$logical_slot';");
+chomp($logical_restart_lsn_pre);
+# Slot advance should persist across clean restarts.
+$node_master->restart;
+my $logical_restart_lsn_post = $node_master->safe_psql('postgres',
+	"SELECT restart_lsn FROM pg_replication_slots WHERE slot_name = '$logical_slot';");
+chomp($logical_restart_lsn_post);
+ok(($logical_restart_lsn_pre cmp $logical_restart_lsn_post) == 0,
+	"logical slot advance persists across restarts");
+
 # done with the node
 $node_master->stop;
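
For reference, the behavior exercised by the new TAP tests can also be checked
by hand from psql.  This is only an illustrative sketch and not part of the
patch: the slot name demo_slot and table name demo_tab are arbitrary, and the
slot should be dropped afterwards with pg_drop_replication_slot('demo_slot').

-- Illustrative only: 'demo_slot' and 'demo_tab' are placeholder names.
SELECT pg_create_physical_replication_slot('demo_slot', true);
-- Generate a bit of WAL so that there is something to advance over.
CREATE TABLE demo_tab AS SELECT generate_series(1, 10) AS a;
-- Advance the slot to the current insert location and note its restart_lsn.
SELECT pg_replication_slot_advance('demo_slot', pg_current_wal_lsn());
SELECT restart_lsn FROM pg_replication_slots WHERE slot_name = 'demo_slot';
-- After a clean restart (e.g. pg_ctl restart), the same query should report
-- the advanced restart_lsn.  Without this fix, a physical slot silently
-- reverted to the restart_lsn it had before the advance.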