From 9d92582abf918215d27659d45a4c9e78bda50aff Mon Sep 17 00:00:00 2001
From: Alvaro Herrera
Date: Wed, 23 Mar 2022 18:22:10 +0100
Subject: [PATCH] Fix "missing continuation record" after standby promotion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Invalidate abortedRecPtr and missingContrecPtr after a missing
continuation record is successfully skipped on a standby. This fixes a
PANIC caused when a recently promoted standby attempts to write an
OVERWRITE_RECORD with an LSN of the previously read aborted record.

Backpatch to 10 (all stable versions).

Author: Sami Imseih
Reviewed-by: Kyotaro Horiguchi
Reviewed-by: Álvaro Herrera
Discussion: https://postgr.es/m/44D259DE-7542-49C4-8A52-2AB01534DCA9@amazon.com
---
 src/backend/access/transam/xlogrecovery.c       | 4 ++++
 src/test/recovery/t/026_overwrite_contrecord.pl | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 9feea3e6ec..8d2395dae2 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -1948,6 +1948,10 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
 				 LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
 				 LSN_FORMAT_ARGS(record->overwrittenRecPtr));
 
+		/* We have safely skipped the aborted record */
+		abortedRecPtr = InvalidXLogRecPtr;
+		missingContrecPtr = InvalidXLogRecPtr;
+
 		ereport(LOG,
 				(errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s",
 						LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
diff --git a/src/test/recovery/t/026_overwrite_contrecord.pl b/src/test/recovery/t/026_overwrite_contrecord.pl
index 0fd907f152..78feccd9aa 100644
--- a/src/test/recovery/t/026_overwrite_contrecord.pl
+++ b/src/test/recovery/t/026_overwrite_contrecord.pl
@@ -13,7 +13,7 @@ use Test::More;
 # Test: Create a physical replica that's missing the last WAL file,
 # then restart the primary to create a divergent WAL file and observe
 # that the replica replays the "overwrite contrecord" from that new
-# file.
+# file and the standby promotes successfully.
 
 my $node = PostgreSQL::Test::Cluster->new('primary');
 $node->init(allows_streaming => 1);
@@ -100,6 +100,9 @@ like(
 	qr[successfully skipped missing contrecord at],
 	"found log line in standby");
 
+# Verify promotion is successful
+$node_standby->promote;
+
 $node->stop;
 $node_standby->stop;