From 5ddf9973477729cf161b4ad0a1efd52f4fea9c88 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 26 Jan 2024 13:25:19 -0500 Subject: [PATCH] Temporary patch to help debug pg_walsummary test failures. The tests in 002_blocks.pl are failing in the buildfarm from time to time, but we don't know how to reproduce the failure elsewhere. The most obvious explanation seems to be the unexpected disappearance of a WAL summary file, so bump up the logging level in RemoveWalSummaryIfOlderThan to try to help us spot such problems, and print the cutoff time in addition to the removed filename. Also adjust 002_blocks.pl to dump out a directory listing of the relevant directory at various points. This patch should be reverted once we sort out what's happening here. Patch by me, reviewed by Nathan Bossart, who also reported the issue. Discussion: http://postgr.es/m/20240124170846.GA2643050@nathanxps13 --- src/backend/backup/walsummary.c | 7 +++++++ src/bin/pg_walsummary/t/002_blocks.pl | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/backend/backup/walsummary.c b/src/backend/backup/walsummary.c index b549673a9d..ae314d8b74 100644 --- a/src/backend/backup/walsummary.c +++ b/src/backend/backup/walsummary.c @@ -251,8 +251,15 @@ RemoveWalSummaryIfOlderThan(WalSummaryFile *ws, time_t cutoff_time) ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", path))); + /* XXX temporarily changed to debug buildfarm failures */ +#if 0 ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path))); +#else + ereport(LOG, + (errmsg_internal("removing file \"%s\" cutoff_time=%llu", path, + (unsigned long long) cutoff_time))); +#endif } /* diff --git a/src/bin/pg_walsummary/t/002_blocks.pl b/src/bin/pg_walsummary/t/002_blocks.pl index d609d2c547..40908da8cb 100644 --- a/src/bin/pg_walsummary/t/002_blocks.pl +++ b/src/bin/pg_walsummary/t/002_blocks.pl @@ -48,6 +48,7 @@ SELECT summarized_tli, summarized_lsn FROM pg_get_wal_summarizer_state() EOM ($summarized_tli, $summarized_lsn) = split(/\|/, $progress); note("after insert, summarized TLI $summarized_tli through $summarized_lsn"); +note_wal_summary_dir("after insert", $node1); # Update a row in the first block of the table and trigger a checkpoint. $node1->safe_psql('postgres', <data_dir; + my @wsfiles = grep { $_ ne '.' && $_ ne '..' } slurp_dir($wsdir); + note("$flair pg_wal/summaries has: @wsfiles"); +}