From 93d888232e80e4d676e24fe93ae6d27459d966be Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 8 Mar 2011 20:13:52 +0200 Subject: [PATCH] Don't throw a warning if vacuum sees PD_ALL_VISIBLE flag set on a page that contains newly-inserted tuples that according to our OldestXmin are not yet visible to everyone. The value returned by GetOldestXmin() is conservative, and it can move backwards on repeated calls, so if we see that contradiction between the PD_ALL_VISIBLE flag and status of tuples on the page, we have to assume it's because an earlier vacuum calculated a higher OldestXmin value, and all the tuples really are visible to everyone. We have received several reports of this bug, with the "PD_ALL_VISIBLE flag was incorrectly set in relation ..." warning appearing in logs. We were finally able to hunt it down with David Gould's help to run extra diagnostics in an environment where this happened frequently. Also reword the warning, per Robert Haas' suggestion, to not imply that the PD_ALL_VISIBLE flag is necessarily at fault, as it might also be a symptom of corruption on a tuple header. Backpatch to 8.4, where the PD_ALL_VISIBLE flag was introduced. --- src/backend/commands/vacuumlazy.c | 20 ++++++++++++++++++-- src/backend/storage/ipc/procarray.c | 12 ++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 515ea9173d..a5c024cc19 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -349,6 +349,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, Size freespace; bool all_visible_according_to_vm = false; bool all_visible; + bool has_dead_tuples; /* * Skip pages that don't require vacuuming according to the visibility @@ -500,6 +501,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, * requiring freezing. */ all_visible = true; + has_dead_tuples = false; nfrozen = 0; hastup = false; prev_dead_count = vacrelstats->num_dead_tuples; @@ -640,6 +642,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data, &vacrelstats->latestRemovedXid); tups_vacuumed += 1; + has_dead_tuples = true; } else { @@ -702,9 +705,22 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, PageSetAllVisible(page); SetBufferCommitInfoNeedsSave(buf); } - else if (PageIsAllVisible(page) && !all_visible) + /* + * It's possible for the value returned by GetOldestXmin() to move + * backwards, so it's not wrong for us to see tuples that appear to + * not be visible to everyone yet, while PD_ALL_VISIBLE is already + * set. The real safe xmin value never moves backwards, but + * GetOldestXmin() is conservative and sometimes returns a value + * that's unnecessarily small, so if we see that contradiction it + * just means that the tuples that we think are not visible to + * everyone yet actually are, and the PD_ALL_VISIBLE flag is correct. + * + * There should never be dead tuples on a page with PD_ALL_VISIBLE + * set, however. + */ + else if (PageIsAllVisible(page) && has_dead_tuples) { - elog(WARNING, "PD_ALL_VISIBLE flag was incorrectly set in relation \"%s\" page %u", + elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u", relname, blkno); PageClearAllVisible(page); SetBufferCommitInfoNeedsSave(buf); diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 2473881e8f..65fca7171a 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -1000,6 +1000,18 @@ TransactionIdIsActive(TransactionId xid) * This is also used to determine where to truncate pg_subtrans. allDbs * must be TRUE for that case, and ignoreVacuum FALSE. * + * Note: it's possible for the calculated value to move backwards on repeated + * calls. The calculated value is conservative, so that anything older is + * definitely not considered as running by anyone anymore, but the exact + * value calculated depends on a number of things. For example, if allDbs is + * TRUE and there are no transactions running in the current database, + * GetOldestXmin() returns latestCompletedXid. If a transaction begins after + * that, its xmin will include in-progress transactions in other databases + * that started earlier, so another call will return an lower value. The + * return value is also adjusted with vacuum_defer_cleanup_age, so increasing + * that setting on the fly is an easy way to have GetOldestXmin() move + * backwards. + * * Note: we include all currently running xids in the set of considered xids. * This ensures that if a just-started xact has not yet set its snapshot, * when it does set the snapshot it cannot set xmin less than what we compute.