During recovery, if we reach a consistent state and still have entries in the

invalid-page hash table, PANIC immediately. An immediate PANIC is much better
than waiting for the end of recovery, which is what we did before, because
the end of recovery might not come for months if this is a standby
server.

Also refrain from creating a restartpoint if there are invalid-page entries
in the hash table. Restarting recovery from such a restartpoint would not
see the invalid references, and wouldn't be able to cross-check them when
consistency is reached. That wouldn't matter when things are going smoothly,
but the more sanity checks you have the better.

Fujii Masao
This commit is contained in:
Heikki Linnakangas 2011-12-02 10:49:54 +02:00
parent 15a5006aac
commit 1e616f6391
4 changed files with 70 additions and 28 deletions

View File

@ -562,7 +562,7 @@ static TimeLineID lastPageTLI = 0;
static XLogRecPtr minRecoveryPoint; /* local copy of
* ControlFile->minRecoveryPoint */
static bool updateMinRecoveryPoint = true;
static bool reachedMinRecoveryPoint = false;
bool reachedMinRecoveryPoint = false;
static bool InRedo = false;
@ -6758,12 +6758,6 @@ StartupXLOG(void)
/* Disallow XLogInsert again */
LocalXLogInsertAllowed = -1;
/*
* Check to see if the XLOG sequence contained any unresolved
* references to uninitialized pages.
*/
XLogCheckInvalidPages();
/*
* Perform a checkpoint to update all our recovery activity to disk.
*
@ -6906,6 +6900,12 @@ CheckRecoveryConsistency(void)
XLByteLE(minRecoveryPoint, EndRecPtr) &&
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
{
/*
* Check to see if the XLOG sequence contained any unresolved
* references to uninitialized pages.
*/
XLogCheckInvalidPages();
reachedMinRecoveryPoint = true;
ereport(LOG,
(errmsg("consistent recovery state reached at %X/%X",
@ -7907,7 +7907,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
volatile XLogCtlData *xlogctl = XLogCtl;
/*
* Is it safe to checkpoint? We must ask each of the resource managers
* Is it safe to restartpoint? We must ask each of the resource managers
* whether they have any partial state information that might prevent a
* correct restart from this point. If so, we skip this opportunity, but
* return at the next checkpoint record for another try.
@ -7926,6 +7926,22 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
}
}
/*
* Also refrain from creating a restartpoint if we have seen any references
* to non-existent pages. Restarting recovery from the restartpoint would
* not see the references, so we would lose the cross-check that the pages
* belonged to a relation that was dropped later.
*/
if (XLogHaveInvalidPages())
{
elog(trace_recovery(DEBUG2),
"could not record restart point at %X/%X because there "
"are unresolved references to invalid pages",
checkPoint->redo.xlogid,
checkPoint->redo.xrecoff);
return;
}
/*
* Copy the checkpoint record to shared memory, so that checkpointer
* can work out the next time it wants to perform a restartpoint.

View File

@ -52,6 +52,22 @@ typedef struct xl_invalid_page
static HTAB *invalid_page_tab = NULL;
/*
 * Report a reference to an invalid page.
 *
 * Emits one message at level "elevel" that names block "blkno" and the
 * path returned by relpathperm() for (node, forkno).  The wording depends
 * on "present": true yields "is uninitialized", false yields
 * "does not exist".  The path string is released with pfree() afterwards.
 */
static void
report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno,
					BlockNumber blkno, bool present)
{
	char	   *relpath;

	relpath = relpathperm(node, forkno);

	if (!present)
		elog(elevel, "page %u of relation %s does not exist",
			 blkno, relpath);
	else
		elog(elevel, "page %u of relation %s is uninitialized",
			 blkno, relpath);

	pfree(relpath);
}
/* Log a reference to an invalid page */
static void
log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
@ -61,23 +77,27 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
xl_invalid_page *hentry;
bool found;
/*
* Once recovery has reached a consistent state, the invalid-page table
* should be empty and remain so. If a reference to an invalid page is
* found after consistency is reached, PANIC immediately. This might
* seem aggressive, but it's better than letting the invalid reference
* linger in the hash table until the end of recovery and PANIC there,
* which might come only much later if this is a standby server.
*/
if (reachedMinRecoveryPoint)
{
report_invalid_page(WARNING, node, forkno, blkno, present);
elog(PANIC, "WAL contains references to invalid pages");
}
/*
* Log references to invalid pages at DEBUG1 level. This allows some
* tracing of the cause (note the elog context mechanism will tell us
* something about the XLOG record that generated the reference).
*/
if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1)
{
char *path = relpathperm(node, forkno);
if (present)
elog(DEBUG1, "page %u of relation %s is uninitialized",
blkno, path);
else
elog(DEBUG1, "page %u of relation %s does not exist",
blkno, path);
pfree(path);
}
report_invalid_page(DEBUG1, node, forkno, blkno, present);
if (invalid_page_tab == NULL)
{
@ -181,6 +201,16 @@ forget_invalid_pages_db(Oid dbid)
}
}
/*
 * Are there any unresolved references to invalid pages?
 *
 * Returns true only when the invalid-page hash table has been created and
 * currently holds at least one entry; otherwise returns false.
 */
bool
XLogHaveInvalidPages(void)
{
	return (invalid_page_tab != NULL &&
			hash_get_num_entries(invalid_page_tab) > 0);
}
/* Complain about any remaining invalid-page entries */
void
XLogCheckInvalidPages(void)
@ -200,15 +230,8 @@ XLogCheckInvalidPages(void)
*/
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
{
char *path = relpathperm(hentry->key.node, hentry->key.forkno);
if (hentry->present)
elog(WARNING, "page %u of relation %s was uninitialized",
hentry->key.blkno, path);
else
elog(WARNING, "page %u of relation %s did not exist",
hentry->key.blkno, path);
pfree(path);
report_invalid_page(WARNING, hentry->key.node, hentry->key.forkno,
hentry->key.blkno, hentry->present);
foundone = true;
}

View File

@ -190,6 +190,8 @@ typedef enum
extern XLogRecPtr XactLastRecEnd;
extern bool reachedMinRecoveryPoint;
/* these variables are GUC parameters related to XLOG */
extern int CheckPointSegments;
extern int wal_keep_segments;

View File

@ -14,6 +14,7 @@
#include "storage/bufmgr.h"
extern bool XLogHaveInvalidPages(void);
extern void XLogCheckInvalidPages(void);
extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum);