diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index d521fa6e7b..401d805a8f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.343 2009/06/11 14:48:54 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.344 2009/06/25 21:36:00 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -4912,12 +4912,18 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) { char recoveryPath[MAXPGPATH]; char xlogpath[MAXPGPATH]; + XLogRecPtr InvalidXLogRecPtr = {0, 0}; /* * We are no longer in archive recovery state. */ InArchiveRecovery = false; + /* + * Update min recovery point one last time. + */ + UpdateMinRecoveryPoint(InvalidXLogRecPtr, true); + /* * We should have the ending log segment currently open. Verify, and then * close it (to avoid problems on Windows with trying to rename or delete @@ -5156,6 +5162,7 @@ StartupXLOG(void) XLogRecord *record; uint32 freespace; TransactionId oldestActiveXID; + bool bgwriterLaunched = false; XLogCtl->SharedRecoveryInProgress = true; @@ -5472,7 +5479,11 @@ StartupXLOG(void) * process in addition to postmaster! */ if (InArchiveRecovery && IsUnderPostmaster) + { + SetForwardFsyncRequests(); SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED); + bgwriterLaunched = true; + } /* * main redo apply loop @@ -5709,12 +5720,6 @@ StartupXLOG(void) /* Pre-scan prepared transactions to find out the range of XIDs present */ oldestActiveXID = PrescanPreparedTransactions(); - /* - * Allow writing WAL for us, so that we can create a checkpoint record. - * But not yet for other backends! 
- */ - LocalRecoveryInProgress = false; - if (InRecovery) { int rmid; @@ -5743,7 +5748,12 @@ StartupXLOG(void) * the rule that TLI only changes in shutdown checkpoints, which * allows some extra error checking in xlog_redo. */ - CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); + if (bgwriterLaunched) + RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY | + CHECKPOINT_IMMEDIATE | + CHECKPOINT_WAIT); + else + CreateCheckPoint(CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IMMEDIATE); /* * And finally, execute the recovery_end_command, if any. @@ -5806,7 +5816,7 @@ StartupXLOG(void) } /* - * All done. Allow others to write WAL. + * All done. Allow backends to write WAL. */ XLogCtl->SharedRecoveryInProgress = false; } @@ -6123,12 +6133,13 @@ LogCheckpointStart(int flags, bool restartpoint) * the main message, but what about all the flags? */ if (restartpoint) - msg = "restartpoint starting:%s%s%s%s%s%s"; + msg = "restartpoint starting:%s%s%s%s%s%s%s"; else - msg = "checkpoint starting:%s%s%s%s%s%s"; + msg = "checkpoint starting:%s%s%s%s%s%s%s"; elog(LOG, msg, (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "", + (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "", (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "", (flags & CHECKPOINT_FORCE) ? " force" : "", (flags & CHECKPOINT_WAIT) ? " wait" : "", @@ -6190,10 +6201,12 @@ LogCheckpointEnd(bool restartpoint) * * flags is a bitwise OR of the following: * CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown. + * CHECKPOINT_END_OF_RECOVERY: checkpoint is for end of WAL recovery. * CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP, * ignoring checkpoint_completion_target parameter. * CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occured - * since the last one (implied by CHECKPOINT_IS_SHUTDOWN). + * since the last one (implied by CHECKPOINT_IS_SHUTDOWN and + * CHECKPOINT_END_OF_RECOVERY). * * Note: flags contains other bits, of interest here only for logging purposes. 
* In particular note that this routine is synchronous and does not pay @@ -6202,7 +6215,7 @@ LogCheckpointEnd(bool restartpoint) void CreateCheckPoint(int flags) { - bool shutdown = (flags & CHECKPOINT_IS_SHUTDOWN) != 0; + bool shutdown; CheckPoint checkPoint; XLogRecPtr recptr; XLogCtlInsert *Insert = &XLogCtl->Insert; @@ -6212,34 +6225,52 @@ CreateCheckPoint(int flags) uint32 _logSeg; TransactionId *inCommitXids; int nInCommit; + bool OldInRecovery = InRecovery; - /* shouldn't happen */ - if (RecoveryInProgress()) - elog(ERROR, "can't create a checkpoint during recovery"); + /* + * An end-of-recovery checkpoint is really a shutdown checkpoint, just + * issued at a different time. + */ + if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY)) != 0) + shutdown = true; + else + shutdown = false; + + /* + * A startup checkpoint is created before anyone else is allowed to + * write WAL. To allow us to write the checkpoint record, set + * LocalRecoveryInProgress to false. This lets us write WAL, but others + * are still not allowed to do so. + */ + if (flags & CHECKPOINT_END_OF_RECOVERY) + { + Assert(RecoveryInProgress()); + LocalRecoveryInProgress = false; + InitXLOGAccess(); + + /* + * Before 8.4, end-of-recovery checkpoints were always performed by + * the startup process, and InRecovery was set true. InRecovery is not + * normally set in bgwriter, but we set it here temporarily to avoid + * confusing old code in the end-of-recovery checkpoint code path that + * relies on it. + */ + InRecovery = true; + } + else + { + /* shouldn't happen */ + if (RecoveryInProgress()) + elog(ERROR, "can't create a checkpoint during recovery"); + } /* * Acquire CheckpointLock to ensure only one checkpoint happens at a time. - * During normal operation, bgwriter is the only process that creates - * checkpoints, but at the end of archive recovery, the bgwriter can be - * busy creating a restartpoint while the startup process tries to perform - * the startup checkpoint.
+ * (This is just pro forma, since in the present system structure there is + * only one process that is allowed to issue checkpoints at any given + * time.) */ - if (!LWLockConditionalAcquire(CheckpointLock, LW_EXCLUSIVE)) - { - Assert(InRecovery); - - /* - * A restartpoint is in progress. Wait until it finishes. This can - * cause an extra restartpoint to be performed, but that's OK because - * we're just about to perform a checkpoint anyway. Flushing the - * buffers in this restartpoint can take some time, but that time is - * saved from the upcoming checkpoint so the net effect is zero. - */ - ereport(DEBUG2, (errmsg("hurrying in-progress restartpoint"))); - RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT); - - LWLockAcquire(CheckpointLock, LW_EXCLUSIVE); - } + LWLockAcquire(CheckpointLock, LW_EXCLUSIVE); /* * Prepare to accumulate statistics. @@ -6298,7 +6329,8 @@ CreateCheckPoint(int flags) * the end of the last checkpoint record, and its redo pointer must point * to itself. 
*/ - if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_FORCE)) == 0) + if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | + CHECKPOINT_FORCE)) == 0) { XLogRecPtr curInsert; @@ -6542,6 +6574,9 @@ CreateCheckPoint(int flags) CheckpointStats.ckpt_segs_recycled); LWLockRelease(CheckpointLock); + + /* Restore old value */ + InRecovery = OldInRecovery; } /* diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 629565a810..b5fd31532e 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.60 2009/06/11 14:49:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.61 2009/06/25 21:36:00 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -448,6 +448,13 @@ BackgroundWriterMain(void) bgs->ckpt_started++; SpinLockRelease(&bgs->ckpt_lck); + /* + * The end-of-recovery checkpoint is a real checkpoint that's + * performed while we're still in recovery. + */ + if (flags & CHECKPOINT_END_OF_RECOVERY) + do_restartpoint = false; + /* * We will warn if (a) too soon since last checkpoint (whatever * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag @@ -895,10 +902,12 @@ BgWriterShmemInit(void) * * flags is a bitwise OR of the following: * CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown. + * CHECKPOINT_END_OF_RECOVERY: checkpoint is to finish WAL recovery. * CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP, * ignoring checkpoint_completion_target parameter. * CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occured - * since the last one (implied by CHECKPOINT_IS_SHUTDOWN). + * since the last one (implied by CHECKPOINT_IS_SHUTDOWN and + * CHECKPOINT_END_OF_RECOVERY). * CHECKPOINT_WAIT: wait for completion before returning (otherwise, * just signal bgwriter to do it, and return). 
* CHECKPOINT_CAUSE_XLOG: checkpoint is requested due to xlog filling. diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 8655e90d29..18402a6ad6 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.146 2009/06/11 14:49:02 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.147 2009/06/25 21:36:00 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -203,6 +203,21 @@ mdinit(void) } } +/* + * In archive recovery, we rely on bgwriter to do the fsyncs, but at process + * startup, when pendingOpsTable is created, we don't yet know whether archive + * recovery will happen. This function calls mdsync() if the table exists, then + * sets pendingOpsTable to NULL so subsequent requests are forwarded to bgwriter. + */ +void +SetForwardFsyncRequests(void) +{ + /* Perform any pending ops we may have queued up */ + if (pendingOpsTable) + mdsync(); + pendingOpsTable = NULL; +} + /* * mdexists() -- Does the physical file exist?
* diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ca1d236154..ea9e232a08 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.91 2009/02/18 15:58:41 heikki Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.92 2009/06/25 21:36:00 heikki Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -166,6 +166,8 @@ extern bool XLOG_DEBUG; /* These indicate the cause of a checkpoint request */ #define CHECKPOINT_CAUSE_XLOG 0x0010 /* XLOG consumption */ #define CHECKPOINT_CAUSE_TIME 0x0020 /* Elapsed time */ +#define CHECKPOINT_END_OF_RECOVERY 0x0040 /* Like shutdown checkpoint, but + * issued at end of WAL recovery */ /* Checkpoint statistics */ typedef struct CheckpointStatsData diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 1b4b06ac96..1761f1c808 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.67 2009/06/11 14:49:12 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.68 2009/06/25 21:36:00 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -109,6 +109,7 @@ extern void mdpreckpt(void); extern void mdsync(void); extern void mdpostckpt(void); +extern void SetForwardFsyncRequests(void); extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno); extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum);