diff --git a/doc/src/sgml/ref/pg_resetxlog.sgml b/doc/src/sgml/ref/pg_resetxlog.sgml index acdf0c7aed..2450462169 100644 --- a/doc/src/sgml/ref/pg_resetxlog.sgml +++ b/doc/src/sgml/ref/pg_resetxlog.sgml @@ -1,5 +1,5 @@ @@ -22,6 +22,7 @@ PostgreSQL documentation -n -ooid -x xid + -e xid_epoch -m mxid -O mxoff -l timelineid,fileid,seg @@ -61,9 +62,9 @@ PostgreSQL documentation by specifying the -f (force) switch. In this case plausible values will be substituted for the missing data. Most of the fields can be expected to match, but manual assistance may be needed for the next OID, - next transaction ID, next multitransaction ID and offset, + next transaction ID and epoch, next multitransaction ID and offset, WAL starting address, and database locale fields. - The first five of these can be set using the switches discussed below. + The first six of these can be set using the switches discussed below. pg_resetxlog's own environment is the source for its guess at the locale fields; take care that LANG and so forth match the environment that initdb was run in. @@ -76,11 +77,12 @@ PostgreSQL documentation - The -o, -x, -m, -O, + The -o, -x, -e, + -m, -O, and -l - switches allow the next OID, next transaction ID, next multitransaction - ID, next multitransaction offset, and WAL starting address values to - be set manually. These are only needed when + switches allow the next OID, next transaction ID, next transaction ID's + epoch, next multitransaction ID, next multitransaction offset, and WAL + starting address values to be set manually. These are only needed when pg_resetxlog is unable to determine appropriate values by reading pg_control. Safe values may be determined as follows: @@ -146,6 +148,18 @@ PostgreSQL documentation get the next-OID setting right. + + + + The transaction ID epoch is not actually stored anywhere in the database + except in the field that is set by pg_resetxlog, + so any value will work so far as the database itself is concerned. 
+ You might need to adjust this value to ensure that replication + systems such as Slony-I work correctly — + if so, an appropriate value should be obtainable from the state of + the downstream replicated database. + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index d78f1c3074..16fb6b5e5e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.248 2006/08/17 23:04:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.249 2006/08/21 16:16:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -312,10 +312,8 @@ static XLogRecPtr RedoRecPtr; * new log file. * * CheckpointLock: must be held to do a checkpoint (ensures only one - * checkpointer at a time; even though the postmaster won't launch - * parallel checkpoint processes, we need this because manual checkpoints - * could be launched simultaneously). XXX now that all checkpoints are - * done by the bgwriter, isn't this lock redundant? + * checkpointer at a time; currently, with all checkpoints done by the + * bgwriter, this is just pro forma). 
* *---------- */ @@ -363,9 +361,13 @@ typedef struct XLogCtlData { /* Protected by WALInsertLock: */ XLogCtlInsert Insert; + /* Protected by info_lck: */ XLogwrtRqst LogwrtRqst; XLogwrtResult LogwrtResult; + uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */ + TransactionId ckptXid; + /* Protected by WALWriteLock: */ XLogCtlWrite Write; @@ -380,7 +382,7 @@ typedef struct XLogCtlData int XLogCacheBlck; /* highest allocated xlog buffer index */ TimeLineID ThisTimeLineID; - slock_t info_lck; /* locks shared LogwrtRqst/LogwrtResult */ + slock_t info_lck; /* locks shared variables shown above */ } XLogCtlData; static XLogCtlData *XLogCtl = NULL; @@ -4086,6 +4088,7 @@ BootStrapXLOG(void) checkPoint.redo.xrecoff = SizeOfXLogLongPHD; checkPoint.undo = checkPoint.redo; checkPoint.ThisTimeLineID = ThisTimeLineID; + checkPoint.nextXidEpoch = 0; checkPoint.nextXid = FirstNormalTransactionId; checkPoint.nextOid = FirstBootstrapObjectId; checkPoint.nextMulti = FirstMultiXactId; @@ -4752,8 +4755,9 @@ StartupXLOG(void) checkPoint.undo.xlogid, checkPoint.undo.xrecoff, wasShutdown ? 
"TRUE" : "FALSE"))); ereport(LOG, - (errmsg("next transaction ID: %u; next OID: %u", - checkPoint.nextXid, checkPoint.nextOid))); + (errmsg("next transaction ID: %u/%u; next OID: %u", + checkPoint.nextXidEpoch, checkPoint.nextXid, + checkPoint.nextOid))); ereport(LOG, (errmsg("next MultiXactId: %u; next MultiXactOffset: %u", checkPoint.nextMulti, checkPoint.nextMultiOffset))); @@ -5135,6 +5139,10 @@ StartupXLOG(void) /* start the archive_timeout timer running */ XLogCtl->Write.lastSegSwitchTime = ControlFile->time; + /* initialize shared-memory copy of latest checkpoint XID/epoch */ + XLogCtl->ckptXidEpoch = ControlFile->checkPointCopy.nextXidEpoch; + XLogCtl->ckptXid = ControlFile->checkPointCopy.nextXid; + /* Start up the commit log and related stuff, too */ StartupCLOG(); StartupSUBTRANS(oldestActiveXID); @@ -5364,6 +5372,46 @@ GetRecentNextXid(void) return ControlFile->checkPointCopy.nextXid; } +/* + * GetNextXidAndEpoch - get the current nextXid value and associated epoch + * + * This is exported for use by code that would like to have 64-bit XIDs. + * We don't really support such things, but all XIDs within the system + * can be presumed "close to" the result, and thus the epoch associated + * with them can be determined. + */ +void +GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch) +{ + uint32 ckptXidEpoch; + TransactionId ckptXid; + TransactionId nextXid; + + /* Must read checkpoint info first, else have race condition */ + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + SpinLockAcquire(&xlogctl->info_lck); + ckptXidEpoch = xlogctl->ckptXidEpoch; + ckptXid = xlogctl->ckptXid; + SpinLockRelease(&xlogctl->info_lck); + } + + /* Now fetch current nextXid */ + nextXid = ReadNewTransactionId(); + + /* + * nextXid is certainly logically later than ckptXid. So if it's + * numerically less, it must have wrapped into the next epoch. 
+ */ + if (nextXid < ckptXid) + ckptXidEpoch++; + + *xid = nextXid; + *epoch = ckptXidEpoch; +} + /* * This must be called ONCE during postmaster or standalone-backend shutdown */ @@ -5531,6 +5579,11 @@ CreateCheckPoint(bool shutdown, bool force) checkPoint.nextXid = ShmemVariableCache->nextXid; LWLockRelease(XidGenLock); + /* Increase XID epoch if we've wrapped around since last checkpoint */ + checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch; + if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid) + checkPoint.nextXidEpoch++; + LWLockAcquire(OidGenLock, LW_SHARED); checkPoint.nextOid = ShmemVariableCache->nextOid; if (!shutdown) @@ -5600,6 +5653,17 @@ CreateCheckPoint(bool shutdown, bool force) UpdateControlFile(); LWLockRelease(ControlFileLock); + /* Update shared-memory copy of checkpoint XID/epoch */ + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + SpinLockAcquire(&xlogctl->info_lck); + xlogctl->ckptXidEpoch = checkPoint.nextXidEpoch; + xlogctl->ckptXid = checkPoint.nextXid; + SpinLockRelease(&xlogctl->info_lck); + } + /* * We are now done with critical updates; no need for system panic if we * have trouble while fooling with offline log segments. 
@@ -5803,6 +5867,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); + /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ + ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch; + ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; + /* * TLI may change in a shutdown checkpoint, but it shouldn't decrease */ @@ -5836,6 +5904,11 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) } MultiXactAdvanceNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); + + /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ + ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch; + ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; + /* TLI should not change in an on-line checkpoint */ if (checkPoint.ThisTimeLineID != ThisTimeLineID) ereport(PANIC, @@ -5861,10 +5934,11 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "checkpoint: redo %X/%X; undo %X/%X; " - "tli %u; xid %u; oid %u; multi %u; offset %u; %s", + "tli %u; xid %u/%u; oid %u; multi %u; offset %u; %s", checkpoint->redo.xlogid, checkpoint->redo.xrecoff, checkpoint->undo.xlogid, checkpoint->undo.xrecoff, - checkpoint->ThisTimeLineID, checkpoint->nextXid, + checkpoint->ThisTimeLineID, + checkpoint->nextXidEpoch, checkpoint->nextXid, checkpoint->nextOid, checkpoint->nextMulti, checkpoint->nextMultiOffset, diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index e0f3cfd2d9..cbde5357ed 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -6,7 +6,7 @@ * copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001; * licence: BSD * - * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.30 2006/08/07 16:57:56 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.31 2006/08/21 16:16:31 tgl Exp $ */ #include 
"postgres.h" @@ -177,7 +177,8 @@ main(int argc, char *argv[]) ControlFile.checkPointCopy.undo.xrecoff); printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); - printf(_("Latest checkpoint's NextXID: %u\n"), + printf(_("Latest checkpoint's NextXID: %u/%u\n"), + ControlFile.checkPointCopy.nextXidEpoch, ControlFile.checkPointCopy.nextXid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c index c7589e92c1..a8308388e8 100644 --- a/src/bin/pg_resetxlog/pg_resetxlog.c +++ b/src/bin/pg_resetxlog/pg_resetxlog.c @@ -23,7 +23,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.51 2006/08/07 16:57:56 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.52 2006/08/21 16:16:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -71,6 +71,7 @@ main(int argc, char *argv[]) int c; bool force = false; bool noupdate = false; + uint32 set_xid_epoch = -1; TransactionId set_xid = 0; Oid set_oid = 0; MultiXactId set_mxid = 0; @@ -104,7 +105,7 @@ main(int argc, char *argv[]) } - while ((c = getopt(argc, argv, "fl:m:no:O:x:")) != -1) + while ((c = getopt(argc, argv, "fl:m:no:O:x:e:")) != -1) { switch (c) { @@ -116,6 +117,21 @@ main(int argc, char *argv[]) noupdate = true; break; + case 'e': + set_xid_epoch = strtoul(optarg, &endptr, 0); + if (endptr == optarg || *endptr != '\0') + { + fprintf(stderr, _("%s: invalid argument for option -e\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit(1); + } + if (set_xid_epoch == -1) + { + fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname); + exit(1); + } + break; + case 'x': set_xid = 
strtoul(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0') @@ -271,6 +287,9 @@ main(int argc, char *argv[]) * Adjust fields if required by switches. (Do this now so that printout, * if any, includes these values.) */ + if (set_xid_epoch != -1) + ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch; + if (set_xid != 0) ControlFile.checkPointCopy.nextXid = set_xid; @@ -441,6 +460,7 @@ GuessControlValues(void) ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD; ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo; ControlFile.checkPointCopy.ThisTimeLineID = 1; + ControlFile.checkPointCopy.nextXidEpoch = 0; ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */ ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId; ControlFile.checkPointCopy.nextMulti = FirstMultiXactId; @@ -513,29 +533,50 @@ PrintControlValues(bool guessed) snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT, ControlFile.system_identifier); - printf(_("pg_control version number: %u\n"), ControlFile.pg_control_version); - printf(_("Catalog version number: %u\n"), ControlFile.catalog_version_no); - printf(_("Database system identifier: %s\n"), sysident_str); - printf(_("Current log file ID: %u\n"), ControlFile.logId); - printf(_("Next log file segment: %u\n"), ControlFile.logSeg); - printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); - printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid); - printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); - printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti); - printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset); - printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign); + printf(_("pg_control version number: %u\n"), + ControlFile.pg_control_version); + printf(_("Catalog version number: %u\n"), + 
ControlFile.catalog_version_no); + printf(_("Database system identifier: %s\n"), + sysident_str); + printf(_("Current log file ID: %u\n"), + ControlFile.logId); + printf(_("Next log file segment: %u\n"), + ControlFile.logSeg); + printf(_("Latest checkpoint's TimeLineID: %u\n"), + ControlFile.checkPointCopy.ThisTimeLineID); + printf(_("Latest checkpoint's NextXID: %u/%u\n"), + ControlFile.checkPointCopy.nextXidEpoch, + ControlFile.checkPointCopy.nextXid); + printf(_("Latest checkpoint's NextOID: %u\n"), + ControlFile.checkPointCopy.nextOid); + printf(_("Latest checkpoint's NextMultiXactId: %u\n"), + ControlFile.checkPointCopy.nextMulti); + printf(_("Latest checkpoint's NextMultiOffset: %u\n"), + ControlFile.checkPointCopy.nextMultiOffset); + printf(_("Maximum data alignment: %u\n"), + ControlFile.maxAlign); /* we don't print floatFormat since can't say much useful about it */ - printf(_("Database block size: %u\n"), ControlFile.blcksz); - printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size); - printf(_("WAL block size: %u\n"), ControlFile.xlog_blcksz); - printf(_("Bytes per WAL segment: %u\n"), ControlFile.xlog_seg_size); - printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen); - printf(_("Maximum columns in an index: %u\n"), ControlFile.indexMaxKeys); + printf(_("Database block size: %u\n"), + ControlFile.blcksz); + printf(_("Blocks per segment of large relation: %u\n"), + ControlFile.relseg_size); + printf(_("WAL block size: %u\n"), + ControlFile.xlog_blcksz); + printf(_("Bytes per WAL segment: %u\n"), + ControlFile.xlog_seg_size); + printf(_("Maximum length of identifiers: %u\n"), + ControlFile.nameDataLen); + printf(_("Maximum columns in an index: %u\n"), + ControlFile.indexMaxKeys); printf(_("Date/time type storage: %s\n"), (ControlFile.enableIntTimes ? 
_("64-bit integers") : _("floating-point numbers"))); - printf(_("Maximum length of locale name: %u\n"), ControlFile.localeBuflen); - printf(_("LC_COLLATE: %s\n"), ControlFile.lc_collate); - printf(_("LC_CTYPE: %s\n"), ControlFile.lc_ctype); + printf(_("Maximum length of locale name: %u\n"), + ControlFile.localeBuflen); + printf(_("LC_COLLATE: %s\n"), + ControlFile.lc_collate); + printf(_("LC_CTYPE: %s\n"), + ControlFile.lc_ctype); } @@ -810,6 +851,7 @@ usage(void) printf(_(" -o OID set next OID\n")); printf(_(" -O OFFSET set next multitransaction offset\n")); printf(_(" -x XID set next transaction ID\n")); + printf(_(" -e XIDEPOCH set next transaction ID epoch\n")); printf(_(" --help show this help, then exit\n")); printf(_(" --version output version information, then exit\n")); printf(_("\nReport bugs to .\n")); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 22b0f0bb7b..a5ae94b91a 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.73 2006/08/17 23:04:08 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.74 2006/08/21 16:16:31 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -166,5 +166,6 @@ extern void CreateCheckPoint(bool shutdown, bool force); extern void XLogPutNextOid(Oid nextOid); extern XLogRecPtr GetRedoRecPtr(void); extern TransactionId GetRecentNextXid(void); +extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); #endif /* XLOG_H */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 8e405f607e..2b109f2d5a 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of 
California * - * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.31 2006/08/07 16:57:57 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.32 2006/08/21 16:16:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,7 +22,7 @@ /* Version identifier for this pg_control format */ -#define PG_CONTROL_VERSION 821 +#define PG_CONTROL_VERSION 822 /* * Body of CheckPoint XLOG records. This is declared here because we keep @@ -36,6 +36,7 @@ typedef struct CheckPoint * transaction when we started (i.e. UNDO end * point) */ TimeLineID ThisTimeLineID; /* current TLI */ + uint32 nextXidEpoch; /* higher-order bits of nextXid */ TransactionId nextXid; /* next free XID */ Oid nextOid; /* next free OID */ MultiXactId nextMulti; /* next free MultiXactId */