Don't waste the last segment of each 4GB logical log file.

The comments claimed that wasting the last segment made it easier to do
calculations with XLogRecPtrs, because you don't have problems representing
last-byte-position-plus-1 that way. In my experience, however, it only made
things more complicated, because there were two ways to represent the
boundary at the beginning of a logical log file: as xlogid = n+1 and xrecoff = 0,
or as xlogid = n and xrecoff = 4GB - XLOG_SEG_SIZE. Some functions were
picky about which representation was used.

Also, use a 64-bit segment number instead of the log/seg combination, to
point to a certain WAL segment. We assume that all platforms have a working
64-bit integer type nowadays.

This is an incompatible change in the WAL format, so the WAL version number is bumped.
This commit is contained in:
Heikki Linnakangas 2012-06-24 18:06:38 +03:00
parent 47c7365e79
commit dfda6ebaec
12 changed files with 413 additions and 554 deletions

File diff suppressed because it is too large Load Diff

View File

@ -271,8 +271,7 @@ pg_xlogfile_name_offset(PG_FUNCTION_ARGS)
char *locationstr; char *locationstr;
unsigned int uxlogid; unsigned int uxlogid;
unsigned int uxrecoff; unsigned int uxrecoff;
uint32 xlogid; XLogSegNo xlogsegno;
uint32 xlogseg;
uint32 xrecoff; uint32 xrecoff;
XLogRecPtr locationpoint; XLogRecPtr locationpoint;
char xlogfilename[MAXFNAMELEN]; char xlogfilename[MAXFNAMELEN];
@ -319,8 +318,8 @@ pg_xlogfile_name_offset(PG_FUNCTION_ARGS)
/* /*
* xlogfilename * xlogfilename
*/ */
XLByteToPrevSeg(locationpoint, xlogid, xlogseg); XLByteToPrevSeg(locationpoint, xlogsegno);
XLogFileName(xlogfilename, ThisTimeLineID, xlogid, xlogseg); XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno);
values[0] = CStringGetTextDatum(xlogfilename); values[0] = CStringGetTextDatum(xlogfilename);
isnull[0] = false; isnull[0] = false;
@ -328,7 +327,7 @@ pg_xlogfile_name_offset(PG_FUNCTION_ARGS)
/* /*
* offset * offset
*/ */
xrecoff = locationpoint.xrecoff - xlogseg * XLogSegSize; xrecoff = locationpoint.xrecoff % XLogSegSize;
values[1] = UInt32GetDatum(xrecoff); values[1] = UInt32GetDatum(xrecoff);
isnull[1] = false; isnull[1] = false;
@ -354,8 +353,7 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
char *locationstr; char *locationstr;
unsigned int uxlogid; unsigned int uxlogid;
unsigned int uxrecoff; unsigned int uxrecoff;
uint32 xlogid; XLogSegNo xlogsegno;
uint32 xlogseg;
XLogRecPtr locationpoint; XLogRecPtr locationpoint;
char xlogfilename[MAXFNAMELEN]; char xlogfilename[MAXFNAMELEN];
@ -378,8 +376,8 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
locationpoint.xlogid = uxlogid; locationpoint.xlogid = uxlogid;
locationpoint.xrecoff = uxrecoff; locationpoint.xrecoff = uxrecoff;
XLByteToPrevSeg(locationpoint, xlogid, xlogseg); XLByteToPrevSeg(locationpoint, xlogsegno);
XLogFileName(xlogfilename, ThisTimeLineID, xlogid, xlogseg); XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno);
PG_RETURN_TEXT_P(cstring_to_text(xlogfilename)); PG_RETURN_TEXT_P(cstring_to_text(xlogfilename));
} }
@ -514,6 +512,8 @@ pg_xlog_location_diff(PG_FUNCTION_ARGS)
XLogRecPtr loc1, XLogRecPtr loc1,
loc2; loc2;
Numeric result; Numeric result;
uint64 bytes1,
bytes2;
/* /*
* Read and parse input * Read and parse input
@ -533,33 +533,17 @@ pg_xlog_location_diff(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("could not parse transaction log location \"%s\"", str2))); errmsg("could not parse transaction log location \"%s\"", str2)));
/* bytes1 = (((uint64)loc1.xlogid) << 32L) + loc1.xrecoff;
* Sanity check bytes2 = (((uint64)loc2.xlogid) << 32L) + loc2.xrecoff;
*/
if (loc1.xrecoff > XLogFileSize)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("xrecoff \"%X\" is out of valid range, 0..%X", loc1.xrecoff, XLogFileSize)));
if (loc2.xrecoff > XLogFileSize)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("xrecoff \"%X\" is out of valid range, 0..%X", loc2.xrecoff, XLogFileSize)));
/* /*
* result = XLogFileSize * (xlogid1 - xlogid2) + xrecoff1 - xrecoff2 * result = bytes1 - bytes2.
*
* XXX: this won't handle values higher than 2^63 correctly.
*/ */
result = DatumGetNumeric(DirectFunctionCall2(numeric_sub, result = DatumGetNumeric(DirectFunctionCall2(numeric_sub,
DirectFunctionCall1(int8_numeric, Int64GetDatum((int64) loc1.xlogid)), DirectFunctionCall1(int8_numeric, Int64GetDatum((int64) bytes1)),
DirectFunctionCall1(int8_numeric, Int64GetDatum((int64) loc2.xlogid)))); DirectFunctionCall1(int8_numeric, Int64GetDatum((int64) bytes2))));
result = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
DirectFunctionCall1(int8_numeric, Int64GetDatum((int64) XLogFileSize)),
NumericGetDatum(result)));
result = DatumGetNumeric(DirectFunctionCall2(numeric_add,
NumericGetDatum(result),
DirectFunctionCall1(int8_numeric, Int64GetDatum((int64) loc1.xrecoff))));
result = DatumGetNumeric(DirectFunctionCall2(numeric_sub,
NumericGetDatum(result),
DirectFunctionCall1(int8_numeric, Int64GetDatum((int64) loc2.xrecoff))));
PG_RETURN_NUMERIC(result); PG_RETURN_NUMERIC(result);
} }

View File

@ -779,7 +779,7 @@ IsCheckpointOnSchedule(double progress)
{ {
recptr = GetInsertRecPtr(); recptr = GetInsertRecPtr();
elapsed_xlogs = elapsed_xlogs =
(((double) (int32) (recptr.xlogid - ckpt_start_recptr.xlogid)) * XLogSegsPerFile + (((double) ((uint64) (recptr.xlogid - ckpt_start_recptr.xlogid) << 32L)) +
((double) recptr.xrecoff - (double) ckpt_start_recptr.xrecoff) / XLogSegSize) / ((double) recptr.xrecoff - (double) ckpt_start_recptr.xrecoff) / XLogSegSize) /
CheckPointSegments; CheckPointSegments;

View File

@ -221,10 +221,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
* We've left the last tar file "open", so we can now append the * We've left the last tar file "open", so we can now append the
* required WAL files to it. * required WAL files to it.
*/ */
uint32 logid, XLogSegNo logsegno;
logseg; XLogSegNo endlogsegno;
uint32 endlogid,
endlogseg;
struct stat statbuf; struct stat statbuf;
MemSet(&statbuf, 0, sizeof(statbuf)); MemSet(&statbuf, 0, sizeof(statbuf));
@ -236,8 +234,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
statbuf.st_size = XLogSegSize; statbuf.st_size = XLogSegSize;
statbuf.st_mtime = time(NULL); statbuf.st_mtime = time(NULL);
XLByteToSeg(startptr, logid, logseg); XLByteToSeg(startptr, logsegno);
XLByteToPrevSeg(endptr, endlogid, endlogseg); XLByteToPrevSeg(endptr, endlogsegno);
while (true) while (true)
{ {
@ -245,7 +243,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
char fn[MAXPGPATH]; char fn[MAXPGPATH];
int i; int i;
XLogFilePath(fn, ThisTimeLineID, logid, logseg); XLogFilePath(fn, ThisTimeLineID, logsegno);
_tarWriteHeader(fn, NULL, &statbuf); _tarWriteHeader(fn, NULL, &statbuf);
/* Send the actual WAL file contents, block-by-block */ /* Send the actual WAL file contents, block-by-block */
@ -254,8 +252,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
char buf[TAR_SEND_SIZE]; char buf[TAR_SEND_SIZE];
XLogRecPtr ptr; XLogRecPtr ptr;
ptr.xlogid = logid; XLogSegNoOffsetToRecPtr(logsegno, TAR_SEND_SIZE * i, ptr);
ptr.xrecoff = logseg * XLogSegSize + TAR_SEND_SIZE * i;
/* /*
* Some old compilers, e.g. gcc 2.95.3/x86, think that passing * Some old compilers, e.g. gcc 2.95.3/x86, think that passing
@ -277,11 +274,10 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* Advance to the next WAL file */ /* Advance to the next WAL file */
NextLogSeg(logid, logseg); logsegno++;
/* Have we reached our stop position yet? */ /* Have we reached our stop position yet? */
if (logid > endlogid || if (logsegno > endlogsegno)
(logid == endlogid && logseg > endlogseg))
break; break;
} }

View File

@ -69,11 +69,12 @@ walrcv_disconnect_type walrcv_disconnect = NULL;
/* /*
* These variables are used similarly to openLogFile/Id/Seg/Off, * These variables are used similarly to openLogFile/Id/Seg/Off,
* but for walreceiver to write the XLOG. * but for walreceiver to write the XLOG. recvFileTLI is the TimeLineID
* corresponding to the filename of recvFile, used for error messages.
*/ */
static int recvFile = -1; static int recvFile = -1;
static uint32 recvId = 0; static TimeLineID recvFileTLI = -1;
static uint32 recvSeg = 0; static XLogSegNo recvSegNo = 0;
static uint32 recvOff = 0; static uint32 recvOff = 0;
/* /*
@ -481,7 +482,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
{ {
int segbytes; int segbytes;
if (recvFile < 0 || !XLByteInSeg(recptr, recvId, recvSeg)) if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo))
{ {
bool use_existent; bool use_existent;
@ -501,15 +502,16 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
if (close(recvFile) != 0) if (close(recvFile) != 0)
ereport(PANIC, ereport(PANIC,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not close log file %u, segment %u: %m", errmsg("could not close log segment %s: %m",
recvId, recvSeg))); XLogFileNameP(recvFileTLI, recvSegNo))));
} }
recvFile = -1; recvFile = -1;
/* Create/use new log file */ /* Create/use new log file */
XLByteToSeg(recptr, recvId, recvSeg); XLByteToSeg(recptr, recvSegNo);
use_existent = true; use_existent = true;
recvFile = XLogFileInit(recvId, recvSeg, &use_existent, true); recvFile = XLogFileInit(recvSegNo, &use_existent, true);
recvFileTLI = ThisTimeLineID;
recvOff = 0; recvOff = 0;
} }
@ -527,9 +529,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
if (lseek(recvFile, (off_t) startoff, SEEK_SET) < 0) if (lseek(recvFile, (off_t) startoff, SEEK_SET) < 0)
ereport(PANIC, ereport(PANIC,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not seek in log file %u, " errmsg("could not seek in log segment %s, to offset %u: %m",
"segment %u to offset %u: %m", XLogFileNameP(recvFileTLI, recvSegNo),
recvId, recvSeg, startoff))); startoff)));
recvOff = startoff; recvOff = startoff;
} }
@ -544,9 +546,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
errno = ENOSPC; errno = ENOSPC;
ereport(PANIC, ereport(PANIC,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to log file %u, segment %u " errmsg("could not write to log segment %s "
"at offset %u, length %lu: %m", "at offset %u, length %lu: %m",
recvId, recvSeg, XLogFileNameP(recvFileTLI, recvSegNo),
recvOff, (unsigned long) segbytes))); recvOff, (unsigned long) segbytes)));
} }
@ -575,7 +577,7 @@ XLogWalRcvFlush(bool dying)
/* use volatile pointer to prevent code rearrangement */ /* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv; volatile WalRcvData *walrcv = WalRcv;
issue_xlog_fsync(recvFile, recvId, recvSeg); issue_xlog_fsync(recvFile, recvSegNo);
LogstreamResult.Flush = LogstreamResult.Write; LogstreamResult.Flush = LogstreamResult.Write;

View File

@ -87,8 +87,7 @@ int replication_timeout = 60 * 1000; /* maximum time to send one
* but for walsender to read the XLOG. * but for walsender to read the XLOG.
*/ */
static int sendFile = -1; static int sendFile = -1;
static uint32 sendId = 0; static XLogSegNo sendSegNo = 0;
static uint32 sendSeg = 0;
static uint32 sendOff = 0; static uint32 sendOff = 0;
/* /*
@ -977,10 +976,8 @@ XLogRead(char *buf, XLogRecPtr startptr, Size count)
char *p; char *p;
XLogRecPtr recptr; XLogRecPtr recptr;
Size nbytes; Size nbytes;
uint32 lastRemovedLog; XLogSegNo lastRemovedSegNo;
uint32 lastRemovedSeg; XLogSegNo segno;
uint32 log;
uint32 seg;
retry: retry:
p = buf; p = buf;
@ -995,7 +992,7 @@ retry:
startoff = recptr.xrecoff % XLogSegSize; startoff = recptr.xrecoff % XLogSegSize;
if (sendFile < 0 || !XLByteInSeg(recptr, sendId, sendSeg)) if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo))
{ {
char path[MAXPGPATH]; char path[MAXPGPATH];
@ -1003,8 +1000,8 @@ retry:
if (sendFile >= 0) if (sendFile >= 0)
close(sendFile); close(sendFile);
XLByteToSeg(recptr, sendId, sendSeg); XLByteToSeg(recptr, sendSegNo);
XLogFilePath(path, ThisTimeLineID, sendId, sendSeg); XLogFilePath(path, ThisTimeLineID, sendSegNo);
sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
if (sendFile < 0) if (sendFile < 0)
@ -1015,20 +1012,15 @@ retry:
* removed or recycled. * removed or recycled.
*/ */
if (errno == ENOENT) if (errno == ENOENT)
{
char filename[MAXFNAMELEN];
XLogFileName(filename, ThisTimeLineID, sendId, sendSeg);
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("requested WAL segment %s has already been removed", errmsg("requested WAL segment %s has already been removed",
filename))); XLogFileNameP(ThisTimeLineID, sendSegNo))));
}
else else
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not open file \"%s\" (log file %u, segment %u): %m", errmsg("could not open file \"%s\": %m",
path, sendId, sendSeg))); path)));
} }
sendOff = 0; sendOff = 0;
} }
@ -1039,8 +1031,9 @@ retry:
if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0) if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not seek in log file %u, segment %u to offset %u: %m", errmsg("could not seek in log segment %s to offset %u: %m",
sendId, sendSeg, startoff))); XLogFileNameP(ThisTimeLineID, sendSegNo),
startoff)));
sendOff = startoff; sendOff = startoff;
} }
@ -1052,11 +1045,13 @@ retry:
readbytes = read(sendFile, p, segbytes); readbytes = read(sendFile, p, segbytes);
if (readbytes <= 0) if (readbytes <= 0)
{
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read from log file %u, segment %u, offset %u, " errmsg("could not read from log segment %s, offset %u, length %lu: %m",
"length %lu: %m", XLogFileNameP(ThisTimeLineID, sendSegNo),
sendId, sendSeg, sendOff, (unsigned long) segbytes))); sendOff, (unsigned long) segbytes)));
}
/* Update state for read */ /* Update state for read */
XLByteAdvance(recptr, readbytes); XLByteAdvance(recptr, readbytes);
@ -1073,19 +1068,13 @@ retry:
* read() succeeds in that case, but the data we tried to read might * read() succeeds in that case, but the data we tried to read might
* already have been overwritten with new WAL records. * already have been overwritten with new WAL records.
*/ */
XLogGetLastRemoved(&lastRemovedLog, &lastRemovedSeg); XLogGetLastRemoved(&lastRemovedSegNo);
XLByteToSeg(startptr, log, seg); XLByteToSeg(startptr, segno);
if (log < lastRemovedLog || if (segno <= lastRemovedSegNo)
(log == lastRemovedLog && seg <= lastRemovedSeg))
{
char filename[MAXFNAMELEN];
XLogFileName(filename, ThisTimeLineID, log, seg);
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("requested WAL segment %s has already been removed", errmsg("requested WAL segment %s has already been removed",
filename))); XLogFileNameP(ThisTimeLineID, segno))));
}
/* /*
* During recovery, the currently-open WAL file might be replaced with the * During recovery, the currently-open WAL file might be replaced with the
@ -1165,24 +1154,13 @@ XLogSend(char *msgbuf, bool *caughtup)
* SendRqstPtr never points to the middle of a WAL record. * SendRqstPtr never points to the middle of a WAL record.
*/ */
startptr = sentPtr; startptr = sentPtr;
if (startptr.xrecoff >= XLogFileSize)
{
/*
* crossing a logid boundary, skip the non-existent last log segment
* in previous logical log file.
*/
startptr.xlogid += 1;
startptr.xrecoff = 0;
}
endptr = startptr; endptr = startptr;
XLByteAdvance(endptr, MAX_SEND_SIZE); XLByteAdvance(endptr, MAX_SEND_SIZE);
if (endptr.xlogid != startptr.xlogid) if (endptr.xlogid != startptr.xlogid)
{ {
/* Don't cross a logfile boundary within one message */ /* Don't cross a logfile boundary within one message */
Assert(endptr.xlogid == startptr.xlogid + 1); Assert(endptr.xlogid == startptr.xlogid + 1);
endptr.xlogid = startptr.xlogid; endptr.xrecoff = 0;
endptr.xrecoff = XLogFileSize;
} }
/* if we went beyond SendRqstPtr, back off */ /* if we went beyond SendRqstPtr, back off */
@ -1198,7 +1176,10 @@ XLogSend(char *msgbuf, bool *caughtup)
*caughtup = false; *caughtup = false;
} }
nbytes = endptr.xrecoff - startptr.xrecoff; if (endptr.xrecoff == 0)
nbytes = 0x100000000L - (uint64) startptr.xrecoff;
else
nbytes = endptr.xrecoff - startptr.xrecoff;
Assert(nbytes <= MAX_SEND_SIZE); Assert(nbytes <= MAX_SEND_SIZE);
/* /*

View File

@ -102,8 +102,7 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
struct dirent *dirent; struct dirent *dirent;
int i; int i;
bool b; bool b;
uint32 high_log = 0; XLogSegNo high_segno = 0;
uint32 high_seg = 0;
dir = opendir(basedir); dir = opendir(basedir);
if (dir == NULL) if (dir == NULL)
@ -117,9 +116,10 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
{ {
char fullpath[MAXPGPATH]; char fullpath[MAXPGPATH];
struct stat statbuf; struct stat statbuf;
uint32 tli, uint32 tli;
log, unsigned int log,
seg; seg;
XLogSegNo segno;
if (strcmp(dirent->d_name, ".") == 0 || strcmp(dirent->d_name, "..") == 0) if (strcmp(dirent->d_name, ".") == 0 || strcmp(dirent->d_name, "..") == 0)
continue; continue;
@ -151,6 +151,7 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
progname, dirent->d_name); progname, dirent->d_name);
disconnect_and_exit(1); disconnect_and_exit(1);
} }
segno = ((uint64) log) << 32 | seg;
/* Ignore any files that are for another timeline */ /* Ignore any files that are for another timeline */
if (tli != currenttimeline) if (tli != currenttimeline)
@ -168,11 +169,9 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
if (statbuf.st_size == XLOG_SEG_SIZE) if (statbuf.st_size == XLOG_SEG_SIZE)
{ {
/* Completed segment */ /* Completed segment */
if (log > high_log || if (segno > high_segno)
(log == high_log && seg > high_seg))
{ {
high_log = log; high_segno = segno;
high_seg = seg;
continue; continue;
} }
} }
@ -186,7 +185,7 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
closedir(dir); closedir(dir);
if (high_log > 0 || high_seg > 0) if (high_segno > 0)
{ {
XLogRecPtr high_ptr; XLogRecPtr high_ptr;
@ -194,10 +193,9 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
* Move the starting pointer to the start of the next segment, since * Move the starting pointer to the start of the next segment, since
* the highest one we've seen was completed. * the highest one we've seen was completed.
*/ */
NextLogSeg(high_log, high_seg); high_segno++;
high_ptr.xlogid = high_log; XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr);
high_ptr.xrecoff = high_seg * XLOG_SEG_SIZE;
return high_ptr; return high_ptr;
} }

View File

@ -55,9 +55,10 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir, char *namebu
struct stat statbuf; struct stat statbuf;
char *zerobuf; char *zerobuf;
int bytes; int bytes;
XLogSegNo segno;
XLogFileName(namebuf, timeline, startpoint.xlogid, XLByteToSeg(startpoint, segno);
startpoint.xrecoff / XLOG_SEG_SIZE); XLogFileName(namebuf, timeline, segno);
snprintf(fn, sizeof(fn), "%s/%s.partial", basedir, namebuf); snprintf(fn, sizeof(fn), "%s/%s.partial", basedir, namebuf);
f = open(fn, O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR); f = open(fn, O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR);

View File

@ -60,8 +60,7 @@ extern char *optarg;
static ControlFileData ControlFile; /* pg_control values */ static ControlFileData ControlFile; /* pg_control values */
static uint32 newXlogId, static XLogSegNo newXlogSegNo; /* new XLOG segment # */
newXlogSeg; /* ID/Segment of new XLOG segment */
static bool guessed = false; /* T if we had to guess at any values */ static bool guessed = false; /* T if we had to guess at any values */
static const char *progname; static const char *progname;
@ -87,12 +86,9 @@ main(int argc, char *argv[])
Oid set_oid = 0; Oid set_oid = 0;
MultiXactId set_mxid = 0; MultiXactId set_mxid = 0;
MultiXactOffset set_mxoff = (MultiXactOffset) -1; MultiXactOffset set_mxoff = (MultiXactOffset) -1;
uint32 minXlogTli = 0, uint32 minXlogTli = 0;
minXlogId = 0, XLogSegNo minXlogSegNo = 0;
minXlogSeg = 0;
char *endptr; char *endptr;
char *endptr2;
char *endptr3;
char *DataDir; char *DataDir;
int fd; int fd;
char path[MAXPGPATH]; char path[MAXPGPATH];
@ -204,27 +200,13 @@ main(int argc, char *argv[])
break; break;
case 'l': case 'l':
minXlogTli = strtoul(optarg, &endptr, 0); if (strspn(optarg, "01234567890ABCDEFabcdef") != 24)
if (endptr == optarg || *endptr != ',')
{
fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
minXlogId = strtoul(endptr + 1, &endptr2, 0);
if (endptr2 == endptr + 1 || *endptr2 != ',')
{
fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
minXlogSeg = strtoul(endptr2 + 1, &endptr3, 0);
if (endptr3 == endptr2 + 1 || *endptr3 != '\0')
{ {
fprintf(stderr, _("%s: invalid argument for option -l\n"), progname); fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1); exit(1);
} }
XLogFromFileName(optarg, &minXlogTli, &minXlogSegNo);
break; break;
default: default:
@ -295,7 +277,7 @@ main(int argc, char *argv[])
GuessControlValues(); GuessControlValues();
/* /*
* Also look at existing segment files to set up newXlogId/newXlogSeg * Also look at existing segment files to set up newXlogSegNo
*/ */
FindEndOfXLOG(); FindEndOfXLOG();
@ -335,13 +317,8 @@ main(int argc, char *argv[])
if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID) if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli; ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
if (minXlogId > newXlogId || if (minXlogSegNo > newXlogSegNo)
(minXlogId == newXlogId && newXlogSegNo = minXlogSegNo;
minXlogSeg > newXlogSeg))
{
newXlogId = minXlogId;
newXlogSeg = minXlogSeg;
}
/* /*
* If we had to guess anything, and -f was not given, just print the * If we had to guess anything, and -f was not given, just print the
@ -545,6 +522,7 @@ static void
PrintControlValues(bool guessed) PrintControlValues(bool guessed)
{ {
char sysident_str[32]; char sysident_str[32];
char fname[MAXFNAMELEN];
if (guessed) if (guessed)
printf(_("Guessed pg_control values:\n\n")); printf(_("Guessed pg_control values:\n\n"));
@ -558,10 +536,10 @@ PrintControlValues(bool guessed)
snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT, snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
ControlFile.system_identifier); ControlFile.system_identifier);
printf(_("First log file ID after reset: %u\n"), XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo);
newXlogId);
printf(_("First log file segment after reset: %u\n"), printf(_("First log segment after reset: %s\n"),
newXlogSeg); fname);
printf(_("pg_control version number: %u\n"), printf(_("pg_control version number: %u\n"),
ControlFile.pg_control_version); ControlFile.pg_control_version);
printf(_("Catalog version number: %u\n"), printf(_("Catalog version number: %u\n"),
@ -624,11 +602,10 @@ RewriteControlFile(void)
/* /*
* Adjust fields as needed to force an empty XLOG starting at * Adjust fields as needed to force an empty XLOG starting at
* newXlogId/newXlogSeg. * newXlogSegNo.
*/ */
ControlFile.checkPointCopy.redo.xlogid = newXlogId; XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD,
ControlFile.checkPointCopy.redo.xrecoff = ControlFile.checkPointCopy.redo);
newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
ControlFile.checkPointCopy.time = (pg_time_t) time(NULL); ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
ControlFile.state = DB_SHUTDOWNED; ControlFile.state = DB_SHUTDOWNED;
@ -728,14 +705,17 @@ FindEndOfXLOG(void)
{ {
DIR *xldir; DIR *xldir;
struct dirent *xlde; struct dirent *xlde;
uint64 segs_per_xlogid;
uint64 xlogbytepos;
/* /*
* Initialize the max() computation using the last checkpoint address from * Initialize the max() computation using the last checkpoint address from
* old pg_control. Note that for the moment we are working with segment * old pg_control. Note that for the moment we are working with segment
* numbering according to the old xlog seg size. * numbering according to the old xlog seg size.
*/ */
newXlogId = ControlFile.checkPointCopy.redo.xlogid; segs_per_xlogid = (0x100000000L / ControlFile.xlog_seg_size);
newXlogSeg = ControlFile.checkPointCopy.redo.xrecoff / ControlFile.xlog_seg_size; newXlogSegNo = ((uint64) ControlFile.checkPointCopy.redo.xlogid) * segs_per_xlogid
+ (ControlFile.checkPointCopy.redo.xrecoff / ControlFile.xlog_seg_size);
/* /*
* Scan the pg_xlog directory to find existing WAL segment files. We * Scan the pg_xlog directory to find existing WAL segment files. We
@ -759,8 +739,10 @@ FindEndOfXLOG(void)
unsigned int tli, unsigned int tli,
log, log,
seg; seg;
XLogSegNo segno;
sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg); sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
segno = ((uint64) log) * segs_per_xlogid + seg;
/* /*
* Note: we take the max of all files found, regardless of their * Note: we take the max of all files found, regardless of their
@ -768,12 +750,8 @@ FindEndOfXLOG(void)
* timelines other than the target TLI, but this seems safer. * timelines other than the target TLI, but this seems safer.
* Better too large a result than too small... * Better too large a result than too small...
*/ */
if (log > newXlogId || if (segno > newXlogSegNo)
(log == newXlogId && seg > newXlogSeg)) newXlogSegNo = segno;
{
newXlogId = log;
newXlogSeg = seg;
}
} }
errno = 0; errno = 0;
} }
@ -799,11 +777,9 @@ FindEndOfXLOG(void)
* Finally, convert to new xlog seg size, and advance by one to ensure we * Finally, convert to new xlog seg size, and advance by one to ensure we
* are in virgin territory. * are in virgin territory.
*/ */
newXlogSeg *= ControlFile.xlog_seg_size; xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size;
newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize; newXlogSegNo = (xlogbytepos + XLogSegSize - 1) / XLogSegSize;
newXlogSegNo++;
/* be sure we wrap around correctly at end of a logfile */
NextLogSeg(newXlogId, newXlogSeg);
} }
@ -972,8 +948,7 @@ WriteEmptyXLOG(void)
record->xl_crc = crc; record->xl_crc = crc;
/* Write the first page */ /* Write the first page */
XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo);
newXlogId, newXlogSeg);
unlink(path); unlink(path);

View File

@ -267,12 +267,10 @@ extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern void XLogFlush(XLogRecPtr RecPtr); extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogBackgroundFlush(void); extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern int XLogFileInit(uint32 log, uint32 seg, extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
bool *use_existent, bool use_lock); extern int XLogFileOpen(XLogSegNo segno);
extern int XLogFileOpen(uint32 log, uint32 seg);
extern void XLogGetLastRemoved(XLogSegNo *segno);
extern void XLogGetLastRemoved(uint32 *log, uint32 *seg);
extern void XLogSetAsyncXactLSN(XLogRecPtr record); extern void XLogSetAsyncXactLSN(XLogRecPtr record);
extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup); extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup);
@ -280,7 +278,7 @@ extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup);
extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record); extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec); extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
extern void issue_xlog_fsync(int fd, uint32 log, uint32 seg); extern void issue_xlog_fsync(int fd, XLogSegNo segno);
extern bool RecoveryInProgress(void); extern bool RecoveryInProgress(void);
extern bool HotStandbyActive(void); extern bool HotStandbyActive(void);
@ -294,6 +292,7 @@ extern bool RecoveryIsPaused(void);
extern void SetRecoveryPause(bool recoveryPause); extern void SetRecoveryPause(bool recoveryPause);
extern TimestampTz GetLatestXTime(void); extern TimestampTz GetLatestXTime(void);
extern TimestampTz GetCurrentChunkReplayStartTime(void); extern TimestampTz GetCurrentChunkReplayStartTime(void);
extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
extern void UpdateControlFile(void); extern void UpdateControlFile(void);
extern uint64 GetSystemIdentifier(void); extern uint64 GetSystemIdentifier(void);

View File

@ -71,7 +71,7 @@ typedef struct XLogContRecord
/* /*
* Each page of XLOG file has a header like this: * Each page of XLOG file has a header like this:
*/ */
#define XLOG_PAGE_MAGIC 0xD071 /* can be used as WAL version indicator */ #define XLOG_PAGE_MAGIC 0xD072 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData typedef struct XLogPageHeaderData
{ {
@ -115,55 +115,27 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
(((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD) (((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD)
/* /*
* We break each logical log file (xlogid value) into segment files of the * The XLOG is split into WAL segments (physical files) of the size indicated
* size indicated by XLOG_SEG_SIZE. One possible segment at the end of each * by XLOG_SEG_SIZE.
* log file is wasted, to ensure that we don't have problems representing
* last-byte-position-plus-1.
*/ */
#define XLogSegSize ((uint32) XLOG_SEG_SIZE) #define XLogSegSize ((uint32) XLOG_SEG_SIZE)
#define XLogSegsPerFile (((uint32) 0xffffffff) / XLogSegSize) #define XLogSegmentsPerXLogId (0x100000000L / XLOG_SEG_SIZE)
#define XLogFileSize (XLogSegsPerFile * XLogSegSize)
#define XLogSegNoOffsetToRecPtr(segno, offset, dest) \
do { \
(dest).xlogid = (segno) / XLogSegmentsPerXLogId; \
(dest).xrecoff = ((segno) % XLogSegmentsPerXLogId) * XLOG_SEG_SIZE + (offset); \
} while (0)
/* /*
* Macros for manipulating XLOG pointers * Macros for manipulating XLOG pointers
*/ */
/* Increment an xlogid/segment pair */
#define NextLogSeg(logId, logSeg) \
do { \
if ((logSeg) >= XLogSegsPerFile-1) \
{ \
(logId)++; \
(logSeg) = 0; \
} \
else \
(logSeg)++; \
} while (0)
/* Decrement an xlogid/segment pair (assume it's not 0,0) */
#define PrevLogSeg(logId, logSeg) \
do { \
if (logSeg) \
(logSeg)--; \
else \
{ \
(logId)--; \
(logSeg) = XLogSegsPerFile-1; \
} \
} while (0)
/* Align a record pointer to next page */ /* Align a record pointer to next page */
#define NextLogPage(recptr) \ #define NextLogPage(recptr) \
do { \ do { \
if ((recptr).xrecoff % XLOG_BLCKSZ != 0) \ if ((recptr).xrecoff % XLOG_BLCKSZ != 0) \
(recptr).xrecoff += \ XLByteAdvance(recptr, (XLOG_BLCKSZ - (recptr).xrecoff % XLOG_BLCKSZ)); \
(XLOG_BLCKSZ - (recptr).xrecoff % XLOG_BLCKSZ); \
if ((recptr).xrecoff >= XLogFileSize) \
{ \
((recptr).xlogid)++; \
(recptr).xrecoff = 0; \
} \
} while (0) } while (0)
/* /*
@ -175,14 +147,11 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
* for example. (We can assume xrecoff is not zero, since no valid recptr * for example. (We can assume xrecoff is not zero, since no valid recptr
* can have that.) * can have that.)
*/ */
#define XLByteToSeg(xlrp, logId, logSeg) \ #define XLByteToSeg(xlrp, logSegNo) \
( logId = (xlrp).xlogid, \ logSegNo = ((uint64) (xlrp).xlogid * XLogSegmentsPerXLogId) + (xlrp).xrecoff / XLogSegSize
logSeg = (xlrp).xrecoff / XLogSegSize \
) #define XLByteToPrevSeg(xlrp, logSegNo) \
#define XLByteToPrevSeg(xlrp, logId, logSeg) \ logSegNo = ((uint64) (xlrp).xlogid * XLogSegmentsPerXLogId) + ((xlrp).xrecoff - 1) / XLogSegSize
( logId = (xlrp).xlogid, \
logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
)
/* /*
* Is an XLogRecPtr within a particular XLOG segment? * Is an XLogRecPtr within a particular XLOG segment?
@ -190,13 +159,16 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
* For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg, * For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg,
* a boundary byte is taken to be in the previous segment. * a boundary byte is taken to be in the previous segment.
*/ */
#define XLByteInSeg(xlrp, logId, logSeg) \ #define XLByteInSeg(xlrp, logSegNo) \
((xlrp).xlogid == (logId) && \ (((xlrp).xlogid) == (logSegNo) / XLogSegmentsPerXLogId && \
(xlrp).xrecoff / XLogSegSize == (logSeg)) ((xlrp).xrecoff / XLogSegSize) == (logSegNo) % XLogSegmentsPerXLogId)
#define XLByteInPrevSeg(xlrp, logId, logSeg) \ #define XLByteInPrevSeg(xlrp, logSegNo) \
((xlrp).xlogid == (logId) && \ (((xlrp).xrecoff == 0) ? \
((xlrp).xrecoff - 1) / XLogSegSize == (logSeg)) (((xlrp).xlogid - 1) == (logSegNo) / XLogSegmentsPerXLogId && \
((uint32) 0xffffffff) / XLogSegSize == (logSegNo) % XLogSegmentsPerXLogId) : \
((xlrp).xlogid) == (logSegNo) / XLogSegmentsPerXLogId && \
(((xlrp).xrecoff - 1) / XLogSegSize) == (logSegNo) % XLogSegmentsPerXLogId)
/* Check if an xrecoff value is in a plausible range */ /* Check if an xrecoff value is in a plausible range */
#define XRecOffIsValid(xrecoff) \ #define XRecOffIsValid(xrecoff) \
@ -215,14 +187,23 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
*/ */
#define MAXFNAMELEN 64 #define MAXFNAMELEN 64
#define XLogFileName(fname, tli, log, seg) \ #define XLogFileName(fname, tli, logSegNo) \
snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, log, seg) snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, \
(uint32) ((logSegNo) / XLogSegmentsPerXLogId), \
(uint32) ((logSegNo) % XLogSegmentsPerXLogId))
#define XLogFromFileName(fname, tli, log, seg) \ #define XLogFromFileName(fname, tli, logSegNo) \
sscanf(fname, "%08X%08X%08X", tli, log, seg) do { \
uint32 log; \
uint32 seg; \
sscanf(fname, "%08X%08X%08X", tli, &log, &seg); \
*logSegNo = (uint64) log * XLogSegmentsPerXLogId + seg; \
} while (0)
#define XLogFilePath(path, tli, log, seg) \ #define XLogFilePath(path, tli, logSegNo) \
snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X", tli, log, seg) snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X", tli, \
(uint32) ((logSegNo) / XLogSegmentsPerXLogId), \
(uint32) ((logSegNo) % XLogSegmentsPerXLogId))
#define TLHistoryFileName(fname, tli) \ #define TLHistoryFileName(fname, tli) \
snprintf(fname, MAXFNAMELEN, "%08X.history", tli) snprintf(fname, MAXFNAMELEN, "%08X.history", tli)
@ -233,11 +214,15 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
#define StatusFilePath(path, xlog, suffix) \ #define StatusFilePath(path, xlog, suffix) \
snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix) snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix)
#define BackupHistoryFileName(fname, tli, log, seg, offset) \ #define BackupHistoryFileName(fname, tli, logSegNo, offset) \
snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, log, seg, offset) snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, \
(uint32) ((logSegNo) / XLogSegmentsPerXLogId), \
(uint32) ((logSegNo) % XLogSegmentsPerXLogId), offset)
#define BackupHistoryFilePath(path, tli, log, seg, offset) \ #define BackupHistoryFilePath(path, tli, logSegNo, offset) \
snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X.%08X.backup", tli, log, seg, offset) snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X.%08X.backup", tli, \
(uint32) ((logSegNo) / XLogSegmentsPerXLogId), \
(uint32) ((logSegNo) % XLogSegmentsPerXLogId), offset)
/* /*

View File

@ -61,16 +61,16 @@ typedef struct XLogRecPtr
*/ */
#define XLByteAdvance(recptr, nbytes) \ #define XLByteAdvance(recptr, nbytes) \
do { \ do { \
if (recptr.xrecoff + nbytes >= XLogFileSize) \ uint32 oldxrecoff = (recptr).xrecoff; \
{ \ (recptr).xrecoff += nbytes; \
recptr.xlogid += 1; \ if ((recptr).xrecoff < oldxrecoff) \
recptr.xrecoff \ (recptr).xlogid += 1; /* xrecoff wrapped around */ \
= recptr.xrecoff + nbytes - XLogFileSize; \
} \
else \
recptr.xrecoff += nbytes; \
} while (0) } while (0)
/*
* XLogSegNo - physical log file sequence number.
*/
typedef uint64 XLogSegNo;
/* /*
* TimeLineID (TLI) - identifies different database histories to prevent * TimeLineID (TLI) - identifies different database histories to prevent