Move WAL continuation record information to WAL page header.

The continuation record only contained one field, xl_rem_len, so it makes
things simpler to just include it in the WAL page header. This wastes four
bytes on pages that don't begin with a continuation from the previous page, plus
four bytes on every page, because of padding.

The motivation of this is to make it easier to calculate how much space a
WAL record needs. Before this patch, it depended on how many page boundaries
the record crosses. The motivation of that, in turn, is to separate the
allocation of space in the WAL from the copying of the record data to the
allocated space. Keeping the calculation of space required simple helps to
keep the critical section of allocating the space from WAL short. But that's
not included in this patch yet.

Bump WAL version number again, as this is an incompatible change.
This commit is contained in:
Heikki Linnakangas 2012-06-24 18:15:00 +03:00
parent dfda6ebaec
commit 20ba5ca64c
2 changed files with 28 additions and 38 deletions

View File

@ -696,7 +696,6 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
{
XLogCtlInsert *Insert = &XLogCtl->Insert;
XLogRecord *record;
XLogContRecord *contrecord;
XLogRecPtr RecPtr;
XLogRecPtr WriteRqst;
uint32 freespace;
@ -1085,9 +1084,7 @@ begin:;
curridx = Insert->curridx;
/* Insert cont-record header */
Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD;
contrecord = (XLogContRecord *) Insert->currpos;
contrecord->xl_rem_len = write_len;
Insert->currpos += SizeOfXLogContRecord;
Insert->currpage->xlp_rem_len = write_len;
freespace = INSERT_FREESPACE(Insert);
}
@ -3941,7 +3938,8 @@ retry:
if (total_len > len)
{
/* Need to reassemble record */
XLogContRecord *contrecord;
char *contrecord;
XLogPageHeader pageHeader;
XLogRecPtr pagelsn;
uint32 gotlen = len;
@ -3969,30 +3967,30 @@ retry:
readOff)));
goto next_record_is_invalid;
}
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
contrecord = (XLogContRecord *) ((char *) readBuf + pageHeaderSize);
if (contrecord->xl_rem_len == 0 ||
total_len != (contrecord->xl_rem_len + gotlen))
pageHeader = (XLogPageHeader) readBuf;
pageHeaderSize = XLogPageHeaderSize(pageHeader);
contrecord = (char *) readBuf + pageHeaderSize;
if (pageHeader->xlp_rem_len == 0 ||
total_len != (pageHeader->xlp_rem_len + gotlen))
{
char fname[MAXFNAMELEN];
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, *RecPtr),
(errmsg("invalid contrecord length %u in log segment %s, offset %u",
contrecord->xl_rem_len,
pageHeader->xlp_rem_len,
XLogFileNameP(curFileTLI, readSegNo),
readOff)));
goto next_record_is_invalid;
}
len = XLOG_BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
if (contrecord->xl_rem_len > len)
len = XLOG_BLCKSZ - pageHeaderSize;
if (pageHeader->xlp_rem_len > len)
{
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
memcpy(buffer, (char *) contrecord, len);
gotlen += len;
buffer += len;
continue;
}
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord,
contrecord->xl_rem_len);
memcpy(buffer, (char *) contrecord, pageHeader->xlp_rem_len);
break;
}
if (!RecordIsValid(record, *RecPtr, emode))
@ -4000,8 +3998,7 @@ retry:
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
XLogSegNoOffsetToRecPtr(
readSegNo,
readOff + pageHeaderSize +
MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len),
readOff + pageHeaderSize + MAXALIGN(pageHeader->xlp_rem_len),
EndRecPtr);
ReadRecPtr = *RecPtr;
/* needn't worry about XLOG SWITCH, it can't cross page boundaries */

View File

@ -48,30 +48,10 @@ typedef struct BkpBlock
/* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
} BkpBlock;
/*
* When there is not enough space on current page for whole record, we
* continue on the next page with continuation record. (However, the
* XLogRecord header will never be split across pages; if there's less than
* SizeOfXLogRecord space left at the end of a page, we just waste it.)
*
* Note that xl_rem_len includes backup-block data; that is, it tracks
* xl_tot_len not xl_len in the initial header. Also note that the
* continuation data isn't necessarily aligned.
*/
typedef struct XLogContRecord
{
uint32 xl_rem_len; /* total len of remaining data for record */
/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
} XLogContRecord;
#define SizeOfXLogContRecord sizeof(XLogContRecord)
/*
* Each page of XLOG file has a header like this:
*/
#define XLOG_PAGE_MAGIC 0xD072 /* can be used as WAL version indicator */
#define XLOG_PAGE_MAGIC 0xD073 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
@ -79,6 +59,19 @@ typedef struct XLogPageHeaderData
uint16 xlp_info; /* flag bits, see below */
TimeLineID xlp_tli; /* TimeLineID of first record on page */
XLogRecPtr xlp_pageaddr; /* XLOG address of this page */
/*
* When there is not enough space on current page for whole record, we
* continue on the next page. xlp_rem_len is the number of bytes
* remaining from a previous page. (However, the XLogRecord header will
* never be split across pages; if there's less than SizeOfXLogRecord
* space left at the end of a page, we just waste it.)
*
* Note that xlp_rem_len includes backup-block data; that is, it tracks
* xl_tot_len not xl_len in the initial header. Also note that the
* continuation data isn't necessarily aligned.
*/
uint32 xlp_rem_len; /* total len of remaining data for record */
} XLogPageHeaderData;
#define SizeOfXLogShortPHD MAXALIGN(sizeof(XLogPageHeaderData))