Revamp the WAL record format.

Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.

There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.

This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.

For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.

The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.

Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
This commit is contained in:
Heikki Linnakangas 2014-11-20 17:56:26 +02:00
parent 8dc626defe
commit 2c03216d83
93 changed files with 3945 additions and 4366 deletions

View File

@ -17,6 +17,7 @@
#include "access/xlogreader.h" #include "access/xlogreader.h"
#include "access/xlogrecord.h" #include "access/xlogrecord.h"
#include "access/xlog_internal.h"
#include "access/transam.h" #include "access/transam.h"
#include "common/fe_memutils.h" #include "common/fe_memutils.h"
#include "getopt_long.h" #include "getopt_long.h"
@ -343,90 +344,117 @@ XLogDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
* Store per-rmgr and per-record statistics for a given record. * Store per-rmgr and per-record statistics for a given record.
*/ */
static void static void
XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats, XLogRecPtr ReadRecPtr, XLogRecord *record) XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats,
XLogReaderState *record)
{ {
RmgrId rmid; RmgrId rmid;
uint8 recid; uint8 recid;
uint32 rec_len;
uint32 fpi_len;
stats->count++; stats->count++;
/* Update per-rmgr statistics */ /* Update per-rmgr statistics */
rmid = record->xl_rmid; rmid = XLogRecGetRmid(record);
rec_len = XLogRecGetDataLen(record) + SizeOfXLogRecord;
fpi_len = record->decoded_record->xl_tot_len - rec_len;
stats->rmgr_stats[rmid].count++; stats->rmgr_stats[rmid].count++;
stats->rmgr_stats[rmid].rec_len += stats->rmgr_stats[rmid].rec_len += rec_len;
record->xl_len + SizeOfXLogRecord; stats->rmgr_stats[rmid].fpi_len += fpi_len;
stats->rmgr_stats[rmid].fpi_len +=
record->xl_tot_len - (record->xl_len + SizeOfXLogRecord);
/* /*
* Update per-record statistics, where the record is identified by a * Update per-record statistics, where the record is identified by a
* combination of the RmgrId and the four bits of the xl_info field * combination of the RmgrId and the four bits of the xl_info field that
* that are the rmgr's domain (resulting in sixteen possible entries * are the rmgr's domain (resulting in sixteen possible entries per
* per RmgrId). * RmgrId).
*/ */
recid = record->xl_info >> 4; recid = XLogRecGetInfo(record) >> 4;
stats->record_stats[rmid][recid].count++; stats->record_stats[rmid][recid].count++;
stats->record_stats[rmid][recid].rec_len += stats->record_stats[rmid][recid].rec_len += rec_len;
record->xl_len + SizeOfXLogRecord; stats->record_stats[rmid][recid].fpi_len += fpi_len;
stats->record_stats[rmid][recid].fpi_len +=
record->xl_tot_len - (record->xl_len + SizeOfXLogRecord);
} }
/* /*
* Print a record to stdout * Print a record to stdout
*/ */
static void static void
XLogDumpDisplayRecord(XLogDumpConfig *config, XLogRecPtr ReadRecPtr, XLogRecord *record) XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
{ {
const char *id; const char *id;
const RmgrDescData *desc = &RmgrDescTable[record->xl_rmid]; const RmgrDescData *desc = &RmgrDescTable[XLogRecGetRmid(record)];
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blk;
int block_id;
uint8 info = XLogRecGetInfo(record);
XLogRecPtr xl_prev = XLogRecGetPrev(record);
id = desc->rm_identify(record->xl_info); id = desc->rm_identify(info);
if (id == NULL) if (id == NULL)
id = psprintf("UNKNOWN (%x)", record->xl_info & ~XLR_INFO_MASK); id = psprintf("UNKNOWN (%x)", info & ~XLR_INFO_MASK);
printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, bkp: %u%u%u%u, desc: %s ", printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
desc->rm_name, desc->rm_name,
record->xl_len, record->xl_tot_len, XLogRecGetDataLen(record), XLogRecGetTotalLen(record),
record->xl_xid, XLogRecGetXid(record),
(uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr, (uint32) (record->ReadRecPtr >> 32), (uint32) record->ReadRecPtr,
(uint32) (record->xl_prev >> 32), (uint32) record->xl_prev, (uint32) (xl_prev >> 32), (uint32) xl_prev);
!!(XLR_BKP_BLOCK(0) & record->xl_info), printf("desc: %s ", id);
!!(XLR_BKP_BLOCK(1) & record->xl_info),
!!(XLR_BKP_BLOCK(2) & record->xl_info),
!!(XLR_BKP_BLOCK(3) & record->xl_info),
id);
/* the desc routine will printf the description directly to stdout */ /* the desc routine will printf the description directly to stdout */
desc->rm_desc(NULL, record); desc->rm_desc(NULL, record);
putchar('\n'); if (!config->bkp_details)
if (config->bkp_details)
{ {
int bkpnum; /* print block references (short format) */
char *blk = (char *) XLogRecGetData(record) + record->xl_len; for (block_id = 0; block_id <= record->max_block_id; block_id++)
for (bkpnum = 0; bkpnum < XLR_MAX_BKP_BLOCKS; bkpnum++)
{ {
BkpBlock bkpb; if (!XLogRecHasBlockRef(record, block_id))
if (!(XLR_BKP_BLOCK(bkpnum) & record->xl_info))
continue; continue;
memcpy(&bkpb, blk, sizeof(BkpBlock)); XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
blk += sizeof(BkpBlock); if (forknum != MAIN_FORKNUM)
blk += BLCKSZ - bkpb.hole_length; printf(", blkref #%u: rel %u/%u/%u fork %s blk %u",
block_id,
rnode.spcNode, rnode.dbNode, rnode.relNode,
forkNames[forknum],
blk);
else
printf(", blkref #%u: rel %u/%u/%u blk %u",
block_id,
rnode.spcNode, rnode.dbNode, rnode.relNode,
blk);
if (XLogRecHasBlockImage(record, block_id))
printf(" FPW");
}
putchar('\n');
}
else
{
/* print block references (detailed format) */
putchar('\n');
for (block_id = 0; block_id <= record->max_block_id; block_id++)
{
if (!XLogRecHasBlockRef(record, block_id))
continue;
printf("\tbackup bkp #%u; rel %u/%u/%u; fork: %s; block: %u; hole: offset: %u, length: %u\n", XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
bkpnum, printf("\tblkref #%u: rel %u/%u/%u fork %s blk %u",
bkpb.node.spcNode, bkpb.node.dbNode, bkpb.node.relNode, block_id,
forkNames[bkpb.fork], rnode.spcNode, rnode.dbNode, rnode.relNode,
bkpb.block, bkpb.hole_offset, bkpb.hole_length); forkNames[forknum],
blk);
if (XLogRecHasBlockImage(record, block_id))
{
printf(" (FPW); hole: offset: %u, length: %u\n",
record->blocks[block_id].hole_offset,
record->blocks[block_id].hole_length);
}
putchar('\n');
} }
} }
} }
@ -924,9 +952,9 @@ main(int argc, char **argv)
/* process the record */ /* process the record */
if (config.stats == true) if (config.stats == true)
XLogDumpCountRecord(&config, &stats, xlogreader_state->ReadRecPtr, record); XLogDumpCountRecord(&config, &stats, xlogreader_state);
else else
XLogDumpDisplayRecord(&config, xlogreader_state->ReadRecPtr, record); XLogDumpDisplayRecord(&config, xlogreader_state);
/* check whether we printed enough */ /* check whether we printed enough */
config.already_displayed_records++; config.already_displayed_records++;

View File

@ -13,7 +13,7 @@
typedef struct RmgrDescData typedef struct RmgrDescData
{ {
const char *rm_name; const char *rm_name;
void (*rm_desc) (StringInfo buf, XLogRecord *record); void (*rm_desc) (StringInfo buf, XLogReaderState *record);
const char *(*rm_identify) (uint8 info); const char *(*rm_identify) (uint8 info);
} RmgrDescData; } RmgrDescData;

View File

@ -666,19 +666,16 @@ brinbuild(PG_FUNCTION_ARGS)
{ {
xl_brin_createidx xlrec; xl_brin_createidx xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata;
Page page; Page page;
xlrec.node = index->rd_node;
xlrec.version = BRIN_CURRENT_VERSION; xlrec.version = BRIN_CURRENT_VERSION;
xlrec.pagesPerRange = BrinGetPagesPerRange(index); xlrec.pagesPerRange = BrinGetPagesPerRange(index);
rdata.buffer = InvalidBuffer; XLogBeginInsert();
rdata.data = (char *) &xlrec; XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
rdata.len = SizeOfBrinCreateIdx; XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT);
rdata.next = NULL;
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, &rdata); recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
page = BufferGetPage(meta); page = BufferGetPage(meta);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);

View File

@ -140,27 +140,19 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
/* XLOG stuff */ /* XLOG stuff */
if (RelationNeedsWAL(idxrel)) if (RelationNeedsWAL(idxrel))
{ {
BlockNumber blk = BufferGetBlockNumber(oldbuf);
xl_brin_samepage_update xlrec; xl_brin_samepage_update xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE; uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE;
xlrec.node = idxrel->rd_node; xlrec.offnum = oldoff;
ItemPointerSetBlockNumber(&xlrec.tid, blk);
ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfBrinSamepageUpdate;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) newtup; XLogBeginInsert();
rdata[1].len = newsz; XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
rdata[1].buffer = oldbuf;
rdata[1].buffer_std = true;
rdata[1].next = NULL;
recptr = XLogInsert(RM_BRIN_ID, info, rdata); XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) newtup, newsz);
recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(oldpage, recptr); PageSetLSN(oldpage, recptr);
} }
@ -211,43 +203,30 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
{ {
xl_brin_update xlrec; xl_brin_update xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[4];
uint8 info; uint8 info;
info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0); info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
xlrec.insert.node = idxrel->rd_node; xlrec.insert.offnum = newoff;
ItemPointerSet(&xlrec.insert.tid, BufferGetBlockNumber(newbuf), newoff);
xlrec.insert.heapBlk = heapBlk; xlrec.insert.heapBlk = heapBlk;
xlrec.insert.tuplen = newsz;
xlrec.insert.revmapBlk = BufferGetBlockNumber(revmapbuf);
xlrec.insert.pagesPerRange = pagesPerRange; xlrec.insert.pagesPerRange = pagesPerRange;
ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff); xlrec.oldOffnum = oldoff;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = SizeOfBrinUpdate;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) newtup; /* new page */
rdata[1].len = newsz; XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
rdata[1].buffer = extended ? InvalidBuffer : newbuf;
rdata[1].buffer_std = true;
rdata[1].next = &(rdata[2]);
rdata[2].data = (char *) NULL; XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
rdata[2].len = 0; XLogRegisterBufData(0, (char *) newtup, newsz);
rdata[2].buffer = revmapbuf;
rdata[2].buffer_std = true;
rdata[2].next = &(rdata[3]);
rdata[3].data = (char *) NULL; /* revmap page */
rdata[3].len = 0; XLogRegisterBuffer(1, revmapbuf, REGBUF_STANDARD);
rdata[3].buffer = oldbuf;
rdata[3].buffer_std = true;
rdata[3].next = NULL;
recptr = XLogInsert(RM_BRIN_ID, info, rdata); /* old page */
XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(oldpage, recptr); PageSetLSN(oldpage, recptr);
PageSetLSN(newpage, recptr); PageSetLSN(newpage, recptr);
@ -354,36 +333,22 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
{ {
xl_brin_insert xlrec; xl_brin_insert xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[3];
uint8 info; uint8 info;
info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0); info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
xlrec.node = idxrel->rd_node;
xlrec.heapBlk = heapBlk; xlrec.heapBlk = heapBlk;
xlrec.pagesPerRange = pagesPerRange; xlrec.pagesPerRange = pagesPerRange;
xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf); xlrec.offnum = off;
xlrec.tuplen = itemsz;
ItemPointerSet(&xlrec.tid, blk, off);
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = SizeOfBrinInsert; XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
rdata[0].buffer = InvalidBuffer;
rdata[0].buffer_std = false;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) tup; XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
rdata[1].len = itemsz; XLogRegisterBufData(0, (char *) tup, itemsz);
rdata[1].buffer = extended ? InvalidBuffer : *buffer;
rdata[1].buffer_std = true;
rdata[1].next = &(rdata[2]);
rdata[2].data = (char *) NULL; XLogRegisterBuffer(1, revmapbuf, 0);
rdata[2].len = 0;
rdata[2].buffer = revmapbuf;
rdata[2].buffer_std = false;
rdata[2].next = NULL;
recptr = XLogInsert(RM_BRIN_ID, info, rdata); recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetLSN(BufferGetPage(revmapbuf), recptr); PageSetLSN(BufferGetPage(revmapbuf), recptr);

View File

@ -477,23 +477,16 @@ revmap_physical_extend(BrinRevmap *revmap)
{ {
xl_brin_revmap_extend xlrec; xl_brin_revmap_extend xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
xlrec.node = revmap->rm_irel->rd_node;
xlrec.targetBlk = mapBlk; xlrec.targetBlk = mapBlk;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfBrinRevmapExtend;
rdata[0].buffer = InvalidBuffer;
rdata[0].buffer_std = false;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) NULL; XLogBeginInsert();
rdata[1].len = 0; XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
rdata[1].buffer = revmap->rm_metaBuf; XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
rdata[1].buffer_std = false;
rdata[1].next = NULL;
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, rdata); XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
PageSetLSN(metapage, recptr); PageSetLSN(metapage, recptr);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }

View File

@ -20,17 +20,15 @@
* xlog replay routines * xlog replay routines
*/ */
static void static void
brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record) brin_xlog_createidx(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_brin_createidx *xlrec = (xl_brin_createidx *) XLogRecGetData(record); xl_brin_createidx *xlrec = (xl_brin_createidx *) XLogRecGetData(record);
Buffer buf; Buffer buf;
Page page; Page page;
/* Backup blocks are not used in create_index records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
/* create the index' metapage */ /* create the index' metapage */
buf = XLogReadBuffer(xlrec->node, BRIN_METAPAGE_BLKNO, true); buf = XLogInitBufferForRedo(record, 0);
Assert(BufferIsValid(buf)); Assert(BufferIsValid(buf));
page = (Page) BufferGetPage(buf); page = (Page) BufferGetPage(buf);
brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version); brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version);
@ -44,51 +42,47 @@ brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
* revmap. * revmap.
*/ */
static void static void
brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record, brin_xlog_insert_update(XLogReaderState *record,
xl_brin_insert *xlrec, BrinTuple *tuple) xl_brin_insert *xlrec)
{ {
BlockNumber blkno; XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer; Buffer buffer;
Page page; Page page;
XLogRedoAction action; XLogRedoAction action;
blkno = ItemPointerGetBlockNumber(&xlrec->tid);
/* /*
* If we inserted the first and only tuple on the page, re-initialize the * If we inserted the first and only tuple on the page, re-initialize the
* page from scratch. * page from scratch.
*/ */
if (record->xl_info & XLOG_BRIN_INIT_PAGE) if (XLogRecGetInfo(record) & XLOG_BRIN_INIT_PAGE)
{ {
/* buffer = XLogInitBufferForRedo(record, 0);
* No full-page image here. Don't try to read it, because there
* might be one for the revmap buffer, below.
*/
buffer = XLogReadBuffer(xlrec->node, blkno, true);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
brin_page_init(page, BRIN_PAGETYPE_REGULAR); brin_page_init(page, BRIN_PAGETYPE_REGULAR);
action = BLK_NEEDS_REDO; action = BLK_NEEDS_REDO;
} }
else else
{ {
action = XLogReadBufferForRedo(lsn, record, 0, action = XLogReadBufferForRedo(record, 0, &buffer);
xlrec->node, blkno, &buffer);
} }
/* insert the index item into the page */ /* insert the index item into the page */
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
OffsetNumber offnum; OffsetNumber offnum;
BrinTuple *tuple;
Size tuplen;
tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
Assert(tuple->bt_blkno == xlrec->heapBlk); Assert(tuple->bt_blkno == xlrec->heapBlk);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
offnum = ItemPointerGetOffsetNumber(&(xlrec->tid)); offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum) if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_insert_update: invalid max offset number"); elog(PANIC, "brin_xlog_insert_update: invalid max offset number");
offnum = PageAddItem(page, (Item) tuple, xlrec->tuplen, offnum, true, offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false);
false);
if (offnum == InvalidOffsetNumber) if (offnum == InvalidOffsetNumber)
elog(PANIC, "brin_xlog_insert_update: failed to add tuple"); elog(PANIC, "brin_xlog_insert_update: failed to add tuple");
@ -99,16 +93,17 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
/* update the revmap */ /* update the revmap */
action = XLogReadBufferForRedo(lsn, record, action = XLogReadBufferForRedo(record, 1, &buffer);
record->xl_info & XLOG_BRIN_INIT_PAGE ? 0 : 1,
xlrec->node,
xlrec->revmapBlk, &buffer);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
ItemPointerData tid;
BlockNumber blkno = BufferGetBlockNumber(buffer);
ItemPointerSet(&tid, blkno, xlrec->offnum);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk,
xlrec->tid); tid);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
@ -122,34 +117,26 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
* replay a BRIN index insertion * replay a BRIN index insertion
*/ */
static void static void
brin_xlog_insert(XLogRecPtr lsn, XLogRecord *record) brin_xlog_insert(XLogReaderState *record)
{ {
xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record); xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record);
BrinTuple *newtup;
newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinInsert); brin_xlog_insert_update(record, xlrec);
brin_xlog_insert_update(lsn, record, xlrec, newtup);
} }
/* /*
* replay a BRIN index update * replay a BRIN index update
*/ */
static void static void
brin_xlog_update(XLogRecPtr lsn, XLogRecord *record) brin_xlog_update(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record); xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record);
BlockNumber blkno;
Buffer buffer; Buffer buffer;
BrinTuple *newtup;
XLogRedoAction action; XLogRedoAction action;
newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinUpdate);
/* First remove the old tuple */ /* First remove the old tuple */
blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid)); action = XLogReadBufferForRedo(record, 2, &buffer);
action = XLogReadBufferForRedo(lsn, record, 2, xlrec->insert.node,
blkno, &buffer);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
Page page; Page page;
@ -157,7 +144,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid)); offnum = xlrec->oldOffnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum) if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_update: invalid max offset number"); elog(PANIC, "brin_xlog_update: invalid max offset number");
@ -168,7 +155,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
} }
/* Then insert the new tuple and update revmap, like in an insertion. */ /* Then insert the new tuple and update revmap, like in an insertion. */
brin_xlog_insert_update(lsn, record, &xlrec->insert, newtup); brin_xlog_insert_update(record, &xlrec->insert);
if (BufferIsValid(buffer)) if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
@ -178,30 +165,27 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
* Update a tuple on a single page. * Update a tuple on a single page.
*/ */
static void static void
brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record) brin_xlog_samepage_update(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_brin_samepage_update *xlrec; xl_brin_samepage_update *xlrec;
BlockNumber blkno;
Buffer buffer; Buffer buffer;
XLogRedoAction action; XLogRedoAction action;
xlrec = (xl_brin_samepage_update *) XLogRecGetData(record); xlrec = (xl_brin_samepage_update *) XLogRecGetData(record);
blkno = ItemPointerGetBlockNumber(&(xlrec->tid)); action = XLogReadBufferForRedo(record, 0, &buffer);
action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, blkno,
&buffer);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
int tuplen; Size tuplen;
BrinTuple *mmtuple; BrinTuple *mmtuple;
Page page; Page page;
OffsetNumber offnum; OffsetNumber offnum;
tuplen = record->xl_len - SizeOfBrinSamepageUpdate; mmtuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
mmtuple = (BrinTuple *) ((char *) xlrec + SizeOfBrinSamepageUpdate);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
offnum = ItemPointerGetOffsetNumber(&(xlrec->tid)); offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum) if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_samepage_update: invalid max offset number"); elog(PANIC, "brin_xlog_samepage_update: invalid max offset number");
@ -223,18 +207,23 @@ brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
* Replay a revmap page extension * Replay a revmap page extension
*/ */
static void static void
brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record) brin_xlog_revmap_extend(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_brin_revmap_extend *xlrec; xl_brin_revmap_extend *xlrec;
Buffer metabuf; Buffer metabuf;
Buffer buf; Buffer buf;
Page page; Page page;
BlockNumber targetBlk;
XLogRedoAction action; XLogRedoAction action;
xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record); xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record);
XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk);
Assert(xlrec->targetBlk == targetBlk);
/* Update the metapage */ /* Update the metapage */
action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, action = XLogReadBufferForRedo(record, 0, &metabuf);
BRIN_METAPAGE_BLKNO, &metabuf);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
Page metapg; Page metapg;
@ -255,7 +244,7 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
* image here. * image here.
*/ */
buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true); buf = XLogInitBufferForRedo(record, 1);
page = (Page) BufferGetPage(buf); page = (Page) BufferGetPage(buf);
brin_page_init(page, BRIN_PAGETYPE_REVMAP); brin_page_init(page, BRIN_PAGETYPE_REVMAP);
@ -268,26 +257,26 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
} }
void void
brin_redo(XLogRecPtr lsn, XLogRecord *record) brin_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info & XLOG_BRIN_OPMASK) switch (info & XLOG_BRIN_OPMASK)
{ {
case XLOG_BRIN_CREATE_INDEX: case XLOG_BRIN_CREATE_INDEX:
brin_xlog_createidx(lsn, record); brin_xlog_createidx(record);
break; break;
case XLOG_BRIN_INSERT: case XLOG_BRIN_INSERT:
brin_xlog_insert(lsn, record); brin_xlog_insert(record);
break; break;
case XLOG_BRIN_UPDATE: case XLOG_BRIN_UPDATE:
brin_xlog_update(lsn, record); brin_xlog_update(record);
break; break;
case XLOG_BRIN_SAMEPAGE_UPDATE: case XLOG_BRIN_SAMEPAGE_UPDATE:
brin_xlog_samepage_update(lsn, record); brin_xlog_samepage_update(record);
break; break;
case XLOG_BRIN_REVMAP_EXTEND: case XLOG_BRIN_REVMAP_EXTEND:
brin_xlog_revmap_extend(lsn, record); brin_xlog_revmap_extend(record);
break; break;
default: default:
elog(PANIC, "brin_redo: unknown op code %u", info); elog(PANIC, "brin_redo: unknown op code %u", info);

View File

@ -326,7 +326,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
Buffer childbuf, GinStatsData *buildStats) Buffer childbuf, GinStatsData *buildStats)
{ {
Page page = BufferGetPage(stack->buffer); Page page = BufferGetPage(stack->buffer);
XLogRecData *payloadrdata;
GinPlaceToPageRC rc; GinPlaceToPageRC rc;
uint16 xlflags = 0; uint16 xlflags = 0;
Page childpage = NULL; Page childpage = NULL;
@ -351,12 +350,36 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/* /*
* Try to put the incoming tuple on the page. placeToPage will decide if * Try to put the incoming tuple on the page. placeToPage will decide if
* the page needs to be split. * the page needs to be split.
*
* WAL-logging this operation is a bit funny:
*
* We're responsible for calling XLogBeginInsert() and XLogInsert().
* XLogBeginInsert() must be called before placeToPage, because
* placeToPage can register some data to the WAL record.
*
* If placeToPage returns INSERTED, placeToPage has already called
* START_CRIT_SECTION(), and we're responsible for calling
* END_CRIT_SECTION. When it returns INSERTED, it is also responsible for
* registering any data required to replay the operation with
* XLogRegisterBufData(0, ...). It may only add data to block index 0; the
* main data of the WAL record is reserved for this function.
*
* If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
* Splits happen infrequently, so we just make a full-page image of all
* the pages involved.
*/ */
if (RelationNeedsWAL(btree->index))
XLogBeginInsert();
rc = btree->placeToPage(btree, stack->buffer, stack, rc = btree->placeToPage(btree, stack->buffer, stack,
insertdata, updateblkno, insertdata, updateblkno,
&payloadrdata, &newlpage, &newrpage); &newlpage, &newrpage);
if (rc == UNMODIFIED) if (rc == UNMODIFIED)
{
XLogResetInsertion();
return true; return true;
}
else if (rc == INSERTED) else if (rc == INSERTED)
{ {
/* placeToPage did START_CRIT_SECTION() */ /* placeToPage did START_CRIT_SECTION() */
@ -372,17 +395,18 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
if (RelationNeedsWAL(btree->index)) if (RelationNeedsWAL(btree->index))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[3];
ginxlogInsert xlrec; ginxlogInsert xlrec;
BlockIdData childblknos[2]; BlockIdData childblknos[2];
xlrec.node = btree->index->rd_node; /*
xlrec.blkno = BufferGetBlockNumber(stack->buffer); * placetopage already registered stack->buffer as block 0.
*/
xlrec.flags = xlflags; xlrec.flags = xlflags;
rdata[0].buffer = InvalidBuffer; if (childbuf != InvalidBuffer)
rdata[0].data = (char *) &xlrec; XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
rdata[0].len = sizeof(ginxlogInsert);
XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
/* /*
* Log information about child if this was an insertion of a * Log information about child if this was an insertion of a
@ -390,26 +414,13 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
*/ */
if (childbuf != InvalidBuffer) if (childbuf != InvalidBuffer)
{ {
rdata[0].next = &rdata[1];
BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf)); BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink); BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
XLogRegisterData((char *) childblknos,
rdata[1].buffer = InvalidBuffer; sizeof(BlockIdData) * 2);
rdata[1].data = (char *) childblknos;
rdata[1].len = sizeof(BlockIdData) * 2;
rdata[1].next = &rdata[2];
rdata[2].buffer = childbuf;
rdata[2].buffer_std = false;
rdata[2].data = NULL;
rdata[2].len = 0;
rdata[2].next = payloadrdata;
} }
else
rdata[0].next = payloadrdata;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
if (childbuf != InvalidBuffer) if (childbuf != InvalidBuffer)
PageSetLSN(childpage, recptr); PageSetLSN(childpage, recptr);
@ -421,10 +432,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
} }
else if (rc == SPLIT) else if (rc == SPLIT)
{ {
/* Didn't fit, have to split */ /* Didn't fit, had to split */
Buffer rbuffer; Buffer rbuffer;
BlockNumber savedRightLink; BlockNumber savedRightLink;
XLogRecData rdata[2];
ginxlogSplit data; ginxlogSplit data;
Buffer lbuffer = InvalidBuffer; Buffer lbuffer = InvalidBuffer;
Page newrootpg = NULL; Page newrootpg = NULL;
@ -448,7 +458,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
*/ */
data.node = btree->index->rd_node; data.node = btree->index->rd_node;
data.rblkno = BufferGetBlockNumber(rbuffer);
data.flags = xlflags; data.flags = xlflags;
if (childbuf != InvalidBuffer) if (childbuf != InvalidBuffer)
{ {
@ -462,23 +471,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
else else
data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber; data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogSplit);
if (childbuf != InvalidBuffer)
{
rdata[0].next = &rdata[1];
rdata[1].buffer = childbuf;
rdata[1].buffer_std = false;
rdata[1].data = NULL;
rdata[1].len = 0;
rdata[1].next = payloadrdata;
}
else
rdata[0].next = payloadrdata;
if (stack->parent == NULL) if (stack->parent == NULL)
{ {
/* /*
@ -496,12 +488,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
buildStats->nEntryPages++; buildStats->nEntryPages++;
} }
/* data.rrlink = InvalidBlockNumber;
* root never has a right-link, so we borrow the rrlink field to
* store the root block number.
*/
data.rrlink = BufferGetBlockNumber(stack->buffer);
data.lblkno = BufferGetBlockNumber(lbuffer);
data.flags |= GIN_SPLIT_ROOT; data.flags |= GIN_SPLIT_ROOT;
GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber; GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
@ -524,7 +511,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
{ {
/* split non-root page */ /* split non-root page */
data.rrlink = savedRightLink; data.rrlink = savedRightLink;
data.lblkno = BufferGetBlockNumber(stack->buffer);
GinPageGetOpaque(newrpage)->rightlink = savedRightLink; GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT; GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
@ -572,7 +558,28 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); /*
* We just take full page images of all the split pages. Splits
* are uncommon enough that it's not worth complicating the code
* to be more efficient.
*/
if (stack->parent == NULL)
{
XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
}
else
{
XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
}
if (BufferIsValid(childbuf))
XLogRegisterBuffer(3, childbuf, 0);
XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
PageSetLSN(BufferGetPage(stack->buffer), recptr); PageSetLSN(BufferGetPage(stack->buffer), recptr);
PageSetLSN(BufferGetPage(rbuffer), recptr); PageSetLSN(BufferGetPage(rbuffer), recptr);
if (stack->parent == NULL) if (stack->parent == NULL)

View File

@ -98,20 +98,19 @@ static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
static void dataSplitPageInternal(GinBtree btree, Buffer origbuf, static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno, void *insertdata, BlockNumber updateblkno,
XLogRecData **prdata, Page *newlpage, Page *newrpage); Page *newlpage, Page *newrpage);
static disassembledLeaf *disassembleLeaf(Page page); static disassembledLeaf *disassembleLeaf(Page page);
static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining); static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
int nNewItems); int nNewItems);
static XLogRecData *constructLeafRecompressWALData(Buffer buf, static void registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf);
disassembledLeaf *leaf);
static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf); static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
static void dataPlaceToPageLeafSplit(Buffer buf, static void dataPlaceToPageLeafSplit(Buffer buf,
disassembledLeaf *leaf, disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound, ItemPointerData lbound, ItemPointerData rbound,
XLogRecData **prdata, Page lpage, Page rpage); Page lpage, Page rpage);
/* /*
* Read TIDs from leaf data page to single uncompressed array. The TIDs are * Read TIDs from leaf data page to single uncompressed array. The TIDs are
@ -428,8 +427,7 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
*/ */
static GinPlaceToPageRC static GinPlaceToPageRC
dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, XLogRecData **prdata, void *insertdata, Page *newlpage, Page *newrpage)
Page *newlpage, Page *newrpage)
{ {
GinBtreeDataLeafInsertData *items = insertdata; GinBtreeDataLeafInsertData *items = insertdata;
ItemPointer newItems = &items->items[items->curitem]; ItemPointer newItems = &items->items[items->curitem];
@ -602,9 +600,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
*/ */
MemoryContextSwitchTo(oldCxt); MemoryContextSwitchTo(oldCxt);
if (RelationNeedsWAL(btree->index)) if (RelationNeedsWAL(btree->index))
*prdata = constructLeafRecompressWALData(buf, leaf); registerLeafRecompressWALData(buf, leaf);
else
*prdata = NULL;
START_CRIT_SECTION(); START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buf, leaf); dataPlaceToPageLeafRecompress(buf, leaf);
@ -685,7 +681,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
*newrpage = MemoryContextAlloc(oldCxt, BLCKSZ); *newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound, dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
prdata, *newlpage, *newrpage); *newlpage, *newrpage);
Assert(GinPageRightMost(page) || Assert(GinPageRightMost(page) ||
ginCompareItemPointers(GinDataPageGetRightBound(*newlpage), ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
@ -791,7 +787,6 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
*/ */
if (removedsomething) if (removedsomething)
{ {
XLogRecData *payloadrdata = NULL;
bool modified; bool modified;
/* /*
@ -818,7 +813,10 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
} }
if (RelationNeedsWAL(indexrel)) if (RelationNeedsWAL(indexrel))
payloadrdata = constructLeafRecompressWALData(buffer, leaf); {
XLogBeginInsert();
registerLeafRecompressWALData(buffer, leaf);
}
START_CRIT_SECTION(); START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buffer, leaf); dataPlaceToPageLeafRecompress(buffer, leaf);
@ -827,18 +825,8 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
if (RelationNeedsWAL(indexrel)) if (RelationNeedsWAL(indexrel))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata;
ginxlogVacuumDataLeafPage xlrec;
xlrec.node = indexrel->rd_node; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
xlrec.blkno = BufferGetBlockNumber(buffer);
rdata.buffer = InvalidBuffer;
rdata.data = (char *) &xlrec;
rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
rdata.next = payloadrdata;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -850,13 +838,12 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
* Construct a ginxlogRecompressDataLeaf record representing the changes * Construct a ginxlogRecompressDataLeaf record representing the changes
* in *leaf. * in *leaf.
*/ */
static XLogRecData * static void
constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf) registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
{ {
int nmodified = 0; int nmodified = 0;
char *walbufbegin; char *walbufbegin;
char *walbufend; char *walbufend;
XLogRecData *rdata;
dlist_iter iter; dlist_iter iter;
int segno; int segno;
ginxlogRecompressDataLeaf *recompress_xlog; ginxlogRecompressDataLeaf *recompress_xlog;
@ -871,12 +858,11 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
nmodified++; nmodified++;
} }
walbufbegin = palloc( walbufbegin =
sizeof(ginxlogRecompressDataLeaf) + palloc(sizeof(ginxlogRecompressDataLeaf) +
BLCKSZ + /* max size needed to hold the segment BLCKSZ + /* max size needed to hold the segment data */
* data */ nmodified * 2 /* (segno + action) per action */
nmodified * 2 + /* (segno + action) per action */ );
sizeof(XLogRecData));
walbufend = walbufbegin; walbufend = walbufbegin;
recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend; recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
@ -944,14 +930,10 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
segno++; segno++;
} }
rdata = (XLogRecData *) MAXALIGN(walbufend);
rdata->buffer = buf;
rdata->buffer_std = TRUE;
rdata->data = walbufbegin;
rdata->len = walbufend - walbufbegin;
rdata->next = NULL;
return rdata; XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, walbufbegin, walbufend - walbufbegin);
} }
/* /*
@ -1024,7 +1006,7 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
static void static void
dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf, dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound, ItemPointerData lbound, ItemPointerData rbound,
XLogRecData **prdata, Page lpage, Page rpage) Page lpage, Page rpage)
{ {
char *ptr; char *ptr;
int segsize; int segsize;
@ -1034,10 +1016,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
dlist_node *firstright; dlist_node *firstright;
leafSegmentInfo *seginfo; leafSegmentInfo *seginfo;
/* these must be static so they can be returned to caller */
static ginxlogSplitDataLeaf split_xlog;
static XLogRecData rdata[3];
/* Initialize temporary pages to hold the new left and right pages */ /* Initialize temporary pages to hold the new left and right pages */
GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ); GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ); GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
@ -1092,29 +1070,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
Assert(rsize == leaf->rsize); Assert(rsize == leaf->rsize);
GinDataPageSetDataSize(rpage, rsize); GinDataPageSetDataSize(rpage, rsize);
*GinDataPageGetRightBound(rpage) = rbound; *GinDataPageGetRightBound(rpage) = rbound;
/* Create WAL record */
split_xlog.lsize = lsize;
split_xlog.rsize = rsize;
split_xlog.lrightbound = lbound;
split_xlog.rrightbound = rbound;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &split_xlog;
rdata[0].len = sizeof(ginxlogSplitDataLeaf);
rdata[0].next = &rdata[1];
rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) GinDataLeafPageGetPostingList(lpage);
rdata[1].len = lsize;
rdata[1].next = &rdata[2];
rdata[2].buffer = InvalidBuffer;
rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
rdata[2].len = rsize;
rdata[2].next = NULL;
*prdata = rdata;
} }
/* /*
@ -1124,29 +1079,30 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
* *
* In addition to inserting the given item, the downlink of the existing item * In addition to inserting the given item, the downlink of the existing item
* at 'off' is updated to point to 'updateblkno'. * at 'off' is updated to point to 'updateblkno'.
*
* On INSERTED, registers the buffer as buffer ID 0, with data.
* On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
*/ */
static GinPlaceToPageRC static GinPlaceToPageRC
dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno, void *insertdata, BlockNumber updateblkno,
XLogRecData **prdata, Page *newlpage, Page *newrpage) Page *newlpage, Page *newrpage)
{ {
Page page = BufferGetPage(buf); Page page = BufferGetPage(buf);
OffsetNumber off = stack->off; OffsetNumber off = stack->off;
PostingItem *pitem; PostingItem *pitem;
/* these must be static so they can be returned to caller */ /* this must be static so it can be returned to caller */
static XLogRecData rdata;
static ginxlogInsertDataInternal data; static ginxlogInsertDataInternal data;
/* split if we have to */ /* split if we have to */
if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem)) if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
{ {
dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno, dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
prdata, newlpage, newrpage); newlpage, newrpage);
return SPLIT; return SPLIT;
} }
*prdata = &rdata;
Assert(GinPageIsData(page)); Assert(GinPageIsData(page));
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -1159,14 +1115,15 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata; pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off); GinDataPageAddPostingItem(page, pitem, off);
data.offset = off; if (RelationNeedsWAL(btree->index))
data.newitem = *pitem; {
data.offset = off;
data.newitem = *pitem;
rdata.buffer = buf; XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
rdata.buffer_std = TRUE; XLogRegisterBufData(0, (char *) &data,
rdata.data = (char *) &data; sizeof(ginxlogInsertDataInternal));
rdata.len = sizeof(ginxlogInsertDataInternal); }
rdata.next = NULL;
return INSERTED; return INSERTED;
} }
@ -1178,7 +1135,6 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
static GinPlaceToPageRC static GinPlaceToPageRC
dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno, void *insertdata, BlockNumber updateblkno,
XLogRecData **prdata,
Page *newlpage, Page *newrpage) Page *newlpage, Page *newrpage)
{ {
Page page = BufferGetPage(buf); Page page = BufferGetPage(buf);
@ -1187,11 +1143,11 @@ dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
if (GinPageIsLeaf(page)) if (GinPageIsLeaf(page))
return dataPlaceToPageLeaf(btree, buf, stack, insertdata, return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
prdata, newlpage, newrpage); newlpage, newrpage);
else else
return dataPlaceToPageInternal(btree, buf, stack, return dataPlaceToPageInternal(btree, buf, stack,
insertdata, updateblkno, insertdata, updateblkno,
prdata, newlpage, newrpage); newlpage, newrpage);
} }
/* /*
@ -1202,7 +1158,7 @@ static void
dataSplitPageInternal(GinBtree btree, Buffer origbuf, dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno, void *insertdata, BlockNumber updateblkno,
XLogRecData **prdata, Page *newlpage, Page *newrpage) Page *newlpage, Page *newrpage)
{ {
Page oldpage = BufferGetPage(origbuf); Page oldpage = BufferGetPage(origbuf);
OffsetNumber off = stack->off; OffsetNumber off = stack->off;
@ -1215,19 +1171,13 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
Page lpage; Page lpage;
Page rpage; Page rpage;
OffsetNumber separator; OffsetNumber separator;
PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
/* these must be static so they can be returned to caller */
static ginxlogSplitDataInternal data;
static XLogRecData rdata[4];
static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
lpage = PageGetTempPage(oldpage); lpage = PageGetTempPage(oldpage);
rpage = PageGetTempPage(oldpage); rpage = PageGetTempPage(oldpage);
GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize); GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
*prdata = rdata;
/* /*
* First construct a new list of PostingItems, which includes all the old * First construct a new list of PostingItems, which includes all the old
* items, and the new item. * items, and the new item.
@ -1277,20 +1227,6 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
/* set up right bound for right page */ /* set up right bound for right page */
*GinDataPageGetRightBound(rpage) = oldbound; *GinDataPageGetRightBound(rpage) = oldbound;
data.separator = separator;
data.nitem = nitems;
data.rightbound = oldbound;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogSplitDataInternal);
rdata[0].next = &rdata[1];
rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) allitems;
rdata[1].len = nitems * sizeof(PostingItem);
rdata[1].next = NULL;
*newlpage = lpage; *newlpage = lpage;
*newrpage = rpage; *newrpage = rpage;
} }
@ -1797,24 +1733,18 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
if (RelationNeedsWAL(index)) if (RelationNeedsWAL(index))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
ginxlogCreatePostingTree data; ginxlogCreatePostingTree data;
data.node = index->rd_node;
data.blkno = blkno;
data.size = rootsize; data.size = rootsize;
rdata[0].buffer = InvalidBuffer; XLogBeginInsert();
rdata[0].data = (char *) &data; XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
rdata[0].len = sizeof(ginxlogCreatePostingTree);
rdata[0].next = &rdata[1];
rdata[1].buffer = InvalidBuffer; XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
rdata[1].data = (char *) GinDataLeafPageGetPostingList(page); rootsize);
rdata[1].len = rootsize; XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
rdata[1].next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }

View File

@ -22,7 +22,7 @@
static void entrySplitPage(GinBtree btree, Buffer origbuf, static void entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack, GinBtreeStack *stack,
void *insertPayload, void *insertPayload,
BlockNumber updateblkno, XLogRecData **prdata, BlockNumber updateblkno,
Page *newlpage, Page *newrpage); Page *newlpage, Page *newrpage);
/* /*
@ -515,33 +515,33 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
* On insertion to an internal node, in addition to inserting the given item, * On insertion to an internal node, in addition to inserting the given item,
* the downlink of the existing item at 'off' is updated to point to * the downlink of the existing item at 'off' is updated to point to
* 'updateblkno'. * 'updateblkno'.
*
* On INSERTED, registers the buffer as buffer ID 0, with data.
* On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
*/ */
static GinPlaceToPageRC static GinPlaceToPageRC
entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertPayload, BlockNumber updateblkno, void *insertPayload, BlockNumber updateblkno,
XLogRecData **prdata, Page *newlpage, Page *newrpage) Page *newlpage, Page *newrpage)
{ {
GinBtreeEntryInsertData *insertData = insertPayload; GinBtreeEntryInsertData *insertData = insertPayload;
Page page = BufferGetPage(buf); Page page = BufferGetPage(buf);
OffsetNumber off = stack->off; OffsetNumber off = stack->off;
OffsetNumber placed; OffsetNumber placed;
int cnt = 0;
/* these must be static so they can be returned to caller */ /* this must be static so it can be returned to caller. */
static XLogRecData rdata[3];
static ginxlogInsertEntry data; static ginxlogInsertEntry data;
/* quick exit if it doesn't fit */ /* quick exit if it doesn't fit */
if (!entryIsEnoughSpace(btree, buf, off, insertData)) if (!entryIsEnoughSpace(btree, buf, off, insertData))
{ {
entrySplitPage(btree, buf, stack, insertPayload, updateblkno, entrySplitPage(btree, buf, stack, insertPayload, updateblkno,
prdata, newlpage, newrpage); newlpage, newrpage);
return SPLIT; return SPLIT;
} }
START_CRIT_SECTION(); START_CRIT_SECTION();
*prdata = rdata;
entryPreparePage(btree, page, off, insertData, updateblkno); entryPreparePage(btree, page, off, insertData, updateblkno);
placed = PageAddItem(page, placed = PageAddItem(page,
@ -552,21 +552,17 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"", elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index)); RelationGetRelationName(btree->index));
data.isDelete = insertData->isDelete; if (RelationNeedsWAL(btree->index))
data.offset = off; {
data.isDelete = insertData->isDelete;
data.offset = off;
rdata[cnt].buffer = buf; XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
rdata[cnt].buffer_std = true; XLogRegisterBufData(0, (char *) &data,
rdata[cnt].data = (char *) &data; offsetof(ginxlogInsertEntry, tuple));
rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple); XLogRegisterBufData(0, (char *) insertData->entry,
rdata[cnt].next = &rdata[cnt + 1]; IndexTupleSize(insertData->entry));
cnt++; }
rdata[cnt].buffer = buf;
rdata[cnt].buffer_std = true;
rdata[cnt].data = (char *) insertData->entry;
rdata[cnt].len = IndexTupleSize(insertData->entry);
rdata[cnt].next = NULL;
return INSERTED; return INSERTED;
} }
@ -581,7 +577,7 @@ static void
entrySplitPage(GinBtree btree, Buffer origbuf, entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack, GinBtreeStack *stack,
void *insertPayload, void *insertPayload,
BlockNumber updateblkno, XLogRecData **prdata, BlockNumber updateblkno,
Page *newlpage, Page *newrpage) Page *newlpage, Page *newrpage)
{ {
GinBtreeEntryInsertData *insertData = insertPayload; GinBtreeEntryInsertData *insertData = insertPayload;
@ -590,7 +586,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
maxoff, maxoff,
separator = InvalidOffsetNumber; separator = InvalidOffsetNumber;
Size totalsize = 0; Size totalsize = 0;
Size tupstoresize;
Size lsize = 0, Size lsize = 0,
size; size;
char *ptr; char *ptr;
@ -599,13 +594,8 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
Size pageSize = PageGetPageSize(lpage); Size pageSize = PageGetPageSize(lpage);
char tupstore[2 * BLCKSZ];
/* these must be static so they can be returned to caller */
static XLogRecData rdata[2];
static ginxlogSplitEntry data;
static char tupstore[2 * BLCKSZ];
*prdata = rdata;
entryPreparePage(btree, lpage, off, insertData, updateblkno); entryPreparePage(btree, lpage, off, insertData, updateblkno);
/* /*
@ -638,7 +628,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
ptr += size; ptr += size;
totalsize += size + sizeof(ItemIdData); totalsize += size + sizeof(ItemIdData);
} }
tupstoresize = ptr - tupstore;
/* /*
* Initialize the left and right pages, and copy all the tuples back to * Initialize the left and right pages, and copy all the tuples back to
@ -673,19 +662,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
ptr += MAXALIGN(IndexTupleSize(itup)); ptr += MAXALIGN(IndexTupleSize(itup));
} }
data.separator = separator;
data.nitem = maxoff;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogSplitEntry);
rdata[0].next = &rdata[1];
rdata[1].buffer = InvalidBuffer;
rdata[1].data = tupstore;
rdata[1].len = tupstoresize;
rdata[1].next = NULL;
*newlpage = lpage; *newlpage = lpage;
*newrpage = rpage; *newrpage = rpage;
} }

View File

@ -108,26 +108,19 @@ writeListPage(Relation index, Buffer buffer,
if (RelationNeedsWAL(index)) if (RelationNeedsWAL(index))
{ {
XLogRecData rdata[2];
ginxlogInsertListPage data; ginxlogInsertListPage data;
XLogRecPtr recptr; XLogRecPtr recptr;
data.node = index->rd_node;
data.blkno = BufferGetBlockNumber(buffer);
data.rightlink = rightlink; data.rightlink = rightlink;
data.ntuples = ntuples; data.ntuples = ntuples;
rdata[0].buffer = InvalidBuffer; XLogBeginInsert();
rdata[0].data = (char *) &data; XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
rdata[0].len = sizeof(ginxlogInsertListPage);
rdata[0].next = rdata + 1;
rdata[1].buffer = InvalidBuffer; XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
rdata[1].data = workspace; XLogRegisterBufData(0, workspace, size);
rdata[1].len = size;
rdata[1].next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -224,26 +217,23 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
Buffer metabuffer; Buffer metabuffer;
Page metapage; Page metapage;
GinMetaPageData *metadata = NULL; GinMetaPageData *metadata = NULL;
XLogRecData rdata[2];
Buffer buffer = InvalidBuffer; Buffer buffer = InvalidBuffer;
Page page = NULL; Page page = NULL;
ginxlogUpdateMeta data; ginxlogUpdateMeta data;
bool separateList = false; bool separateList = false;
bool needCleanup = false; bool needCleanup = false;
int cleanupSize; int cleanupSize;
bool needWal;
if (collector->ntuples == 0) if (collector->ntuples == 0)
return; return;
needWal = RelationNeedsWAL(index);
data.node = index->rd_node; data.node = index->rd_node;
data.ntuples = 0; data.ntuples = 0;
data.newRightlink = data.prevTail = InvalidBlockNumber; data.newRightlink = data.prevTail = InvalidBlockNumber;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogUpdateMeta);
rdata[0].next = NULL;
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer); metapage = BufferGetPage(metabuffer);
@ -283,6 +273,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
memset(&sublist, 0, sizeof(GinMetaPageData)); memset(&sublist, 0, sizeof(GinMetaPageData));
makeSublist(index, collector->tuples, collector->ntuples, &sublist); makeSublist(index, collector->tuples, collector->ntuples, &sublist);
if (needWal)
XLogBeginInsert();
/* /*
* metapage was unlocked, see above * metapage was unlocked, see above
*/ */
@ -315,14 +308,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
LockBuffer(buffer, GIN_EXCLUSIVE); LockBuffer(buffer, GIN_EXCLUSIVE);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
rdata[0].next = rdata + 1;
rdata[1].buffer = buffer;
rdata[1].buffer_std = true;
rdata[1].data = NULL;
rdata[1].len = 0;
rdata[1].next = NULL;
Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber); Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -336,6 +321,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
metadata->nPendingPages += sublist.nPendingPages; metadata->nPendingPages += sublist.nPendingPages;
metadata->nPendingHeapTuples += sublist.nPendingHeapTuples; metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
if (needWal)
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
} }
} }
else else
@ -348,6 +336,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
int i, int i,
tupsize; tupsize;
char *ptr; char *ptr;
char *collectordata;
buffer = ReadBuffer(index, metadata->tail); buffer = ReadBuffer(index, metadata->tail);
LockBuffer(buffer, GIN_EXCLUSIVE); LockBuffer(buffer, GIN_EXCLUSIVE);
@ -356,16 +345,13 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
off = (PageIsEmpty(page)) ? FirstOffsetNumber : off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page)); OffsetNumberNext(PageGetMaxOffsetNumber(page));
rdata[0].next = rdata + 1; collectordata = ptr = (char *) palloc(collector->sumsize);
rdata[1].buffer = buffer;
rdata[1].buffer_std = true;
ptr = rdata[1].data = (char *) palloc(collector->sumsize);
rdata[1].len = collector->sumsize;
rdata[1].next = NULL;
data.ntuples = collector->ntuples; data.ntuples = collector->ntuples;
if (needWal)
XLogBeginInsert();
START_CRIT_SECTION(); START_CRIT_SECTION();
/* /*
@ -390,7 +376,12 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
off++; off++;
} }
Assert((ptr - rdata[1].data) <= collector->sumsize); Assert((ptr - collectordata) <= collector->sumsize);
if (needWal)
{
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
XLogRegisterBufData(1, collectordata, collector->sumsize);
}
metadata->tailFreeSize = PageGetExactFreeSpace(page); metadata->tailFreeSize = PageGetExactFreeSpace(page);
@ -402,13 +393,16 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
*/ */
MarkBufferDirty(metabuffer); MarkBufferDirty(metabuffer);
if (RelationNeedsWAL(index)) if (needWal)
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData)); memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata); XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
PageSetLSN(metapage, recptr); PageSetLSN(metapage, recptr);
if (buffer != InvalidBuffer) if (buffer != InvalidBuffer)
@ -526,20 +520,11 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
int i; int i;
int64 nDeletedHeapTuples = 0; int64 nDeletedHeapTuples = 0;
ginxlogDeleteListPages data; ginxlogDeleteListPages data;
XLogRecData rdata[1];
Buffer buffers[GIN_NDELETE_AT_ONCE]; Buffer buffers[GIN_NDELETE_AT_ONCE];
data.node = index->rd_node;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogDeleteListPages);
rdata[0].next = NULL;
data.ndeleted = 0; data.ndeleted = 0;
while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead) while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
{ {
data.toDelete[data.ndeleted] = blknoToDelete;
buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete); buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete);
LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE); LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE);
page = BufferGetPage(buffers[data.ndeleted]); page = BufferGetPage(buffers[data.ndeleted]);
@ -562,6 +547,13 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
if (stats) if (stats)
stats->pages_deleted += data.ndeleted; stats->pages_deleted += data.ndeleted;
/*
* This operation touches an unusually large number of pages, so
* prepare the XLogInsert machinery for that before entering the
* critical section.
*/
XLogEnsureRecordSpace(data.ndeleted, 0);
START_CRIT_SECTION(); START_CRIT_SECTION();
metadata->head = blknoToDelete; metadata->head = blknoToDelete;
@ -592,9 +584,17 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogBeginInsert();
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
for (i = 0; i < data.ndeleted; i++)
XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData)); memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata); XLogRegisterData((char *) &data,
sizeof(ginxlogDeleteListPages));
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE);
PageSetLSN(metapage, recptr); PageSetLSN(metapage, recptr);
for (i = 0; i < data.ndeleted; i++) for (i = 0; i < data.ndeleted; i++)

View File

@ -347,15 +347,13 @@ ginbuild(PG_FUNCTION_ARGS)
if (RelationNeedsWAL(index)) if (RelationNeedsWAL(index))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata;
Page page; Page page;
rdata.buffer = InvalidBuffer; XLogBeginInsert();
rdata.data = (char *) &(index->rd_node); XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT);
rdata.len = sizeof(RelFileNode); XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
rdata.next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
page = BufferGetPage(RootBuffer); page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);

View File

@ -605,19 +605,17 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
ginxlogUpdateMeta data; ginxlogUpdateMeta data;
XLogRecData rdata;
data.node = index->rd_node; data.node = index->rd_node;
data.ntuples = 0; data.ntuples = 0;
data.newRightlink = data.prevTail = InvalidBlockNumber; data.newRightlink = data.prevTail = InvalidBlockNumber;
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData)); memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
rdata.buffer = InvalidBuffer; XLogBeginInsert();
rdata.data = (char *) &data; XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
rdata.len = sizeof(ginxlogUpdateMeta); XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
rdata.next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
PageSetLSN(metapage, recptr); PageSetLSN(metapage, recptr);
} }

View File

@ -89,10 +89,6 @@ xlogVacuumPage(Relation index, Buffer buffer)
{ {
Page page = BufferGetPage(buffer); Page page = BufferGetPage(buffer);
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[3];
ginxlogVacuumPage xlrec;
uint16 lower;
uint16 upper;
/* This is only used for entry tree leaf pages. */ /* This is only used for entry tree leaf pages. */
Assert(!GinPageIsData(page)); Assert(!GinPageIsData(page));
@ -101,57 +97,14 @@ xlogVacuumPage(Relation index, Buffer buffer)
if (!RelationNeedsWAL(index)) if (!RelationNeedsWAL(index))
return; return;
xlrec.node = index->rd_node; /*
xlrec.blkno = BufferGetBlockNumber(buffer); * Always create a full image, we don't track the changes on the page at
* any more fine-grained level. This could obviously be improved...
*/
XLogBeginInsert();
XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
/* Assume we can omit data between pd_lower and pd_upper */ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
lower = ((PageHeader) page)->pd_lower;
upper = ((PageHeader) page)->pd_upper;
Assert(lower < BLCKSZ);
Assert(upper < BLCKSZ);
if (lower >= SizeOfPageHeaderData &&
upper > lower &&
upper <= BLCKSZ)
{
xlrec.hole_offset = lower;
xlrec.hole_length = upper - lower;
}
else
{
/* No "hole" to compress out */
xlrec.hole_offset = 0;
xlrec.hole_length = 0;
}
rdata[0].data = (char *) &xlrec;
rdata[0].len = sizeof(ginxlogVacuumPage);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &rdata[1];
if (xlrec.hole_length == 0)
{
rdata[1].data = (char *) page;
rdata[1].len = BLCKSZ;
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
}
else
{
/* must skip the hole */
rdata[1].data = (char *) page;
rdata[1].len = xlrec.hole_offset;
rdata[1].buffer = InvalidBuffer;
rdata[1].next = &rdata[2];
rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
rdata[2].buffer = InvalidBuffer;
rdata[2].next = NULL;
}
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -292,48 +245,27 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
if (RelationNeedsWAL(gvs->index)) if (RelationNeedsWAL(gvs->index))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[4];
ginxlogDeletePage data; ginxlogDeletePage data;
data.node = gvs->index->rd_node; /*
data.blkno = deleteBlkno; * We can't pass REGBUF_STANDARD for the deleted page, because we
data.parentBlkno = parentBlkno; * didn't set pd_lower on pre-9.4 versions. The page might've been
* binary-upgraded from an older version, and hence not have pd_lower
* set correctly. Ditto for the left page, but removing the item from
* the parent updated its pd_lower, so we know that's OK at this
* point.
*/
XLogBeginInsert();
XLogRegisterBuffer(0, dBuffer, 0);
XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
XLogRegisterBuffer(2, lBuffer, 0);
data.parentOffset = myoff; data.parentOffset = myoff;
data.leftBlkno = leftBlkno;
data.rightLink = GinPageGetOpaque(page)->rightlink; data.rightLink = GinPageGetOpaque(page)->rightlink;
/* XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
* We can't pass buffer_std = TRUE, because we didn't set pd_lower on
* pre-9.4 versions. The page might've been binary-upgraded from an
* older version, and hence not have pd_lower set correctly. Ditto for
* the left page, but removing the item from the parent updated its
* pd_lower, so we know that's OK at this point.
*/
rdata[0].buffer = dBuffer;
rdata[0].buffer_std = FALSE;
rdata[0].data = NULL;
rdata[0].len = 0;
rdata[0].next = rdata + 1;
rdata[1].buffer = pBuffer; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
rdata[1].buffer_std = TRUE;
rdata[1].data = NULL;
rdata[1].len = 0;
rdata[1].next = rdata + 2;
rdata[2].buffer = lBuffer;
rdata[2].buffer_std = FALSE;
rdata[2].data = NULL;
rdata[2].len = 0;
rdata[2].next = rdata + 3;
rdata[3].buffer = InvalidBuffer;
rdata[3].buffer_std = FALSE;
rdata[3].len = sizeof(ginxlogDeletePage);
rdata[3].data = (char *) &data;
rdata[3].next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetLSN(parentPage, recptr); PageSetLSN(parentPage, recptr);
PageSetLSN(BufferGetPage(lBuffer), recptr); PageSetLSN(BufferGetPage(lBuffer), recptr);

View File

@ -20,18 +20,15 @@
static MemoryContext opCtx; /* working memory for operations */ static MemoryContext opCtx; /* working memory for operations */
static void static void
ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record, ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
int block_index,
RelFileNode node, BlockNumber blkno)
{ {
XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer; Buffer buffer;
Page page; Page page;
if (XLogReadBufferForRedo(lsn, record, block_index, node, blkno, &buffer) if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO)
== BLK_NEEDS_REDO)
{ {
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT; GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
@ -42,18 +39,15 @@ ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
} }
static void static void
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) ginRedoCreateIndex(XLogReaderState *record)
{ {
RelFileNode *node = (RelFileNode *) XLogRecGetData(record); XLogRecPtr lsn = record->EndRecPtr;
Buffer RootBuffer, Buffer RootBuffer,
MetaBuffer; MetaBuffer;
Page page; Page page;
/* Backup blocks are not used in create_index records */ MetaBuffer = XLogInitBufferForRedo(record, 0);
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
Assert(BufferIsValid(MetaBuffer));
page = (Page) BufferGetPage(MetaBuffer); page = (Page) BufferGetPage(MetaBuffer);
GinInitMetabuffer(MetaBuffer); GinInitMetabuffer(MetaBuffer);
@ -61,8 +55,8 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(MetaBuffer); MarkBufferDirty(MetaBuffer);
RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true); RootBuffer = XLogInitBufferForRedo(record, 1);
Assert(BufferIsValid(RootBuffer)); Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
page = (Page) BufferGetPage(RootBuffer); page = (Page) BufferGetPage(RootBuffer);
GinInitBuffer(RootBuffer, GIN_LEAF); GinInitBuffer(RootBuffer, GIN_LEAF);
@ -75,18 +69,15 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record) ginRedoCreatePTree(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record); ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
char *ptr; char *ptr;
Buffer buffer; Buffer buffer;
Page page; Page page;
/* Backup blocks are not used in create_ptree records */ buffer = XLogInitBufferForRedo(record, 0);
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
buffer = XLogReadBuffer(data->node, data->blkno, true);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED); GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
@ -328,35 +319,40 @@ ginRedoInsertData(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdat
} }
static void static void
ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) ginRedoInsert(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record); ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
char *payload; #ifdef NOT_USED
BlockNumber leftChildBlkno = InvalidBlockNumber; BlockNumber leftChildBlkno = InvalidBlockNumber;
#endif
BlockNumber rightChildBlkno = InvalidBlockNumber; BlockNumber rightChildBlkno = InvalidBlockNumber;
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0; bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
/* /*
* First clear incomplete-split flag on child page if this finishes a * First clear incomplete-split flag on child page if this finishes a
* split. * split.
*/ */
if (!isLeaf) if (!isLeaf)
{ {
char *payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
#ifdef NOT_USED
leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload); leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
#endif
payload += sizeof(BlockIdData); payload += sizeof(BlockIdData);
rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload); rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
payload += sizeof(BlockIdData); payload += sizeof(BlockIdData);
ginRedoClearIncompleteSplit(lsn, record, 0, data->node, leftChildBlkno); ginRedoClearIncompleteSplit(record, 1);
} }
if (XLogReadBufferForRedo(lsn, record, isLeaf ? 0 : 1, data->node, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
data->blkno, &buffer) == BLK_NEEDS_REDO)
{ {
Page page = BufferGetPage(buffer); Page page = BufferGetPage(buffer);
Size len;
char *payload = XLogRecGetBlockData(record, 0, &len);
/* How to insert the payload is tree-type specific */ /* How to insert the payload is tree-type specific */
if (data->flags & GIN_INSERT_ISDATA) if (data->flags & GIN_INSERT_ISDATA)
@ -378,161 +374,33 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
ginRedoSplitEntry(Page lpage, Page rpage, void *rdata) ginRedoSplit(XLogReaderState *record)
{
ginxlogSplitEntry *data = (ginxlogSplitEntry *) rdata;
IndexTuple itup = (IndexTuple) ((char *) rdata + sizeof(ginxlogSplitEntry));
OffsetNumber i;
for (i = 0; i < data->separator; i++)
{
if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to gin index page");
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
}
for (i = data->separator; i < data->nitem; i++)
{
if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to gin index page");
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
}
}
static void
ginRedoSplitData(Page lpage, Page rpage, void *rdata)
{
bool isleaf = GinPageIsLeaf(lpage);
if (isleaf)
{
ginxlogSplitDataLeaf *data = (ginxlogSplitDataLeaf *) rdata;
Pointer lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf);
Pointer rptr = lptr + data->lsize;
Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize);
Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize);
memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize);
memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize);
GinDataPageSetDataSize(lpage, data->lsize);
GinDataPageSetDataSize(rpage, data->rsize);
*GinDataPageGetRightBound(lpage) = data->lrightbound;
*GinDataPageGetRightBound(rpage) = data->rrightbound;
}
else
{
ginxlogSplitDataInternal *data = (ginxlogSplitDataInternal *) rdata;
PostingItem *items = (PostingItem *) ((char *) rdata + sizeof(ginxlogSplitDataInternal));
OffsetNumber i;
OffsetNumber maxoff;
for (i = 0; i < data->separator; i++)
GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber);
for (i = data->separator; i < data->nitem; i++)
GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber);
/* set up right key */
maxoff = GinPageGetOpaque(lpage)->maxoff;
*GinDataPageGetRightBound(lpage) = GinDataPageGetPostingItem(lpage, maxoff)->key;
*GinDataPageGetRightBound(rpage) = data->rightbound;
}
}
static void
ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
{ {
ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record); ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
Buffer lbuffer, Buffer lbuffer,
rbuffer; rbuffer,
Page lpage, rootbuf;
rpage;
uint32 flags;
uint32 lflags,
rflags;
char *payload;
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0; bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
bool isData = (data->flags & GIN_INSERT_ISDATA) != 0;
bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0; bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
/* /*
* First clear incomplete-split flag on child page if this finishes a * First clear incomplete-split flag on child page if this finishes a
* split * split
*/ */
if (!isLeaf) if (!isLeaf)
ginRedoClearIncompleteSplit(lsn, record, 0, data->node, data->leftChildBlkno); ginRedoClearIncompleteSplit(record, 3);
flags = 0; if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)
if (isLeaf) elog(ERROR, "GIN split record did not contain a full-page image of left page");
flags |= GIN_LEAF;
if (isData)
flags |= GIN_DATA;
if (isLeaf && isData)
flags |= GIN_COMPRESSED;
lflags = rflags = flags; if (XLogReadBufferForRedo(record, 1, &rbuffer) != BLK_RESTORED)
if (!isRoot) elog(ERROR, "GIN split record did not contain a full-page image of right page");
lflags |= GIN_INCOMPLETE_SPLIT;
lbuffer = XLogReadBuffer(data->node, data->lblkno, true);
Assert(BufferIsValid(lbuffer));
lpage = (Page) BufferGetPage(lbuffer);
GinInitBuffer(lbuffer, lflags);
rbuffer = XLogReadBuffer(data->node, data->rblkno, true);
Assert(BufferIsValid(rbuffer));
rpage = (Page) BufferGetPage(rbuffer);
GinInitBuffer(rbuffer, rflags);
GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
GinPageGetOpaque(rpage)->rightlink = isRoot ? InvalidBlockNumber : data->rrlink;
/* Do the tree-type specific portion to restore the page contents */
if (isData)
ginRedoSplitData(lpage, rpage, payload);
else
ginRedoSplitEntry(lpage, rpage, payload);
PageSetLSN(rpage, lsn);
MarkBufferDirty(rbuffer);
PageSetLSN(lpage, lsn);
MarkBufferDirty(lbuffer);
if (isRoot) if (isRoot)
{ {
BlockNumber rootBlkno = data->rrlink; if (XLogReadBufferForRedo(record, 2, &rootbuf) != BLK_RESTORED)
Buffer rootBuf = XLogReadBuffer(data->node, rootBlkno, true); elog(ERROR, "GIN split record did not contain a full-page image of root page");
Page rootPage = BufferGetPage(rootBuf); UnlockReleaseBuffer(rootbuf);
GinInitBuffer(rootBuf, flags & ~GIN_LEAF & ~GIN_COMPRESSED);
if (isData)
{
Assert(rootBlkno != GIN_ROOT_BLKNO);
ginDataFillRoot(NULL, BufferGetPage(rootBuf),
BufferGetBlockNumber(lbuffer),
BufferGetPage(lbuffer),
BufferGetBlockNumber(rbuffer),
BufferGetPage(rbuffer));
}
else
{
Assert(rootBlkno == GIN_ROOT_BLKNO);
ginEntryFillRoot(NULL, BufferGetPage(rootBuf),
BufferGetBlockNumber(lbuffer),
BufferGetPage(lbuffer),
BufferGetBlockNumber(rbuffer),
BufferGetPage(rbuffer));
}
PageSetLSN(rootPage, lsn);
MarkBufferDirty(rootBuf);
UnlockReleaseBuffer(rootBuf);
} }
UnlockReleaseBuffer(rbuffer); UnlockReleaseBuffer(rbuffer);
@ -544,54 +412,30 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
* a XLOG_FPI record. * a XLOG_FPI record.
*/ */
static void static void
ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record) ginRedoVacuumPage(XLogReaderState *record)
{ {
ginxlogVacuumPage *xlrec = (ginxlogVacuumPage *) XLogRecGetData(record);
char *blk = ((char *) xlrec) + sizeof(ginxlogVacuumPage);
Buffer buffer; Buffer buffer;
Page page;
Assert(xlrec->hole_offset < BLCKSZ); if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
Assert(xlrec->hole_length < BLCKSZ);
/* Backup blocks are not used, we'll re-initialize the page always. */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, true);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (xlrec->hole_length == 0)
{ {
memcpy((char *) page, blk, BLCKSZ); elog(ERROR, "replay of gin entry tree page vacuum did not restore the page");
} }
else
{
memcpy((char *) page, blk, xlrec->hole_offset);
/* must zero-fill the hole */
MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
blk + xlrec->hole_offset,
BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
}
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
} }
static void static void
ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record) ginRedoVacuumDataLeafPage(XLogReaderState *record)
{ {
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record); XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer; Buffer buffer;
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->blkno, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
Page page = BufferGetPage(buffer); Page page = BufferGetPage(buffer);
Size len;
ginxlogVacuumDataLeafPage *xlrec;
xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len);
Assert(GinPageIsLeaf(page)); Assert(GinPageIsLeaf(page));
Assert(GinPageIsData(page)); Assert(GinPageIsData(page));
@ -605,30 +449,27 @@ ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) ginRedoDeletePage(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record); ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
Buffer dbuffer; Buffer dbuffer;
Buffer pbuffer; Buffer pbuffer;
Buffer lbuffer; Buffer lbuffer;
Page page; Page page;
if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->blkno, &dbuffer) if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO)
== BLK_NEEDS_REDO)
{ {
page = BufferGetPage(dbuffer); page = BufferGetPage(dbuffer);
Assert(GinPageIsData(page)); Assert(GinPageIsData(page));
GinPageGetOpaque(page)->flags = GIN_DELETED; GinPageGetOpaque(page)->flags = GIN_DELETED;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(dbuffer); MarkBufferDirty(dbuffer);
} }
if (XLogReadBufferForRedo(lsn, record, 1, data->node, data->parentBlkno, if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO)
&pbuffer) == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(pbuffer); page = BufferGetPage(pbuffer);
Assert(GinPageIsData(page)); Assert(GinPageIsData(page));
Assert(!GinPageIsLeaf(page)); Assert(!GinPageIsLeaf(page));
GinPageDeletePostingItem(page, data->parentOffset); GinPageDeletePostingItem(page, data->parentOffset);
@ -636,11 +477,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
MarkBufferDirty(pbuffer); MarkBufferDirty(pbuffer);
} }
if (XLogReadBufferForRedo(lsn, record, 2, data->node, data->leftBlkno, if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO)
&lbuffer) == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(lbuffer); page = BufferGetPage(lbuffer);
Assert(GinPageIsData(page)); Assert(GinPageIsData(page));
GinPageGetOpaque(page)->rightlink = data->rightLink; GinPageGetOpaque(page)->rightlink = data->rightLink;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
@ -656,8 +495,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record) ginRedoUpdateMetapage(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record); ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record);
Buffer metabuffer; Buffer metabuffer;
Page metapage; Page metapage;
@ -668,9 +508,8 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
* image, so restore the metapage unconditionally without looking at the * image, so restore the metapage unconditionally without looking at the
* LSN, to avoid torn page hazards. * LSN, to avoid torn page hazards.
*/ */
metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false); metabuffer = XLogInitBufferForRedo(record, 0);
if (!BufferIsValid(metabuffer)) Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
return; /* assume index was deleted, nothing to do */
metapage = BufferGetPage(metabuffer); metapage = BufferGetPage(metabuffer);
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
@ -682,17 +521,18 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
/* /*
* insert into tail page * insert into tail page
*/ */
if (XLogReadBufferForRedo(lsn, record, 0, data->node, if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
data->metadata.tail, &buffer)
== BLK_NEEDS_REDO)
{ {
Page page = BufferGetPage(buffer); Page page = BufferGetPage(buffer);
OffsetNumber off; OffsetNumber off;
int i; int i;
Size tupsize; Size tupsize;
char *payload;
IndexTuple tuples; IndexTuple tuples;
Size totaltupsize;
tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta)); payload = XLogRecGetBlockData(record, 1, &totaltupsize);
tuples = (IndexTuple) payload;
if (PageIsEmpty(page)) if (PageIsEmpty(page))
off = FirstOffsetNumber; off = FirstOffsetNumber;
@ -711,6 +551,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
off++; off++;
} }
Assert(payload + totaltupsize == (char *) tuples);
/* /*
* Increase counter of heap tuples * Increase counter of heap tuples
@ -728,8 +569,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
/* /*
* New tail * New tail
*/ */
if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->prevTail, if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
Page page = BufferGetPage(buffer); Page page = BufferGetPage(buffer);
@ -746,8 +586,9 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record) ginRedoInsertListPage(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record); ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page; Page page;
@ -755,15 +596,12 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
off = FirstOffsetNumber; off = FirstOffsetNumber;
int i, int i,
tupsize; tupsize;
IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage)); char *payload;
IndexTuple tuples;
Size totaltupsize;
/* /* We always re-initialize the page. */
* Backup blocks are not used, we always re-initialize the page. buffer = XLogInitBufferForRedo(record, 0);
*/
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
buffer = XLogReadBuffer(data->node, data->blkno, true);
Assert(BufferIsValid(buffer));
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_LIST); GinInitBuffer(buffer, GIN_LIST);
@ -779,6 +617,9 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
GinPageGetOpaque(page)->maxoff = 0; GinPageGetOpaque(page)->maxoff = 0;
} }
payload = XLogRecGetBlockData(record, 0, &totaltupsize);
tuples = (IndexTuple) payload;
for (i = 0; i < data->ntuples; i++) for (i = 0; i < data->ntuples; i++)
{ {
tupsize = IndexTupleSize(tuples); tupsize = IndexTupleSize(tuples);
@ -791,6 +632,7 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
tuples = (IndexTuple) (((char *) tuples) + tupsize); tuples = (IndexTuple) (((char *) tuples) + tupsize);
off++; off++;
} }
Assert((char *) tuples == payload + totaltupsize);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
@ -799,21 +641,20 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record) ginRedoDeleteListPages(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record); ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record);
Buffer metabuffer; Buffer metabuffer;
Page metapage; Page metapage;
int i; int i;
/* Backup blocks are not used in delete_listpage records */ metabuffer = XLogInitBufferForRedo(record, 0);
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
if (!BufferIsValid(metabuffer))
return; /* assume index was deleted, nothing to do */
metapage = BufferGetPage(metabuffer); metapage = BufferGetPage(metabuffer);
GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
PageSetLSN(metapage, lsn); PageSetLSN(metapage, lsn);
MarkBufferDirty(metabuffer); MarkBufferDirty(metabuffer);
@ -838,7 +679,7 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
Buffer buffer; Buffer buffer;
Page page; Page page;
buffer = XLogReadBuffer(data->node, data->toDelete[i], true); buffer = XLogInitBufferForRedo(record, i + 1);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DELETED); GinInitBuffer(buffer, GIN_DELETED);
@ -851,9 +692,9 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
} }
void void
gin_redo(XLogRecPtr lsn, XLogRecord *record) gin_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
MemoryContext oldCtx; MemoryContext oldCtx;
/* /*
@ -866,34 +707,34 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
switch (info) switch (info)
{ {
case XLOG_GIN_CREATE_INDEX: case XLOG_GIN_CREATE_INDEX:
ginRedoCreateIndex(lsn, record); ginRedoCreateIndex(record);
break; break;
case XLOG_GIN_CREATE_PTREE: case XLOG_GIN_CREATE_PTREE:
ginRedoCreatePTree(lsn, record); ginRedoCreatePTree(record);
break; break;
case XLOG_GIN_INSERT: case XLOG_GIN_INSERT:
ginRedoInsert(lsn, record); ginRedoInsert(record);
break; break;
case XLOG_GIN_SPLIT: case XLOG_GIN_SPLIT:
ginRedoSplit(lsn, record); ginRedoSplit(record);
break; break;
case XLOG_GIN_VACUUM_PAGE: case XLOG_GIN_VACUUM_PAGE:
ginRedoVacuumPage(lsn, record); ginRedoVacuumPage(record);
break; break;
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE: case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
ginRedoVacuumDataLeafPage(lsn, record); ginRedoVacuumDataLeafPage(record);
break; break;
case XLOG_GIN_DELETE_PAGE: case XLOG_GIN_DELETE_PAGE:
ginRedoDeletePage(lsn, record); ginRedoDeletePage(record);
break; break;
case XLOG_GIN_UPDATE_META_PAGE: case XLOG_GIN_UPDATE_META_PAGE:
ginRedoUpdateMetapage(lsn, record); ginRedoUpdateMetapage(record);
break; break;
case XLOG_GIN_INSERT_LISTPAGE: case XLOG_GIN_INSERT_LISTPAGE:
ginRedoInsertListPage(lsn, record); ginRedoInsertListPage(record);
break; break;
case XLOG_GIN_DELETE_LISTPAGE: case XLOG_GIN_DELETE_LISTPAGE:
ginRedoDeleteListPages(lsn, record); ginRedoDeleteListPages(record);
break; break;
default: default:
elog(PANIC, "gin_redo: unknown op code %u", info); elog(PANIC, "gin_redo: unknown op code %u", info);

View File

@ -16,6 +16,7 @@
#include "access/genam.h" #include "access/genam.h"
#include "access/gist_private.h" #include "access/gist_private.h"
#include "access/xloginsert.h"
#include "catalog/index.h" #include "catalog/index.h"
#include "catalog/pg_collation.h" #include "catalog/pg_collation.h"
#include "miscadmin.h" #include "miscadmin.h"
@ -394,6 +395,14 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
GistPageSetNSN(ptr->page, oldnsn); GistPageSetNSN(ptr->page, oldnsn);
} }
/*
* gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
* insertion for that. NB: The number of pages and data segments
* specified here must match the calculations in gistXLogSplit()!
*/
if (RelationNeedsWAL(rel))
XLogEnsureRecordSpace(npage, 1 + npage * 2);
START_CRIT_SECTION(); START_CRIT_SECTION();
/* /*

View File

@ -183,14 +183,11 @@ gistbuild(PG_FUNCTION_ARGS)
if (RelationNeedsWAL(index)) if (RelationNeedsWAL(index))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata;
rdata.data = (char *) &(index->rd_node); XLogBeginInsert();
rdata.len = sizeof(RelFileNode); XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
else else

View File

@ -18,18 +18,6 @@
#include "access/xlogutils.h" #include "access/xlogutils.h"
#include "utils/memutils.h" #include "utils/memutils.h"
typedef struct
{
gistxlogPage *header;
IndexTuple *itup;
} NewPage;
typedef struct
{
gistxlogPageSplit *data;
NewPage *page;
} PageSplitRecord;
static MemoryContext opCtx; /* working memory for operations */ static MemoryContext opCtx; /* working memory for operations */
/* /*
@ -44,9 +32,9 @@ static MemoryContext opCtx; /* working memory for operations */
* action.) * action.)
*/ */
static void static void
gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index, gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
RelFileNode node, BlockNumber childblkno)
{ {
XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer; Buffer buffer;
Page page; Page page;
XLogRedoAction action; XLogRedoAction action;
@ -55,8 +43,7 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
* Note that we still update the page even if it was restored from a full * Note that we still update the page even if it was restored from a full
* page image, because the updated NSN is not included in the image. * page image, because the updated NSN is not included in the image.
*/ */
action = XLogReadBufferForRedo(lsn, record, block_index, node, childblkno, action = XLogReadBufferForRedo(record, block_id, &buffer);
&buffer);
if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
{ {
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
@ -75,20 +62,23 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
* redo any page update (except page split) * redo any page update (except page split)
*/ */
static void static void
gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record) gistRedoPageUpdateRecord(XLogReaderState *record)
{ {
char *begin = XLogRecGetData(record); XLogRecPtr lsn = record->EndRecPtr;
gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin; gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page; Page page;
char *data;
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
page = (Page) BufferGetPage(buffer); char *begin;
char *data;
Size datalen;
int ninserted = 0;
data = begin + sizeof(gistxlogPageUpdate); data = begin = XLogRecGetBlockData(record, 0, &datalen);
page = (Page) BufferGetPage(buffer);
/* Delete old tuples */ /* Delete old tuples */
if (xldata->ntodelete > 0) if (xldata->ntodelete > 0)
@ -105,12 +95,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
} }
/* add tuples */ /* add tuples */
if (data - begin < record->xl_len) if (data - begin < datalen)
{ {
OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber : OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page)); OffsetNumberNext(PageGetMaxOffsetNumber(page));
while (data - begin < record->xl_len) while (data - begin < datalen)
{ {
IndexTuple itup = (IndexTuple) data; IndexTuple itup = (IndexTuple) data;
Size sz = IndexTupleSize(itup); Size sz = IndexTupleSize(itup);
@ -123,9 +113,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
elog(ERROR, "failed to add item to GiST index page, size %d bytes", elog(ERROR, "failed to add item to GiST index page, size %d bytes",
(int) sz); (int) sz);
off++; off++;
ninserted++;
} }
} }
Assert(ninserted == xldata->ntoinsert);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
@ -137,58 +130,51 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
* that even if the target page no longer exists, we still attempt to * that even if the target page no longer exists, we still attempt to
* replay the change on the child page. * replay the change on the child page.
*/ */
if (BlockNumberIsValid(xldata->leftchild)) if (XLogRecHasBlockRef(record, 1))
gistRedoClearFollowRight(lsn, record, 1, gistRedoClearFollowRight(record, 1);
xldata->node, xldata->leftchild);
if (BufferIsValid(buffer)) if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
} }
/*
 * decodePageSplitRecord -- two versions appear fused on the lines below
 * (NOTE(review): the layout looks like pre-change text followed by
 * post-change text on each line; confirm against the underlying diff).
 *
 * Version taking (PageSplitRecord *decoded, XLogRecord *record):
 *	 treats the record data as a gistxlogPageSplit header followed by npage
 *	 groups, each a gistxlogPage header plus header->num index tuples, and
 *	 fills decoded->page[i].header / decoded->page[i].itup[] with pointers
 *	 into that data.  Returns nothing.
 *
 * Version taking (char *begin, int len, int *n):
 *	 copies the leading sizeof(int) bytes of 'begin' into *n with memcpy,
 *	 palloc's an array of *n IndexTuple pointers, and walks the remaining
 *	 bytes tuple by tuple using IndexTupleSize().  The stored pointers point
 *	 into 'begin'; the tuples themselves are not copied.  The final Assert
 *	 requires the walk to end exactly 'len' bytes after 'begin'.  Returns
 *	 the palloc'd pointer array.
 */
static void /*
decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record) * Returns an array of index pointers.
*/
static IndexTuple *
decodePageSplitRecord(char *begin, int len, int *n)
{ {
char *begin = XLogRecGetData(record), char *ptr;
*ptr; int i = 0;
int j, IndexTuple *tuples;
i = 0;
decoded->data = (gistxlogPageSplit *) begin; /* extract the number of tuples */
decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage); memcpy(n, begin, sizeof(int));
ptr = begin + sizeof(int);
ptr = begin + sizeof(gistxlogPageSplit); tuples = palloc(*n * sizeof(IndexTuple));
for (i = 0; i < decoded->data->npage; i++)
for (i = 0; i < *n; i++)
{ {
Assert(ptr - begin < record->xl_len); Assert(ptr - begin < len);
decoded->page[i].header = (gistxlogPage *) ptr; tuples[i] = (IndexTuple) ptr;
ptr += sizeof(gistxlogPage); ptr += IndexTupleSize((IndexTuple) ptr);
decoded->page[i].itup = (IndexTuple *)
palloc(sizeof(IndexTuple) * decoded->page[i].header->num);
j = 0;
while (j < decoded->page[i].header->num)
{
Assert(ptr - begin < record->xl_len);
decoded->page[i].itup[j] = (IndexTuple) ptr;
ptr += IndexTupleSize((IndexTuple) ptr);
j++;
}
} }
Assert(ptr - begin == len);
return tuples;
} }
static void static void
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record) gistRedoPageSplitRecord(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record); gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
PageSplitRecord xlrec;
Buffer firstbuffer = InvalidBuffer; Buffer firstbuffer = InvalidBuffer;
Buffer buffer; Buffer buffer;
Page page; Page page;
int i; int i;
bool isrootsplit = false; bool isrootsplit = false;
decodePageSplitRecord(&xlrec, record);
/* /*
* We must hold lock on the first-listed page throughout the action, * We must hold lock on the first-listed page throughout the action,
* including while updating the left child page (if any). We can unlock * including while updating the left child page (if any). We can unlock
@ -198,32 +184,39 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
*/ */
/* loop around all pages */ /* loop around all pages */
for (i = 0; i < xlrec.data->npage; i++) for (i = 0; i < xldata->npage; i++)
{ {
NewPage *newpage = xlrec.page + i;
int flags; int flags;
char *data;
Size datalen;
int num;
BlockNumber blkno;
IndexTuple *tuples;
if (newpage->header->blkno == GIST_ROOT_BLKNO) XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
if (blkno == GIST_ROOT_BLKNO)
{ {
Assert(i == 0); Assert(i == 0);
isrootsplit = true; isrootsplit = true;
} }
buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true); buffer = XLogInitBufferForRedo(record, i + 1);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
data = XLogRecGetBlockData(record, i + 1, &datalen);
tuples = decodePageSplitRecord(data, datalen, &num);
/* ok, clear buffer */ /* ok, clear buffer */
if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO) if (xldata->origleaf && blkno != GIST_ROOT_BLKNO)
flags = F_LEAF; flags = F_LEAF;
else else
flags = 0; flags = 0;
GISTInitBuffer(buffer, flags); GISTInitBuffer(buffer, flags);
/* and fill it */ /* and fill it */
gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber); gistfillbuffer(page, tuples, num, FirstOffsetNumber);
if (newpage->header->blkno == GIST_ROOT_BLKNO) if (blkno == GIST_ROOT_BLKNO)
{ {
GistPageGetOpaque(page)->rightlink = InvalidBlockNumber; GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
GistPageSetNSN(page, xldata->orignsn); GistPageSetNSN(page, xldata->orignsn);
@ -231,12 +224,17 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
} }
else else
{ {
if (i < xlrec.data->npage - 1) if (i < xldata->npage - 1)
GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno; {
BlockNumber nextblkno;
XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno);
GistPageGetOpaque(page)->rightlink = nextblkno;
}
else else
GistPageGetOpaque(page)->rightlink = xldata->origrlink; GistPageGetOpaque(page)->rightlink = xldata->origrlink;
GistPageSetNSN(page, xldata->orignsn); GistPageSetNSN(page, xldata->orignsn);
if (i < xlrec.data->npage - 1 && !isrootsplit && if (i < xldata->npage - 1 && !isrootsplit &&
xldata->markfollowright) xldata->markfollowright)
GistMarkFollowRight(page); GistMarkFollowRight(page);
else else
@ -253,26 +251,22 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
} }
/* Fix follow-right data on left child page, if any */ /* Fix follow-right data on left child page, if any */
if (BlockNumberIsValid(xldata->leftchild)) if (XLogRecHasBlockRef(record, 0))
gistRedoClearFollowRight(lsn, record, 0, gistRedoClearFollowRight(record, 0);
xldata->node, xldata->leftchild);
/* Finally, release lock on the first page */ /* Finally, release lock on the first page */
UnlockReleaseBuffer(firstbuffer); UnlockReleaseBuffer(firstbuffer);
} }
static void static void
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) gistRedoCreateIndex(XLogReaderState *record)
{ {
RelFileNode *node = (RelFileNode *) XLogRecGetData(record); XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer; Buffer buffer;
Page page; Page page;
/* Backup blocks are not used in create_index records */ buffer = XLogInitBufferForRedo(record, 0);
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
GISTInitBuffer(buffer, F_LEAF); GISTInitBuffer(buffer, F_LEAF);
@ -284,9 +278,9 @@ gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
} }
void void
gist_redo(XLogRecPtr lsn, XLogRecord *record) gist_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
MemoryContext oldCxt; MemoryContext oldCxt;
/* /*
@ -299,13 +293,13 @@ gist_redo(XLogRecPtr lsn, XLogRecord *record)
switch (info) switch (info)
{ {
case XLOG_GIST_PAGE_UPDATE: case XLOG_GIST_PAGE_UPDATE:
gistRedoPageUpdateRecord(lsn, record); gistRedoPageUpdateRecord(record);
break; break;
case XLOG_GIST_PAGE_SPLIT: case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(lsn, record); gistRedoPageSplitRecord(record);
break; break;
case XLOG_GIST_CREATE_INDEX: case XLOG_GIST_CREATE_INDEX:
gistRedoCreateIndex(lsn, record); gistRedoCreateIndex(record);
break; break;
default: default:
elog(PANIC, "gist_redo: unknown op code %u", info); elog(PANIC, "gist_redo: unknown op code %u", info);
@ -336,70 +330,49 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
BlockNumber origrlink, GistNSN orignsn, BlockNumber origrlink, GistNSN orignsn,
Buffer leftchildbuf, bool markfollowright) Buffer leftchildbuf, bool markfollowright)
{ {
XLogRecData rdata[GIST_MAX_SPLIT_PAGES * 2 + 2];
gistxlogPageSplit xlrec; gistxlogPageSplit xlrec;
SplitedPageLayout *ptr; SplitedPageLayout *ptr;
int npage = 0, int npage = 0;
cur;
XLogRecPtr recptr; XLogRecPtr recptr;
int i;
for (ptr = dist; ptr; ptr = ptr->next) for (ptr = dist; ptr; ptr = ptr->next)
npage++; npage++;
/*
* the caller should've checked this already, but doesn't hurt to check
* again.
*/
if (npage > GIST_MAX_SPLIT_PAGES)
elog(ERROR, "GiST page split into too many halves");
xlrec.node = node;
xlrec.origblkno = blkno;
xlrec.origrlink = origrlink; xlrec.origrlink = origrlink;
xlrec.orignsn = orignsn; xlrec.orignsn = orignsn;
xlrec.origleaf = page_is_leaf; xlrec.origleaf = page_is_leaf;
xlrec.npage = (uint16) npage; xlrec.npage = (uint16) npage;
xlrec.leftchild =
BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
xlrec.markfollowright = markfollowright; xlrec.markfollowright = markfollowright;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = sizeof(gistxlogPageSplit);
rdata[0].buffer = InvalidBuffer;
cur = 1;
/* /*
* Include a full page image of the child buf. (only necessary if a * Include a full page image of the child buf. (only necessary if a
* checkpoint happened since the child page was split) * checkpoint happened since the child page was split)
*/ */
if (BufferIsValid(leftchildbuf)) if (BufferIsValid(leftchildbuf))
{ XLogRegisterBuffer(0, leftchildbuf, REGBUF_STANDARD);
rdata[cur - 1].next = &(rdata[cur]);
rdata[cur].data = NULL;
rdata[cur].len = 0;
rdata[cur].buffer = leftchildbuf;
rdata[cur].buffer_std = true;
cur++;
}
/*
* NOTE: We register a lot of data. The caller must've called
* XLogEnsureRecordSpace() to prepare for that. We cannot do it here,
* because we're already in a critical section. If you change the number
* of buffer or data registrations here, make sure you modify the
* XLogEnsureRecordSpace() calls accordingly!
*/
XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageSplit));
i = 1;
for (ptr = dist; ptr; ptr = ptr->next) for (ptr = dist; ptr; ptr = ptr->next)
{ {
rdata[cur - 1].next = &(rdata[cur]); XLogRegisterBuffer(i, ptr->buffer, REGBUF_WILL_INIT);
rdata[cur].buffer = InvalidBuffer; XLogRegisterBufData(i, (char *) &(ptr->block.num), sizeof(int));
rdata[cur].data = (char *) &(ptr->block); XLogRegisterBufData(i, (char *) ptr->list, ptr->lenlist);
rdata[cur].len = sizeof(gistxlogPage); i++;
cur++;
rdata[cur - 1].next = &(rdata[cur]);
rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char *) (ptr->list);
rdata[cur].len = ptr->lenlist;
cur++;
} }
rdata[cur - 1].next = NULL;
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT);
return recptr; return recptr;
} }
@ -413,9 +386,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
* *
* Note that both the todelete array and the tuples are marked as belonging * Note that both the todelete array and the tuples are marked as belonging
* to the target buffer; they need not be stored in XLOG if XLogInsert decides * to the target buffer; they need not be stored in XLOG if XLogInsert decides
* to log the whole buffer contents instead. Also, we take care that there's * to log the whole buffer contents instead.
* at least one rdata item referencing the buffer, even when ntodelete and
* ituplen are both zero; this ensures that XLogInsert knows about the buffer.
*/ */
XLogRecPtr XLogRecPtr
gistXLogUpdate(RelFileNode node, Buffer buffer, gistXLogUpdate(RelFileNode node, Buffer buffer,
@ -423,57 +394,31 @@ gistXLogUpdate(RelFileNode node, Buffer buffer,
IndexTuple *itup, int ituplen, IndexTuple *itup, int ituplen,
Buffer leftchildbuf) Buffer leftchildbuf)
{ {
XLogRecData rdata[MaxIndexTuplesPerPage + 3];
gistxlogPageUpdate xlrec; gistxlogPageUpdate xlrec;
int cur, int i;
i;
XLogRecPtr recptr; XLogRecPtr recptr;
xlrec.node = node;
xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.ntodelete = ntodelete; xlrec.ntodelete = ntodelete;
xlrec.leftchild = xlrec.ntoinsert = ituplen;
BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = sizeof(gistxlogPageUpdate); XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate));
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) todelete; XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
rdata[1].len = sizeof(OffsetNumber) * ntodelete; XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete);
rdata[1].buffer = buffer;
rdata[1].buffer_std = true;
cur = 2;
/* new tuples */ /* new tuples */
for (i = 0; i < ituplen; i++) for (i = 0; i < ituplen; i++)
{ XLogRegisterBufData(0, (char *) (itup[i]), IndexTupleSize(itup[i]));
rdata[cur - 1].next = &(rdata[cur]);
rdata[cur].data = (char *) (itup[i]);
rdata[cur].len = IndexTupleSize(itup[i]);
rdata[cur].buffer = buffer;
rdata[cur].buffer_std = true;
cur++;
}
/* /*
* Include a full page image of the child buf. (only necessary if a * Include a full page image of the child buf. (only necessary if a
* checkpoint happened since the child page was split) * checkpoint happened since the child page was split)
*/ */
if (BufferIsValid(leftchildbuf)) if (BufferIsValid(leftchildbuf))
{ XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD);
rdata[cur - 1].next = &(rdata[cur]);
rdata[cur].data = NULL;
rdata[cur].len = 0;
rdata[cur].buffer = leftchildbuf;
rdata[cur].buffer_std = true;
cur++;
}
rdata[cur - 1].next = NULL;
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE);
return recptr; return recptr;
} }

View File

@ -700,7 +700,7 @@ hashvacuumcleanup(PG_FUNCTION_ARGS)
/*
 * hash_redo -- stub redo routine.
 *
 * In both versions shown below the body does nothing except raise
 * elog(PANIC, "hash_redo: unimplemented"); the arguments are never read.
 * The only difference between the two signatures on the next line is the
 * parameter list: (XLogRecPtr lsn, XLogRecord *record) versus a single
 * XLogReaderState *record.
 */
void void
hash_redo(XLogRecPtr lsn, XLogRecord *record) hash_redo(XLogReaderState *record)
{ {
elog(PANIC, "hash_redo: unimplemented"); elog(PANIC, "hash_redo: unimplemented");
} }

File diff suppressed because it is too large Load Diff

View File

@ -865,7 +865,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
hash_seq_init(&seq_status, state->rs_logical_mappings); hash_seq_init(&seq_status, state->rs_logical_mappings);
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL) while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
{ {
XLogRecData rdata[2];
char *waldata; char *waldata;
char *waldata_start; char *waldata_start;
xl_heap_rewrite_mapping xlrec; xl_heap_rewrite_mapping xlrec;
@ -889,11 +888,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
xlrec.offset = src->off; xlrec.offset = src->off;
xlrec.start_lsn = state->rs_begin_lsn; xlrec.start_lsn = state->rs_begin_lsn;
rdata[0].data = (char *) (&xlrec);
rdata[0].len = sizeof(xlrec);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
/* write all mappings consecutively */ /* write all mappings consecutively */
len = src->num_mappings * sizeof(LogicalRewriteMappingData); len = src->num_mappings * sizeof(LogicalRewriteMappingData);
waldata_start = waldata = palloc(len); waldata_start = waldata = palloc(len);
@ -934,13 +928,12 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
written, len))); written, len)));
src->off += len; src->off += len;
rdata[1].data = waldata_start; XLogBeginInsert();
rdata[1].len = len; XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
rdata[1].buffer = InvalidBuffer; XLogRegisterData(waldata_start, len);
rdata[1].next = NULL;
/* write xlog record */ /* write xlog record */
XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata); XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE);
pfree(waldata_start); pfree(waldata_start);
} }
@ -1123,7 +1116,7 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
* Replay XLOG_HEAP2_REWRITE records * Replay XLOG_HEAP2_REWRITE records
*/ */
void void
heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r) heap_xlog_logical_rewrite(XLogReaderState *r)
{ {
char path[MAXPGPATH]; char path[MAXPGPATH];
int fd; int fd;
@ -1138,7 +1131,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
xlrec->mapped_db, xlrec->mapped_rel, xlrec->mapped_db, xlrec->mapped_rel,
(uint32) (xlrec->start_lsn >> 32), (uint32) (xlrec->start_lsn >> 32),
(uint32) xlrec->start_lsn, (uint32) xlrec->start_lsn,
xlrec->mapped_xid, r->xl_xid); xlrec->mapped_xid, XLogRecGetXid(r));
fd = OpenTransientFile(path, fd = OpenTransientFile(path,
O_CREAT | O_WRONLY | PG_BINARY, O_CREAT | O_WRONLY | PG_BINARY,

View File

@ -837,37 +837,25 @@ _bt_insertonpg(Relation rel,
if (RelationNeedsWAL(rel)) if (RelationNeedsWAL(rel))
{ {
xl_btree_insert xlrec; xl_btree_insert xlrec;
BlockNumber xlleftchild;
xl_btree_metadata xlmeta; xl_btree_metadata xlmeta;
uint8 xlinfo; uint8 xlinfo;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[4];
XLogRecData *nextrdata;
IndexTupleData trunctuple; IndexTupleData trunctuple;
xlrec.target.node = rel->rd_node; xlrec.offnum = itup_off;
ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = SizeOfBtreeInsert; XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = nextrdata = &(rdata[1]);
if (P_ISLEAF(lpageop)) if (P_ISLEAF(lpageop))
xlinfo = XLOG_BTREE_INSERT_LEAF; xlinfo = XLOG_BTREE_INSERT_LEAF;
else else
{ {
/* /*
* Include the block number of the left child, whose * Register the left child whose INCOMPLETE_SPLIT flag was
* INCOMPLETE_SPLIT flag was cleared. * cleared.
*/ */
xlleftchild = BufferGetBlockNumber(cbuf); XLogRegisterBuffer(1, cbuf, REGBUF_STANDARD);
nextrdata->data = (char *) &xlleftchild;
nextrdata->len = sizeof(BlockNumber);
nextrdata->buffer = cbuf;
nextrdata->buffer_std = true;
nextrdata->next = nextrdata + 1;
nextrdata++;
xlinfo = XLOG_BTREE_INSERT_UPPER; xlinfo = XLOG_BTREE_INSERT_UPPER;
} }
@ -879,33 +867,25 @@ _bt_insertonpg(Relation rel,
xlmeta.fastroot = metad->btm_fastroot; xlmeta.fastroot = metad->btm_fastroot;
xlmeta.fastlevel = metad->btm_fastlevel; xlmeta.fastlevel = metad->btm_fastlevel;
nextrdata->data = (char *) &xlmeta; XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
nextrdata->len = sizeof(xl_btree_metadata); XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata));
nextrdata->buffer = InvalidBuffer;
nextrdata->next = nextrdata + 1;
nextrdata++;
xlinfo = XLOG_BTREE_INSERT_META; xlinfo = XLOG_BTREE_INSERT_META;
} }
/* Read comments in _bt_pgaddtup */ /* Read comments in _bt_pgaddtup */
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop)) if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
{ {
trunctuple = *itup; trunctuple = *itup;
trunctuple.t_info = sizeof(IndexTupleData); trunctuple.t_info = sizeof(IndexTupleData);
nextrdata->data = (char *) &trunctuple; XLogRegisterBufData(0, (char *) &trunctuple,
nextrdata->len = sizeof(IndexTupleData); sizeof(IndexTupleData));
} }
else else
{ XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
nextrdata->data = (char *) itup;
nextrdata->len = IndexTupleDSize(*itup);
}
nextrdata->buffer = buf;
nextrdata->buffer_std = true;
nextrdata->next = NULL;
recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata); recptr = XLogInsert(RM_BTREE_ID, xlinfo);
if (BufferIsValid(metabuf)) if (BufferIsValid(metabuf))
{ {
@ -1260,56 +1240,37 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
xl_btree_split xlrec; xl_btree_split xlrec;
uint8 xlinfo; uint8 xlinfo;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[7];
XLogRecData *lastrdata;
BlockNumber cblkno;
xlrec.node = rel->rd_node;
xlrec.leftsib = origpagenumber;
xlrec.rightsib = rightpagenumber;
xlrec.rnext = ropaque->btpo_next;
xlrec.level = ropaque->btpo.level; xlrec.level = ropaque->btpo.level;
xlrec.firstright = firstright; xlrec.firstright = firstright;
xlrec.newitemoff = newitemoff;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = SizeOfBtreeSplit; XLogRegisterData((char *) &xlrec, SizeOfBtreeSplit);
rdata[0].buffer = InvalidBuffer;
lastrdata = &rdata[0]; XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBuffer(1, rbuf, REGBUF_WILL_INIT);
/* Log the right sibling, because we've changed its prev-pointer. */
if (!P_RIGHTMOST(ropaque))
XLogRegisterBuffer(2, sbuf, REGBUF_STANDARD);
if (BufferIsValid(cbuf))
XLogRegisterBuffer(3, cbuf, REGBUF_STANDARD);
/* /*
* Log the new item and its offset, if it was inserted on the left * Log the new item, if it was inserted on the left page. (If it was
* page. (If it was put on the right page, we don't need to explicitly * put on the right page, we don't need to explicitly WAL log it
* WAL log it because it's included with all the other items on the * because it's included with all the other items on the right page.)
* right page.) Show the new item as belonging to the left page * Show the new item as belonging to the left page buffer, so that it
* buffer, so that it is not stored if XLogInsert decides it needs a * is not stored if XLogInsert decides it needs a full-page image of
* full-page image of the left page. We store the offset anyway, * the left page. We store the offset anyway, though, to support
* though, to support archive compression of these records. * archive compression of these records.
*/ */
if (newitemonleft) if (newitemonleft)
{ XLogRegisterBufData(0, (char *) newitem, MAXALIGN(newitemsz));
lastrdata->next = lastrdata + 1;
lastrdata++;
lastrdata->data = (char *) &newitemoff;
lastrdata->len = sizeof(OffsetNumber);
lastrdata->buffer = InvalidBuffer;
lastrdata->next = lastrdata + 1;
lastrdata++;
lastrdata->data = (char *) newitem;
lastrdata->len = MAXALIGN(newitemsz);
lastrdata->buffer = buf; /* backup block 0 */
lastrdata->buffer_std = true;
}
/* Log left page */ /* Log left page */
if (!isleaf) if (!isleaf)
{ {
lastrdata->next = lastrdata + 1;
lastrdata++;
/* /*
* We must also log the left page's high key, because the right * We must also log the left page's high key, because the right
* page's leftmost key is suppressed on non-leaf levels. Show it * page's leftmost key is suppressed on non-leaf levels. Show it
@ -1319,43 +1280,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
*/ */
itemid = PageGetItemId(origpage, P_HIKEY); itemid = PageGetItemId(origpage, P_HIKEY);
item = (IndexTuple) PageGetItem(origpage, itemid); item = (IndexTuple) PageGetItem(origpage, itemid);
lastrdata->data = (char *) item; XLogRegisterBufData(0, (char *) item, MAXALIGN(IndexTupleSize(item)));
lastrdata->len = MAXALIGN(IndexTupleSize(item));
lastrdata->buffer = buf; /* backup block 0 */
lastrdata->buffer_std = true;
}
if (isleaf && !newitemonleft)
{
lastrdata->next = lastrdata + 1;
lastrdata++;
/*
* Although we don't need to WAL-log anything on the left page, we
* still need XLogInsert to consider storing a full-page image of
* the left page, so make an empty entry referencing that buffer.
* This also ensures that the left page is always backup block 0.
*/
lastrdata->data = NULL;
lastrdata->len = 0;
lastrdata->buffer = buf; /* backup block 0 */
lastrdata->buffer_std = true;
}
/*
* Log block number of left child, whose INCOMPLETE_SPLIT flag this
* insertion clears.
*/
if (!isleaf)
{
lastrdata->next = lastrdata + 1;
lastrdata++;
cblkno = BufferGetBlockNumber(cbuf);
lastrdata->data = (char *) &cblkno;
lastrdata->len = sizeof(BlockNumber);
lastrdata->buffer = cbuf; /* backup block 1 */
lastrdata->buffer_std = true;
} }
/* /*
@ -1370,35 +1295,16 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
* and so the item pointers can be reconstructed. See comments for * and so the item pointers can be reconstructed. See comments for
* _bt_restore_page(). * _bt_restore_page().
*/ */
lastrdata->next = lastrdata + 1; XLogRegisterBufData(1,
lastrdata++; (char *) rightpage + ((PageHeader) rightpage)->pd_upper,
((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper);
lastrdata->data = (char *) rightpage +
((PageHeader) rightpage)->pd_upper;
lastrdata->len = ((PageHeader) rightpage)->pd_special -
((PageHeader) rightpage)->pd_upper;
lastrdata->buffer = InvalidBuffer;
/* Log the right sibling, because we've changed its' prev-pointer. */
if (!P_RIGHTMOST(ropaque))
{
lastrdata->next = lastrdata + 1;
lastrdata++;
lastrdata->data = NULL;
lastrdata->len = 0;
lastrdata->buffer = sbuf; /* bkp block 1 (leaf) or 2 (non-leaf) */
lastrdata->buffer_std = true;
}
lastrdata->next = NULL;
if (isroot) if (isroot)
xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT; xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
else else
xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R; xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata); recptr = XLogInsert(RM_BTREE_ID, xlinfo);
PageSetLSN(origpage, recptr); PageSetLSN(origpage, recptr);
PageSetLSN(rightpage, recptr); PageSetLSN(rightpage, recptr);
@ -2090,34 +1996,35 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
{ {
xl_btree_newroot xlrec; xl_btree_newroot xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[3]; xl_btree_metadata md;
xlrec.node = rel->rd_node;
xlrec.rootblk = rootblknum; xlrec.rootblk = rootblknum;
xlrec.level = metad->btm_level; xlrec.level = metad->btm_level;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = SizeOfBtreeNewroot; XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]); XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
md.root = rootblknum;
md.level = metad->btm_level;
md.fastroot = rootblknum;
md.fastlevel = metad->btm_level;
XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
/* /*
* Direct access to page is not good but faster - we should implement * Direct access to page is not good but faster - we should implement
* some new func in page API. * some new func in page API.
*/ */
rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper; XLogRegisterBufData(0,
rdata[1].len = ((PageHeader) rootpage)->pd_special - (char *) rootpage + ((PageHeader) rootpage)->pd_upper,
((PageHeader) rootpage)->pd_upper; ((PageHeader) rootpage)->pd_special -
rdata[1].buffer = InvalidBuffer; ((PageHeader) rootpage)->pd_upper);
rdata[1].next = &(rdata[2]);
/* Make a full-page image of the left child if needed */ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
rdata[2].data = NULL;
rdata[2].len = 0;
rdata[2].buffer = lbuf;
rdata[2].next = NULL;
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
PageSetLSN(lpage, recptr); PageSetLSN(lpage, recptr);
PageSetLSN(rootpage, recptr); PageSetLSN(rootpage, recptr);

View File

@ -236,18 +236,25 @@ _bt_getroot(Relation rel, int access)
{ {
xl_btree_newroot xlrec; xl_btree_newroot xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata; xl_btree_metadata md;
XLogBeginInsert();
XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
md.root = rootblkno;
md.level = 0;
md.fastroot = rootblkno;
md.fastlevel = 0;
XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
xlrec.node = rel->rd_node;
xlrec.rootblk = rootblkno; xlrec.rootblk = rootblkno;
xlrec.level = 0; xlrec.level = 0;
rdata.data = (char *) &xlrec; XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
rdata.len = SizeOfBtreeNewroot;
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata); recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
PageSetLSN(rootpage, recptr); PageSetLSN(rootpage, recptr);
PageSetLSN(metapg, recptr); PageSetLSN(metapg, recptr);
@ -528,39 +535,23 @@ _bt_checkpage(Relation rel, Buffer buf)
/*
 * _bt_log_reuse_page -- write an XLOG_BTREE_REUSE_PAGE record.
 *
 * Both versions fused on the lines below fill an xl_btree_reuse_page with
 * rel->rd_node, blkno and latestRemovedXid and insert it under RM_BTREE_ID
 * with SizeOfBtreeReusePage bytes of payload.  The return value of
 * XLogInsert is discarded, and no buffer is attached to the record.
 *
 * Differences visible between the two versions
 * (NOTE(review): the layout looks like pre-change text followed by
 * post-change text on each line; confirm against the underlying diff):
 *	 - one version returns early when !RelationNeedsWAL(rel) and wraps the
 *	   insert in START_CRIT_SECTION()/END_CRIT_SECTION(); the other has
 *	   neither, so any such check must be made by its caller;
 *	 - one version passes the payload as a one-element XLogRecData chain;
 *	   the other calls XLogBeginInsert() and XLogRegisterData() before
 *	   XLogInsert().
 */
static void static void
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid) _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
{ {
if (!RelationNeedsWAL(rel)) xl_btree_reuse_page xlrec_reuse;
return;
/* No ereport(ERROR) until changes are logged */
START_CRIT_SECTION();
/* /*
* We don't do MarkBufferDirty here because we're about to initialise the * Note that we don't register the buffer with the record, because this
* page, and nobody else can see it yet. * operation doesn't modify the page. This record only exists to provide a
* conflict point for Hot Standby.
*/ */
/* XLOG stuff */ /* XLOG stuff */
{ xlrec_reuse.node = rel->rd_node;
XLogRecData rdata[1]; xlrec_reuse.block = blkno;
xl_btree_reuse_page xlrec_reuse; xlrec_reuse.latestRemovedXid = latestRemovedXid;
xlrec_reuse.node = rel->rd_node; XLogBeginInsert();
xlrec_reuse.block = blkno; XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
xlrec_reuse.latestRemovedXid = latestRemovedXid;
rdata[0].data = (char *) &xlrec_reuse;
rdata[0].len = SizeOfBtreeReusePage;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = NULL;
XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata); XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
/*
* We don't do PageSetLSN here because we're about to initialise the
* page, so no need.
*/
}
END_CRIT_SECTION();
} }
/* /*
@ -633,7 +624,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
* WAL record that will allow us to conflict with queries * WAL record that will allow us to conflict with queries
* running on standby. * running on standby.
*/ */
if (XLogStandbyInfoActive()) if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
{ {
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@ -830,17 +821,13 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
if (RelationNeedsWAL(rel)) if (RelationNeedsWAL(rel))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
xl_btree_vacuum xlrec_vacuum; xl_btree_vacuum xlrec_vacuum;
xlrec_vacuum.node = rel->rd_node;
xlrec_vacuum.block = BufferGetBlockNumber(buf);
xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed; xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
rdata[0].data = (char *) &xlrec_vacuum;
rdata[0].len = SizeOfBtreeVacuum; XLogBeginInsert();
rdata[0].buffer = InvalidBuffer; XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
rdata[0].next = &(rdata[1]); XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
/* /*
* The target-offsets array is not in the buffer, but pretend that it * The target-offsets array is not in the buffer, but pretend that it
@ -848,20 +835,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
* need not be stored too. * need not be stored too.
*/ */
if (nitems > 0) if (nitems > 0)
{ XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber));
rdata[1].data = (char *) itemnos;
rdata[1].len = nitems * sizeof(OffsetNumber);
}
else
{
rdata[1].data = NULL;
rdata[1].len = 0;
}
rdata[1].buffer = buf;
rdata[1].buffer_std = true;
rdata[1].next = NULL;
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata); recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -919,36 +895,23 @@ _bt_delitems_delete(Relation rel, Buffer buf,
if (RelationNeedsWAL(rel)) if (RelationNeedsWAL(rel))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[3];
xl_btree_delete xlrec_delete; xl_btree_delete xlrec_delete;
xlrec_delete.node = rel->rd_node;
xlrec_delete.hnode = heapRel->rd_node; xlrec_delete.hnode = heapRel->rd_node;
xlrec_delete.block = BufferGetBlockNumber(buf);
xlrec_delete.nitems = nitems; xlrec_delete.nitems = nitems;
rdata[0].data = (char *) &xlrec_delete; XLogBeginInsert();
rdata[0].len = SizeOfBtreeDelete; XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
rdata[0].next = &(rdata[1]);
/* /*
* We need the target-offsets array whether or not we store the whole * We need the target-offsets array whether or not we store the whole
* buffer, to allow us to find the latestRemovedXid on a standby * buffer, to allow us to find the latestRemovedXid on a standby
* server. * server.
*/ */
rdata[1].data = (char *) itemnos; XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
rdata[1].len = nitems * sizeof(OffsetNumber);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = &(rdata[2]);
rdata[2].data = NULL; recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
rdata[2].len = 0;
rdata[2].buffer = buf;
rdata[2].buffer_std = true;
rdata[2].next = NULL;
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -1493,33 +1456,26 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
{ {
xl_btree_mark_page_halfdead xlrec; xl_btree_mark_page_halfdead xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
xlrec.target.node = rel->rd_node; xlrec.poffset = topoff;
ItemPointerSet(&(xlrec.target.tid), BufferGetBlockNumber(topparent), topoff);
xlrec.leafblk = leafblkno; xlrec.leafblk = leafblkno;
if (target != leafblkno) if (target != leafblkno)
xlrec.topparent = target; xlrec.topparent = target;
else else
xlrec.topparent = InvalidBlockNumber; xlrec.topparent = InvalidBlockNumber;
XLogBeginInsert();
XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
XLogRegisterBuffer(1, topparent, REGBUF_STANDARD);
page = BufferGetPage(leafbuf); page = BufferGetPage(leafbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page);
xlrec.leftblk = opaque->btpo_prev; xlrec.leftblk = opaque->btpo_prev;
xlrec.rightblk = opaque->btpo_next; xlrec.rightblk = opaque->btpo_next;
rdata[0].data = (char *) &xlrec; XLogRegisterData((char *) &xlrec, SizeOfBtreeMarkPageHalfDead);
rdata[0].len = SizeOfBtreeMarkPageHalfDead;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = NULL; recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
rdata[1].len = 0;
rdata[1].buffer = topparent;
rdata[1].buffer_std = true;
rdata[1].next = NULL;
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD, rdata);
page = BufferGetPage(topparent); page = BufferGetPage(topparent);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
@ -1826,63 +1782,44 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
xl_btree_metadata xlmeta; xl_btree_metadata xlmeta;
uint8 xlinfo; uint8 xlinfo;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[4];
XLogRecData *nextrdata;
xlrec.node = rel->rd_node; XLogBeginInsert();
XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
if (BufferIsValid(lbuf))
XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
if (target != leafblkno)
XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
/* information on the unlinked block */ /* information on the unlinked block */
xlrec.deadblk = target;
xlrec.leftsib = leftsib; xlrec.leftsib = leftsib;
xlrec.rightsib = rightsib; xlrec.rightsib = rightsib;
xlrec.btpo_xact = opaque->btpo.xact; xlrec.btpo_xact = opaque->btpo.xact;
/* information needed to recreate the leaf block (if not the target) */ /* information needed to recreate the leaf block (if not the target) */
xlrec.leafblk = leafblkno;
xlrec.leafleftsib = leafleftsib; xlrec.leafleftsib = leafleftsib;
xlrec.leafrightsib = leafrightsib; xlrec.leafrightsib = leafrightsib;
xlrec.topparent = nextchild; xlrec.topparent = nextchild;
rdata[0].data = (char *) &xlrec; XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
rdata[0].len = SizeOfBtreeUnlinkPage;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = nextrdata = &(rdata[1]);
if (BufferIsValid(metabuf)) if (BufferIsValid(metabuf))
{ {
XLogRegisterBuffer(4, metabuf, REGBUF_WILL_INIT);
xlmeta.root = metad->btm_root; xlmeta.root = metad->btm_root;
xlmeta.level = metad->btm_level; xlmeta.level = metad->btm_level;
xlmeta.fastroot = metad->btm_fastroot; xlmeta.fastroot = metad->btm_fastroot;
xlmeta.fastlevel = metad->btm_fastlevel; xlmeta.fastlevel = metad->btm_fastlevel;
nextrdata->data = (char *) &xlmeta; XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
nextrdata->len = sizeof(xl_btree_metadata);
nextrdata->buffer = InvalidBuffer;
nextrdata->next = nextrdata + 1;
nextrdata++;
xlinfo = XLOG_BTREE_UNLINK_PAGE_META; xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
} }
else else
xlinfo = XLOG_BTREE_UNLINK_PAGE; xlinfo = XLOG_BTREE_UNLINK_PAGE;
nextrdata->data = NULL; recptr = XLogInsert(RM_BTREE_ID, xlinfo);
nextrdata->len = 0;
nextrdata->buffer = rbuf;
nextrdata->buffer_std = true;
nextrdata->next = NULL;
if (BufferIsValid(lbuf))
{
nextrdata->next = nextrdata + 1;
nextrdata++;
nextrdata->data = NULL;
nextrdata->len = 0;
nextrdata->buffer = lbuf;
nextrdata->buffer_std = true;
nextrdata->next = NULL;
}
recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
if (BufferIsValid(metabuf)) if (BufferIsValid(metabuf))
{ {

View File

@ -72,17 +72,23 @@ _bt_restore_page(Page page, char *from, int len)
} }
static void static void
_bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn, _bt_restore_meta(XLogReaderState *record, uint8 block_id)
BlockNumber root, uint32 level,
BlockNumber fastroot, uint32 fastlevel)
{ {
XLogRecPtr lsn = record->EndRecPtr;
Buffer metabuf; Buffer metabuf;
Page metapg; Page metapg;
BTMetaPageData *md; BTMetaPageData *md;
BTPageOpaque pageop; BTPageOpaque pageop;
xl_btree_metadata *xlrec;
char *ptr;
Size len;
metabuf = XLogReadBuffer(rnode, BTREE_METAPAGE, true); metabuf = XLogInitBufferForRedo(record, block_id);
Assert(BufferIsValid(metabuf)); ptr = XLogRecGetBlockData(record, block_id, &len);
Assert(len == sizeof(xl_btree_metadata));
Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
xlrec = (xl_btree_metadata *) ptr;
metapg = BufferGetPage(metabuf); metapg = BufferGetPage(metabuf);
_bt_pageinit(metapg, BufferGetPageSize(metabuf)); _bt_pageinit(metapg, BufferGetPageSize(metabuf));
@ -90,10 +96,10 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
md = BTPageGetMeta(metapg); md = BTPageGetMeta(metapg);
md->btm_magic = BTREE_MAGIC; md->btm_magic = BTREE_MAGIC;
md->btm_version = BTREE_VERSION; md->btm_version = BTREE_VERSION;
md->btm_root = root; md->btm_root = xlrec->root;
md->btm_level = level; md->btm_level = xlrec->level;
md->btm_fastroot = fastroot; md->btm_fastroot = xlrec->fastroot;
md->btm_fastlevel = fastlevel; md->btm_fastlevel = xlrec->fastlevel;
pageop = (BTPageOpaque) PageGetSpecialPointer(metapg); pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
pageop->btpo_flags = BTP_META; pageop->btpo_flags = BTP_META;
@ -117,14 +123,12 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
* types that can insert a downlink: insert, split, and newroot. * types that can insert a downlink: insert, split, and newroot.
*/ */
static void static void
_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record, _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
int block_index,
RelFileNode rnode, BlockNumber cblock)
{ {
XLogRecPtr lsn = record->EndRecPtr;
Buffer buf; Buffer buf;
if (XLogReadBufferForRedo(lsn, record, block_index, rnode, cblock, &buf) if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
== BLK_NEEDS_REDO)
{ {
Page page = (Page) BufferGetPage(buf); Page page = (Page) BufferGetPage(buf);
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page); BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@ -140,38 +144,12 @@ _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
} }
static void static void
btree_xlog_insert(bool isleaf, bool ismeta, btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
XLogRecPtr lsn, XLogRecord *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record); xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page; Page page;
char *datapos;
int datalen;
xl_btree_metadata md;
BlockNumber cblkno = 0;
int main_blk_index;
datapos = (char *) xlrec + SizeOfBtreeInsert;
datalen = record->xl_len - SizeOfBtreeInsert;
/*
* if this insert finishes a split at lower level, extract the block
* number of the (left) child.
*/
if (!isleaf && (record->xl_info & XLR_BKP_BLOCK(0)) == 0)
{
memcpy(&cblkno, datapos, sizeof(BlockNumber));
Assert(cblkno != 0);
datapos += sizeof(BlockNumber);
datalen -= sizeof(BlockNumber);
}
if (ismeta)
{
memcpy(&md, datapos, sizeof(xl_btree_metadata));
datapos += sizeof(xl_btree_metadata);
datalen -= sizeof(xl_btree_metadata);
}
/* /*
* Insertion to an internal page finishes an incomplete split at the child * Insertion to an internal page finishes an incomplete split at the child
@ -183,21 +161,15 @@ btree_xlog_insert(bool isleaf, bool ismeta,
* cannot be updates happening. * cannot be updates happening.
*/ */
if (!isleaf) if (!isleaf)
_bt_clear_incomplete_split(record, 1);
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{ {
_bt_clear_incomplete_split(lsn, record, 0, xlrec->target.node, cblkno); Size datalen;
main_blk_index = 1; char *datapos = XLogRecGetBlockData(record, 0, &datalen);
}
else
main_blk_index = 0;
if (XLogReadBufferForRedo(lsn, record, main_blk_index, xlrec->target.node,
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
&buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
if (PageAddItem(page, (Item) datapos, datalen, if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
false, false) == InvalidOffsetNumber) false, false) == InvalidOffsetNumber)
elog(PANIC, "btree_insert_redo: failed to add item"); elog(PANIC, "btree_insert_redo: failed to add item");
@ -215,15 +187,13 @@ btree_xlog_insert(bool isleaf, bool ismeta,
* obsolete link from the metapage. * obsolete link from the metapage.
*/ */
if (ismeta) if (ismeta)
_bt_restore_meta(xlrec->target.node, lsn, _bt_restore_meta(record, 2);
md.root, md.level,
md.fastroot, md.fastlevel);
} }
static void static void
btree_xlog_split(bool onleft, bool isroot, btree_xlog_split(bool onleft, bool isroot, XLogReaderState *record)
XLogRecPtr lsn, XLogRecord *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record); xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
bool isleaf = (xlrec->level == 0); bool isleaf = (xlrec->level == 0);
Buffer lbuf; Buffer lbuf;
@ -231,56 +201,17 @@ btree_xlog_split(bool onleft, bool isroot,
Page rpage; Page rpage;
BTPageOpaque ropaque; BTPageOpaque ropaque;
char *datapos; char *datapos;
int datalen; Size datalen;
OffsetNumber newitemoff = 0;
Item newitem = NULL;
Size newitemsz = 0;
Item left_hikey = NULL; Item left_hikey = NULL;
Size left_hikeysz = 0; Size left_hikeysz = 0;
BlockNumber cblkno = InvalidBlockNumber; BlockNumber leftsib;
BlockNumber rightsib;
BlockNumber rnext;
datapos = (char *) xlrec + SizeOfBtreeSplit; XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
datalen = record->xl_len - SizeOfBtreeSplit; XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext))
/* Extract newitemoff and newitem, if present */ rnext = P_NONE;
if (onleft)
{
memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
datapos += sizeof(OffsetNumber);
datalen -= sizeof(OffsetNumber);
}
if (onleft && !(record->xl_info & XLR_BKP_BLOCK(0)))
{
/*
* We assume that 16-bit alignment is enough to apply IndexTupleSize
* (since it's fetching from a uint16 field) and also enough for
* PageAddItem to insert the tuple.
*/
newitem = (Item) datapos;
newitemsz = MAXALIGN(IndexTupleSize(newitem));
datapos += newitemsz;
datalen -= newitemsz;
}
/* Extract left hikey and its size (still assuming 16-bit alignment) */
if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(0)))
{
left_hikey = (Item) datapos;
left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
datapos += left_hikeysz;
datalen -= left_hikeysz;
}
/*
* If this insertion finishes an incomplete split, get the block number of
* the child.
*/
if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
{
memcpy(&cblkno, datapos, sizeof(BlockNumber));
datapos += sizeof(BlockNumber);
datalen -= sizeof(BlockNumber);
}
/* /*
* Clear the incomplete split flag on the left sibling of the child page * Clear the incomplete split flag on the left sibling of the child page
@ -288,18 +219,18 @@ btree_xlog_split(bool onleft, bool isroot,
* before locking the other pages) * before locking the other pages)
*/ */
if (!isleaf) if (!isleaf)
_bt_clear_incomplete_split(lsn, record, 1, xlrec->node, cblkno); _bt_clear_incomplete_split(record, 3);
/* Reconstruct right (new) sibling page from scratch */ /* Reconstruct right (new) sibling page from scratch */
rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true); rbuf = XLogInitBufferForRedo(record, 1);
Assert(BufferIsValid(rbuf)); datapos = XLogRecGetBlockData(record, 1, &datalen);
rpage = (Page) BufferGetPage(rbuf); rpage = (Page) BufferGetPage(rbuf);
_bt_pageinit(rpage, BufferGetPageSize(rbuf)); _bt_pageinit(rpage, BufferGetPageSize(rbuf));
ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
ropaque->btpo_prev = xlrec->leftsib; ropaque->btpo_prev = leftsib;
ropaque->btpo_next = xlrec->rnext; ropaque->btpo_next = rnext;
ropaque->btpo.level = xlrec->level; ropaque->btpo.level = xlrec->level;
ropaque->btpo_flags = isleaf ? BTP_LEAF : 0; ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
ropaque->btpo_cycleid = 0; ropaque->btpo_cycleid = 0;
@ -324,8 +255,7 @@ btree_xlog_split(bool onleft, bool isroot,
/* don't release the buffer yet; we touch right page's first item below */ /* don't release the buffer yet; we touch right page's first item below */
/* Now reconstruct left (original) sibling page */ /* Now reconstruct left (original) sibling page */
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->leftsib, if (XLogReadBufferForRedo(record, 0, &lbuf) == BLK_NEEDS_REDO)
&lbuf) == BLK_NEEDS_REDO)
{ {
/* /*
* To retain the same physical order of the tuples that they had, we * To retain the same physical order of the tuples that they had, we
@ -339,9 +269,31 @@ btree_xlog_split(bool onleft, bool isroot,
Page lpage = (Page) BufferGetPage(lbuf); Page lpage = (Page) BufferGetPage(lbuf);
BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage); BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
OffsetNumber off; OffsetNumber off;
Item newitem;
Size newitemsz = 0;
Page newlpage; Page newlpage;
OffsetNumber leftoff; OffsetNumber leftoff;
datapos = XLogRecGetBlockData(record, 0, &datalen);
if (onleft)
{
newitem = (Item) datapos;
newitemsz = MAXALIGN(IndexTupleSize(newitem));
datapos += newitemsz;
datalen -= newitemsz;
}
/* Extract left hikey and its size (assuming 16-bit alignment) */
if (!isleaf)
{
left_hikey = (Item) datapos;
left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
datapos += left_hikeysz;
datalen -= left_hikeysz;
}
Assert(datalen == 0);
newlpage = PageGetTempPageCopySpecial(lpage); newlpage = PageGetTempPageCopySpecial(lpage);
/* Set high key */ /* Set high key */
@ -358,7 +310,7 @@ btree_xlog_split(bool onleft, bool isroot,
Item item; Item item;
/* add the new item if it was inserted on left page */ /* add the new item if it was inserted on left page */
if (onleft && off == newitemoff) if (onleft && off == xlrec->newitemoff)
{ {
if (PageAddItem(newlpage, newitem, newitemsz, leftoff, if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber) false, false) == InvalidOffsetNumber)
@ -376,7 +328,7 @@ btree_xlog_split(bool onleft, bool isroot,
} }
/* cope with possibility that newitem goes at the end */ /* cope with possibility that newitem goes at the end */
if (onleft && off == newitemoff) if (onleft && off == xlrec->newitemoff)
{ {
if (PageAddItem(newlpage, newitem, newitemsz, leftoff, if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber) false, false) == InvalidOffsetNumber)
@ -390,7 +342,7 @@ btree_xlog_split(bool onleft, bool isroot,
lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT; lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
if (isleaf) if (isleaf)
lopaque->btpo_flags |= BTP_LEAF; lopaque->btpo_flags |= BTP_LEAF;
lopaque->btpo_next = xlrec->rightsib; lopaque->btpo_next = rightsib;
lopaque->btpo_cycleid = 0; lopaque->btpo_cycleid = 0;
PageSetLSN(lpage, lsn); PageSetLSN(lpage, lsn);
@ -410,22 +362,16 @@ btree_xlog_split(bool onleft, bool isroot,
* replay, because no other index update can be in progress, and readers * replay, because no other index update can be in progress, and readers
* will cope properly when following an obsolete left-link. * will cope properly when following an obsolete left-link.
*/ */
if (xlrec->rnext != P_NONE) if (rnext != P_NONE)
{ {
/*
* the backup block containing right sibling is 1 or 2, depending
* whether this was a leaf or internal page.
*/
int rnext_index = isleaf ? 1 : 2;
Buffer buffer; Buffer buffer;
if (XLogReadBufferForRedo(lsn, record, rnext_index, xlrec->node, if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
xlrec->rnext, &buffer) == BLK_NEEDS_REDO)
{ {
Page page = (Page) BufferGetPage(buffer); Page page = (Page) BufferGetPage(buffer);
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page); BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
pageop->btpo_prev = xlrec->rightsib; pageop->btpo_prev = rightsib;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
@ -436,8 +382,9 @@ btree_xlog_split(bool onleft, bool isroot,
} }
static void static void
btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record) btree_xlog_vacuum(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record); xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page; Page page;
@ -466,9 +413,13 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
*/ */
if (HotStandbyActiveInReplay()) if (HotStandbyActiveInReplay())
{ {
RelFileNode thisrnode;
BlockNumber thisblkno;
BlockNumber blkno; BlockNumber blkno;
for (blkno = xlrec->lastBlockVacuumed + 1; blkno < xlrec->block; blkno++) XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
{ {
/* /*
* We use RBM_NORMAL_NO_LOG mode because it's not an error * We use RBM_NORMAL_NO_LOG mode because it's not an error
@ -483,7 +434,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
* buffer manager we could optimise this so that if the block is * buffer manager we could optimise this so that if the block is
* not in shared_buffers we confirm it as unpinned. * not in shared_buffers we confirm it as unpinned.
*/ */
buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
RBM_NORMAL_NO_LOG); RBM_NORMAL_NO_LOG);
if (BufferIsValid(buffer)) if (BufferIsValid(buffer))
{ {
@ -497,20 +448,23 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
* Like in btvacuumpage(), we need to take a cleanup lock on every leaf * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
* page. See nbtree/README for details. * page. See nbtree/README for details.
*/ */
if (XLogReadBufferForRedoExtended(lsn, record, 0, if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
xlrec->node, MAIN_FORKNUM, xlrec->block,
RBM_NORMAL, true, &buffer)
== BLK_NEEDS_REDO) == BLK_NEEDS_REDO)
{ {
char *ptr;
Size len;
ptr = XLogRecGetBlockData(record, 0, &len);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
if (record->xl_len > SizeOfBtreeVacuum) if (len > 0)
{ {
OffsetNumber *unused; OffsetNumber *unused;
OffsetNumber *unend; OffsetNumber *unend;
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum); unused = (OffsetNumber *) ptr;
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len); unend = (OffsetNumber *) ((char *) ptr + len);
if ((unend - unused) > 0) if ((unend - unused) > 0)
PageIndexMultiDelete(page, unused, unend - unused); PageIndexMultiDelete(page, unused, unend - unused);
@ -542,13 +496,16 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
* XXX optimise later with something like XLogPrefetchBuffer() * XXX optimise later with something like XLogPrefetchBuffer()
*/ */
static TransactionId static TransactionId
btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec) btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
{ {
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
OffsetNumber *unused; OffsetNumber *unused;
Buffer ibuffer, Buffer ibuffer,
hbuffer; hbuffer;
Page ipage, Page ipage,
hpage; hpage;
RelFileNode rnode;
BlockNumber blkno;
ItemId iitemid, ItemId iitemid,
hitemid; hitemid;
IndexTuple itup; IndexTuple itup;
@ -588,9 +545,11 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
* InvalidTransactionId to cancel all HS transactions. That's probably * InvalidTransactionId to cancel all HS transactions. That's probably
* overkill, but it's safe, and certainly better than panicking here. * overkill, but it's safe, and certainly better than panicking here.
*/ */
ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false); XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
if (!BufferIsValid(ibuffer)) if (!BufferIsValid(ibuffer))
return InvalidTransactionId; return InvalidTransactionId;
LockBuffer(ibuffer, BT_READ);
ipage = (Page) BufferGetPage(ibuffer); ipage = (Page) BufferGetPage(ibuffer);
/* /*
@ -611,12 +570,13 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
* Locate the heap page that the index tuple points at * Locate the heap page that the index tuple points at
*/ */
hblkno = ItemPointerGetBlockNumber(&(itup->t_tid)); hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
hbuffer = XLogReadBuffer(xlrec->hnode, hblkno, false); hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
if (!BufferIsValid(hbuffer)) if (!BufferIsValid(hbuffer))
{ {
UnlockReleaseBuffer(ibuffer); UnlockReleaseBuffer(ibuffer);
return InvalidTransactionId; return InvalidTransactionId;
} }
LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
hpage = (Page) BufferGetPage(hbuffer); hpage = (Page) BufferGetPage(hbuffer);
/* /*
@ -678,8 +638,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
} }
static void static void
btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record) btree_xlog_delete(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record); xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page; Page page;
@ -698,21 +659,23 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
*/ */
if (InHotStandby) if (InHotStandby)
{ {
TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(xlrec); TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(record);
RelFileNode rnode;
ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node); XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode);
} }
/* /*
* We don't need to take a cleanup lock to apply these changes. See * We don't need to take a cleanup lock to apply these changes. See
* nbtree/README for details. * nbtree/README for details.
*/ */
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
if (record->xl_len > SizeOfBtreeDelete) if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
{ {
OffsetNumber *unused; OffsetNumber *unused;
@ -736,17 +699,15 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record) btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record); xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
BlockNumber parent;
Buffer buffer; Buffer buffer;
Page page; Page page;
BTPageOpaque pageop; BTPageOpaque pageop;
IndexTupleData trunctuple; IndexTupleData trunctuple;
parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
/* /*
* In normal operation, we would lock all the pages this WAL record * In normal operation, we would lock all the pages this WAL record
* touches before changing any of them. In WAL replay, it should be okay * touches before changing any of them. In WAL replay, it should be okay
@ -756,8 +717,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/ */
/* parent page */ /* parent page */
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node, parent, if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
OffsetNumber poffset; OffsetNumber poffset;
ItemId itemid; ItemId itemid;
@ -768,7 +728,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop = (BTPageOpaque) PageGetSpecialPointer(page);
poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid)); poffset = xlrec->poffset;
nextoffset = OffsetNumberNext(poffset); nextoffset = OffsetNumberNext(poffset);
itemid = PageGetItemId(page, nextoffset); itemid = PageGetItemId(page, nextoffset);
@ -788,8 +748,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
/* Rewrite the leaf page as a halfdead page */ /* Rewrite the leaf page as a halfdead page */
buffer = XLogReadBuffer(xlrec->target.node, xlrec->leafblk, true); buffer = XLogInitBufferForRedo(record, 0);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer)); _bt_pageinit(page, BufferGetPageSize(buffer));
@ -822,17 +781,16 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
static void static void
btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record) btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record); xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
BlockNumber target;
BlockNumber leftsib; BlockNumber leftsib;
BlockNumber rightsib; BlockNumber rightsib;
Buffer buffer; Buffer buffer;
Page page; Page page;
BTPageOpaque pageop; BTPageOpaque pageop;
target = xlrec->deadblk;
leftsib = xlrec->leftsib; leftsib = xlrec->leftsib;
rightsib = xlrec->rightsib; rightsib = xlrec->rightsib;
@ -845,8 +803,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/ */
/* Fix left-link of right sibling */ /* Fix left-link of right sibling */
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, rightsib, &buffer) if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
== BLK_NEEDS_REDO)
{ {
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@ -861,8 +818,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
/* Fix right-link of left sibling, if any */ /* Fix right-link of left sibling, if any */
if (leftsib != P_NONE) if (leftsib != P_NONE)
{ {
if (XLogReadBufferForRedo(lsn, record, 1, xlrec->node, leftsib, &buffer) if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
== BLK_NEEDS_REDO)
{ {
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@ -876,8 +832,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
} }
/* Rewrite target page as empty deleted page */ /* Rewrite target page as empty deleted page */
buffer = XLogReadBuffer(xlrec->node, target, true); buffer = XLogInitBufferForRedo(record, 0);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer)); _bt_pageinit(page, BufferGetPageSize(buffer));
@ -898,7 +853,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
* itself, update the leaf to point to the next remaining child in the * itself, update the leaf to point to the next remaining child in the
* branch. * branch.
*/ */
if (target != xlrec->leafblk) if (XLogRecHasBlockRef(record, 3))
{ {
/* /*
* There is no real data on the page, so we just re-create it from * There is no real data on the page, so we just re-create it from
@ -906,8 +861,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/ */
IndexTupleData trunctuple; IndexTupleData trunctuple;
buffer = XLogReadBuffer(xlrec->node, xlrec->leafblk, true); buffer = XLogInitBufferForRedo(record, 3);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@ -936,27 +890,21 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
/* Update metapage if needed */ /* Update metapage if needed */
if (info == XLOG_BTREE_UNLINK_PAGE_META) if (info == XLOG_BTREE_UNLINK_PAGE_META)
{ _bt_restore_meta(record, 4);
xl_btree_metadata md;
memcpy(&md, (char *) xlrec + SizeOfBtreeUnlinkPage,
sizeof(xl_btree_metadata));
_bt_restore_meta(xlrec->node, lsn,
md.root, md.level,
md.fastroot, md.fastlevel);
}
} }
static void static void
btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record) btree_xlog_newroot(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record); xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page; Page page;
BTPageOpaque pageop; BTPageOpaque pageop;
char *ptr;
Size len;
buffer = XLogReadBuffer(xlrec->node, xlrec->rootblk, true); buffer = XLogInitBufferForRedo(record, 0);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer)); _bt_pageinit(page, BufferGetPageSize(buffer));
@ -969,34 +917,24 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
pageop->btpo_flags |= BTP_LEAF; pageop->btpo_flags |= BTP_LEAF;
pageop->btpo_cycleid = 0; pageop->btpo_cycleid = 0;
if (record->xl_len > SizeOfBtreeNewroot) if (xlrec->level > 0)
{ {
IndexTuple itup; ptr = XLogRecGetBlockData(record, 0, &len);
BlockNumber cblkno; _bt_restore_page(page, ptr, len);
_bt_restore_page(page,
(char *) xlrec + SizeOfBtreeNewroot,
record->xl_len - SizeOfBtreeNewroot);
/* extract block number of the left-hand split page */
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_HIKEY));
cblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
/* Clear the incomplete-split flag in left child */ /* Clear the incomplete-split flag in left child */
_bt_clear_incomplete_split(lsn, record, 0, xlrec->node, cblkno); _bt_clear_incomplete_split(record, 1);
} }
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
_bt_restore_meta(xlrec->node, lsn, _bt_restore_meta(record, 2);
xlrec->rootblk, xlrec->level,
xlrec->rootblk, xlrec->level);
} }
static void static void
btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record) btree_xlog_reuse_page(XLogReaderState *record)
{ {
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record); xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
@ -1015,58 +953,55 @@ btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
xlrec->node); xlrec->node);
} }
/* Backup blocks are not used in reuse_page records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
} }
void void
btree_redo(XLogRecPtr lsn, XLogRecord *record) btree_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info) switch (info)
{ {
case XLOG_BTREE_INSERT_LEAF: case XLOG_BTREE_INSERT_LEAF:
btree_xlog_insert(true, false, lsn, record); btree_xlog_insert(true, false, record);
break; break;
case XLOG_BTREE_INSERT_UPPER: case XLOG_BTREE_INSERT_UPPER:
btree_xlog_insert(false, false, lsn, record); btree_xlog_insert(false, false, record);
break; break;
case XLOG_BTREE_INSERT_META: case XLOG_BTREE_INSERT_META:
btree_xlog_insert(false, true, lsn, record); btree_xlog_insert(false, true, record);
break; break;
case XLOG_BTREE_SPLIT_L: case XLOG_BTREE_SPLIT_L:
btree_xlog_split(true, false, lsn, record); btree_xlog_split(true, false, record);
break; break;
case XLOG_BTREE_SPLIT_R: case XLOG_BTREE_SPLIT_R:
btree_xlog_split(false, false, lsn, record); btree_xlog_split(false, false, record);
break; break;
case XLOG_BTREE_SPLIT_L_ROOT: case XLOG_BTREE_SPLIT_L_ROOT:
btree_xlog_split(true, true, lsn, record); btree_xlog_split(true, true, record);
break; break;
case XLOG_BTREE_SPLIT_R_ROOT: case XLOG_BTREE_SPLIT_R_ROOT:
btree_xlog_split(false, true, lsn, record); btree_xlog_split(false, true, record);
break; break;
case XLOG_BTREE_VACUUM: case XLOG_BTREE_VACUUM:
btree_xlog_vacuum(lsn, record); btree_xlog_vacuum(record);
break; break;
case XLOG_BTREE_DELETE: case XLOG_BTREE_DELETE:
btree_xlog_delete(lsn, record); btree_xlog_delete(record);
break; break;
case XLOG_BTREE_MARK_PAGE_HALFDEAD: case XLOG_BTREE_MARK_PAGE_HALFDEAD:
btree_xlog_mark_page_halfdead(info, lsn, record); btree_xlog_mark_page_halfdead(info, record);
break; break;
case XLOG_BTREE_UNLINK_PAGE: case XLOG_BTREE_UNLINK_PAGE:
case XLOG_BTREE_UNLINK_PAGE_META: case XLOG_BTREE_UNLINK_PAGE_META:
btree_xlog_unlink_page(info, lsn, record); btree_xlog_unlink_page(info, record);
break; break;
case XLOG_BTREE_NEWROOT: case XLOG_BTREE_NEWROOT:
btree_xlog_newroot(lsn, record); btree_xlog_newroot(record);
break; break;
case XLOG_BTREE_REUSE_PAGE: case XLOG_BTREE_REUSE_PAGE:
btree_xlog_reuse_page(lsn, record); btree_xlog_reuse_page(record);
break; break;
default: default:
elog(PANIC, "btree_redo: unknown op code %u", info); elog(PANIC, "btree_redo: unknown op code %u", info);

View File

@ -17,64 +17,49 @@
#include "access/brin_xlog.h" #include "access/brin_xlog.h"
void void
brin_desc(StringInfo buf, XLogRecord *record) brin_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
info &= XLOG_BRIN_OPMASK; info &= XLOG_BRIN_OPMASK;
if (info == XLOG_BRIN_CREATE_INDEX) if (info == XLOG_BRIN_CREATE_INDEX)
{ {
xl_brin_createidx *xlrec = (xl_brin_createidx *) rec; xl_brin_createidx *xlrec = (xl_brin_createidx *) rec;
appendStringInfo(buf, "v%d pagesPerRange %u rel %u/%u/%u", appendStringInfo(buf, "v%d pagesPerRange %u",
xlrec->version, xlrec->pagesPerRange, xlrec->version, xlrec->pagesPerRange);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode);
} }
else if (info == XLOG_BRIN_INSERT) else if (info == XLOG_BRIN_INSERT)
{ {
xl_brin_insert *xlrec = (xl_brin_insert *) rec; xl_brin_insert *xlrec = (xl_brin_insert *) rec;
appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u)", appendStringInfo(buf, "heapBlk %u pagesPerRange %u offnum %u",
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->heapBlk,
xlrec->node.relNode,
xlrec->heapBlk, xlrec->revmapBlk,
xlrec->pagesPerRange, xlrec->pagesPerRange,
ItemPointerGetBlockNumber(&xlrec->tid), xlrec->offnum);
ItemPointerGetOffsetNumber(&xlrec->tid));
} }
else if (info == XLOG_BRIN_UPDATE) else if (info == XLOG_BRIN_UPDATE)
{ {
xl_brin_update *xlrec = (xl_brin_update *) rec; xl_brin_update *xlrec = (xl_brin_update *) rec;
appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u old TID (%u,%u) TID (%u,%u)", appendStringInfo(buf, "heapBlk %u pagesPerRange %u old offnum %u, new offnum %u",
xlrec->insert.node.spcNode, xlrec->insert.node.dbNode, xlrec->insert.heapBlk,
xlrec->insert.node.relNode,
xlrec->insert.heapBlk, xlrec->insert.revmapBlk,
xlrec->insert.pagesPerRange, xlrec->insert.pagesPerRange,
ItemPointerGetBlockNumber(&xlrec->oldtid), xlrec->oldOffnum,
ItemPointerGetOffsetNumber(&xlrec->oldtid), xlrec->insert.offnum);
ItemPointerGetBlockNumber(&xlrec->insert.tid),
ItemPointerGetOffsetNumber(&xlrec->insert.tid));
} }
else if (info == XLOG_BRIN_SAMEPAGE_UPDATE) else if (info == XLOG_BRIN_SAMEPAGE_UPDATE)
{ {
xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) rec; xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) rec;
appendStringInfo(buf, "rel %u/%u/%u TID (%u,%u)", appendStringInfo(buf, "offnum %u", xlrec->offnum);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode,
ItemPointerGetBlockNumber(&xlrec->tid),
ItemPointerGetOffsetNumber(&xlrec->tid));
} }
else if (info == XLOG_BRIN_REVMAP_EXTEND) else if (info == XLOG_BRIN_REVMAP_EXTEND)
{ {
xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec; xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec;
appendStringInfo(buf, "rel %u/%u/%u targetBlk %u", appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->targetBlk);
} }
} }

View File

@ -18,10 +18,10 @@
void void
clog_desc(StringInfo buf, XLogRecord *record) clog_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == CLOG_ZEROPAGE || info == CLOG_TRUNCATE) if (info == CLOG_ZEROPAGE || info == CLOG_TRUNCATE)
{ {

View File

@ -19,10 +19,10 @@
void void
dbase_desc(StringInfo buf, XLogRecord *record) dbase_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_DBASE_CREATE) if (info == XLOG_DBASE_CREATE)
{ {

View File

@ -15,16 +15,10 @@
#include "postgres.h" #include "postgres.h"
#include "access/gin_private.h" #include "access/gin_private.h"
#include "access/xlogutils.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
static void
desc_node(StringInfo buf, RelFileNode node, BlockNumber blkno)
{
appendStringInfo(buf, "node: %u/%u/%u blkno: %u",
node.spcNode, node.dbNode, node.relNode, blkno);
}
static void static void
desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData) desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
{ {
@ -77,26 +71,25 @@ desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
} }
void void
gin_desc(StringInfo buf, XLogRecord *record) gin_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info) switch (info)
{ {
case XLOG_GIN_CREATE_INDEX: case XLOG_GIN_CREATE_INDEX:
desc_node(buf, *(RelFileNode *) rec, GIN_ROOT_BLKNO); /* no further information */
break; break;
case XLOG_GIN_CREATE_PTREE: case XLOG_GIN_CREATE_PTREE:
desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno); /* no further information */
break; break;
case XLOG_GIN_INSERT: case XLOG_GIN_INSERT:
{ {
ginxlogInsert *xlrec = (ginxlogInsert *) rec; ginxlogInsert *xlrec = (ginxlogInsert *) rec;
char *payload = rec + sizeof(ginxlogInsert); char *payload = rec + sizeof(ginxlogInsert);
desc_node(buf, xlrec->node, xlrec->blkno); appendStringInfo(buf, "isdata: %c isleaf: %c",
appendStringInfo(buf, " isdata: %c isleaf: %c",
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F', (xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F'); (xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
if (!(xlrec->flags & GIN_INSERT_ISLEAF)) if (!(xlrec->flags & GIN_INSERT_ISLEAF))
@ -119,7 +112,7 @@ gin_desc(StringInfo buf, XLogRecord *record)
ginxlogRecompressDataLeaf *insertData = ginxlogRecompressDataLeaf *insertData =
(ginxlogRecompressDataLeaf *) payload; (ginxlogRecompressDataLeaf *) payload;
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogRecHasBlockImage(record, 0))
appendStringInfo(buf, " (full page image)"); appendStringInfo(buf, " (full page image)");
else else
desc_recompress_leaf(buf, insertData); desc_recompress_leaf(buf, insertData);
@ -139,39 +132,38 @@ gin_desc(StringInfo buf, XLogRecord *record)
{ {
ginxlogSplit *xlrec = (ginxlogSplit *) rec; ginxlogSplit *xlrec = (ginxlogSplit *) rec;
desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno); appendStringInfo(buf, "isrootsplit: %c",
appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F'); (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
appendStringInfo(buf, " isdata: %c isleaf: %c", appendStringInfo(buf, " isdata: %c isleaf: %c",
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F', (xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F'); (xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
} }
break; break;
case XLOG_GIN_VACUUM_PAGE: case XLOG_GIN_VACUUM_PAGE:
desc_node(buf, ((ginxlogVacuumPage *) rec)->node, ((ginxlogVacuumPage *) rec)->blkno); /* no further information */
break; break;
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE: case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
{ {
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec; ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec;
desc_node(buf, xlrec->node, xlrec->blkno); if (XLogRecHasBlockImage(record, 0))
if (record->xl_info & XLR_BKP_BLOCK(0))
appendStringInfo(buf, " (full page image)"); appendStringInfo(buf, " (full page image)");
else else
desc_recompress_leaf(buf, &xlrec->data); desc_recompress_leaf(buf, &xlrec->data);
} }
break; break;
case XLOG_GIN_DELETE_PAGE: case XLOG_GIN_DELETE_PAGE:
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno); /* no further information */
break; break;
case XLOG_GIN_UPDATE_META_PAGE: case XLOG_GIN_UPDATE_META_PAGE:
desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, GIN_METAPAGE_BLKNO); /* no further information */
break; break;
case XLOG_GIN_INSERT_LISTPAGE: case XLOG_GIN_INSERT_LISTPAGE:
desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno); /* no further information */
break; break;
case XLOG_GIN_DELETE_LISTPAGE: case XLOG_GIN_DELETE_LISTPAGE:
appendStringInfo(buf, "%d pages, ", ((ginxlogDeleteListPages *) rec)->ndeleted); appendStringInfo(buf, "ndeleted: %d",
desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, GIN_METAPAGE_BLKNO); ((ginxlogDeleteListPages *) rec)->ndeleted);
break; break;
} }
} }

View File

@ -18,34 +18,23 @@
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
static void
out_target(StringInfo buf, RelFileNode node)
{
appendStringInfo(buf, "rel %u/%u/%u",
node.spcNode, node.dbNode, node.relNode);
}
static void static void
out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec) out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
{ {
out_target(buf, xlrec->node);
appendStringInfo(buf, "; block number %u", xlrec->blkno);
} }
static void static void
out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec) out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
{ {
appendStringInfoString(buf, "page_split: "); appendStringInfo(buf, "page_split: splits to %d pages",
out_target(buf, xlrec->node); xlrec->npage);
appendStringInfo(buf, "; block number %u splits to %d pages",
xlrec->origblkno, xlrec->npage);
} }
void void
gist_desc(StringInfo buf, XLogRecord *record) gist_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info) switch (info)
{ {
@ -56,10 +45,6 @@ gist_desc(StringInfo buf, XLogRecord *record)
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec); out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break; break;
case XLOG_GIST_CREATE_INDEX: case XLOG_GIST_CREATE_INDEX:
appendStringInfo(buf, "rel %u/%u/%u",
((RelFileNode *) rec)->spcNode,
((RelFileNode *) rec)->dbNode,
((RelFileNode *) rec)->relNode);
break; break;
} }
} }

View File

@ -17,7 +17,7 @@
#include "access/hash.h" #include "access/hash.h"
void void
hash_desc(StringInfo buf, XLogRecord *record) hash_desc(StringInfo buf, XLogReaderState *record)
{ {
} }

View File

@ -16,15 +16,6 @@
#include "access/heapam_xlog.h" #include "access/heapam_xlog.h"
static void
out_target(StringInfo buf, xl_heaptid *target)
{
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
target->node.spcNode, target->node.dbNode, target->node.relNode,
ItemPointerGetBlockNumber(&(target->tid)),
ItemPointerGetOffsetNumber(&(target->tid)));
}
static void static void
out_infobits(StringInfo buf, uint8 infobits) out_infobits(StringInfo buf, uint8 infobits)
{ {
@ -41,23 +32,23 @@ out_infobits(StringInfo buf, uint8 infobits)
} }
void void
heap_desc(StringInfo buf, XLogRecord *record) heap_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
info &= XLOG_HEAP_OPMASK; info &= XLOG_HEAP_OPMASK;
if (info == XLOG_HEAP_INSERT) if (info == XLOG_HEAP_INSERT)
{ {
xl_heap_insert *xlrec = (xl_heap_insert *) rec; xl_heap_insert *xlrec = (xl_heap_insert *) rec;
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "off %u", xlrec->offnum);
} }
else if (info == XLOG_HEAP_DELETE) else if (info == XLOG_HEAP_DELETE)
{ {
xl_heap_delete *xlrec = (xl_heap_delete *) rec; xl_heap_delete *xlrec = (xl_heap_delete *) rec;
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "off %u", xlrec->offnum);
appendStringInfoChar(buf, ' '); appendStringInfoChar(buf, ' ');
out_infobits(buf, xlrec->infobits_set); out_infobits(buf, xlrec->infobits_set);
} }
@ -65,24 +56,24 @@ heap_desc(StringInfo buf, XLogRecord *record)
{ {
xl_heap_update *xlrec = (xl_heap_update *) rec; xl_heap_update *xlrec = (xl_heap_update *) rec;
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "off %u xmax %u",
appendStringInfo(buf, " xmax %u ", xlrec->old_xmax); xlrec->old_offnum,
xlrec->old_xmax);
out_infobits(buf, xlrec->old_infobits_set); out_infobits(buf, xlrec->old_infobits_set);
appendStringInfo(buf, "; new tid %u/%u xmax %u", appendStringInfo(buf, "; new off %u xmax %u",
ItemPointerGetBlockNumber(&(xlrec->newtid)), xlrec->new_offnum,
ItemPointerGetOffsetNumber(&(xlrec->newtid)),
xlrec->new_xmax); xlrec->new_xmax);
} }
else if (info == XLOG_HEAP_HOT_UPDATE) else if (info == XLOG_HEAP_HOT_UPDATE)
{ {
xl_heap_update *xlrec = (xl_heap_update *) rec; xl_heap_update *xlrec = (xl_heap_update *) rec;
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "off %u xmax %u",
appendStringInfo(buf, " xmax %u ", xlrec->old_xmax); xlrec->old_offnum,
xlrec->old_xmax);
out_infobits(buf, xlrec->old_infobits_set); out_infobits(buf, xlrec->old_infobits_set);
appendStringInfo(buf, "; new tid %u/%u xmax %u", appendStringInfo(buf, "; new off %u xmax %u",
ItemPointerGetBlockNumber(&(xlrec->newtid)), xlrec->new_offnum,
ItemPointerGetOffsetNumber(&(xlrec->newtid)),
xlrec->new_xmax); xlrec->new_xmax);
} }
else if (info == XLOG_HEAP_LOCK) else if (info == XLOG_HEAP_LOCK)
@ -90,40 +81,34 @@ heap_desc(StringInfo buf, XLogRecord *record)
xl_heap_lock *xlrec = (xl_heap_lock *) rec; xl_heap_lock *xlrec = (xl_heap_lock *) rec;
appendStringInfo(buf, "xid %u: ", xlrec->locking_xid); appendStringInfo(buf, "xid %u: ", xlrec->locking_xid);
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "off %u ", xlrec->offnum);
appendStringInfoChar(buf, ' ');
out_infobits(buf, xlrec->infobits_set); out_infobits(buf, xlrec->infobits_set);
} }
else if (info == XLOG_HEAP_INPLACE) else if (info == XLOG_HEAP_INPLACE)
{ {
xl_heap_inplace *xlrec = (xl_heap_inplace *) rec; xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "off %u", xlrec->offnum);
} }
} }
void void
heap2_desc(StringInfo buf, XLogRecord *record) heap2_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
info &= XLOG_HEAP_OPMASK; info &= XLOG_HEAP_OPMASK;
if (info == XLOG_HEAP2_CLEAN) if (info == XLOG_HEAP2_CLEAN)
{ {
xl_heap_clean *xlrec = (xl_heap_clean *) rec; xl_heap_clean *xlrec = (xl_heap_clean *) rec;
appendStringInfo(buf, "rel %u/%u/%u; blk %u remxid %u", appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block,
xlrec->latestRemovedXid);
} }
else if (info == XLOG_HEAP2_FREEZE_PAGE) else if (info == XLOG_HEAP2_FREEZE_PAGE)
{ {
xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec; xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec;
appendStringInfo(buf, "rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u", appendStringInfo(buf, "cutoff xid %u ntuples %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block,
xlrec->cutoff_xid, xlrec->ntuples); xlrec->cutoff_xid, xlrec->ntuples);
} }
else if (info == XLOG_HEAP2_CLEANUP_INFO) else if (info == XLOG_HEAP2_CLEANUP_INFO)
@ -136,17 +121,13 @@ heap2_desc(StringInfo buf, XLogRecord *record)
{ {
xl_heap_visible *xlrec = (xl_heap_visible *) rec; xl_heap_visible *xlrec = (xl_heap_visible *) rec;
appendStringInfo(buf, "rel %u/%u/%u; blk %u", appendStringInfo(buf, "cutoff xid %u", xlrec->cutoff_xid);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block);
} }
else if (info == XLOG_HEAP2_MULTI_INSERT) else if (info == XLOG_HEAP2_MULTI_INSERT)
{ {
xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec; xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec;
appendStringInfo(buf, "rel %u/%u/%u; blk %u; %d tuples", appendStringInfo(buf, "%d tuples", xlrec->ntuples);
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
xlrec->blkno, xlrec->ntuples);
} }
else if (info == XLOG_HEAP2_LOCK_UPDATED) else if (info == XLOG_HEAP2_LOCK_UPDATED)
{ {
@ -154,13 +135,18 @@ heap2_desc(StringInfo buf, XLogRecord *record)
appendStringInfo(buf, "xmax %u msk %04x; ", xlrec->xmax, appendStringInfo(buf, "xmax %u msk %04x; ", xlrec->xmax,
xlrec->infobits_set); xlrec->infobits_set);
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "off %u", xlrec->offnum);
} }
else if (info == XLOG_HEAP2_NEW_CID) else if (info == XLOG_HEAP2_NEW_CID)
{ {
xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec; xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec;
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
xlrec->target_node.spcNode,
xlrec->target_node.dbNode,
xlrec->target_node.relNode,
ItemPointerGetBlockNumber(&(xlrec->target_tid)),
ItemPointerGetOffsetNumber(&(xlrec->target_tid)));
appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u", appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u",
xlrec->cmin, xlrec->cmax, xlrec->combocid); xlrec->cmin, xlrec->cmax, xlrec->combocid);
} }

View File

@ -47,10 +47,10 @@ out_member(StringInfo buf, MultiXactMember *member)
} }
void void
multixact_desc(StringInfo buf, XLogRecord *record) multixact_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE || if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE ||
info == XLOG_MULTIXACT_ZERO_MEM_PAGE) info == XLOG_MULTIXACT_ZERO_MEM_PAGE)

View File

@ -16,20 +16,11 @@
#include "access/nbtree.h" #include "access/nbtree.h"
static void
out_target(StringInfo buf, xl_btreetid *target)
{
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
target->node.spcNode, target->node.dbNode, target->node.relNode,
ItemPointerGetBlockNumber(&(target->tid)),
ItemPointerGetOffsetNumber(&(target->tid)));
}
void void
btree_desc(StringInfo buf, XLogRecord *record) btree_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info) switch (info)
{ {
@ -39,7 +30,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
{ {
xl_btree_insert *xlrec = (xl_btree_insert *) rec; xl_btree_insert *xlrec = (xl_btree_insert *) rec;
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "off %u", xlrec->offnum);
break; break;
} }
case XLOG_BTREE_SPLIT_L: case XLOG_BTREE_SPLIT_L:
@ -49,11 +40,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
{ {
xl_btree_split *xlrec = (xl_btree_split *) rec; xl_btree_split *xlrec = (xl_btree_split *) rec;
appendStringInfo(buf, "rel %u/%u/%u ", appendStringInfo(buf, "level %u, firstright %d",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode);
appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
xlrec->level, xlrec->firstright); xlrec->level, xlrec->firstright);
break; break;
} }
@ -61,9 +48,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
{ {
xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec; xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec;
appendStringInfo(buf, "rel %u/%u/%u; blk %u, lastBlockVacuumed %u", appendStringInfo(buf, "lastBlockVacuumed %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block,
xlrec->lastBlockVacuumed); xlrec->lastBlockVacuumed);
break; break;
} }
@ -71,18 +56,14 @@ btree_desc(StringInfo buf, XLogRecord *record)
{ {
xl_btree_delete *xlrec = (xl_btree_delete *) rec; xl_btree_delete *xlrec = (xl_btree_delete *) rec;
appendStringInfo(buf, "index %u/%u/%u; iblk %u, heap %u/%u/%u;", appendStringInfo(buf, "%d items", xlrec->nitems);
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
xlrec->block,
xlrec->hnode.spcNode, xlrec->hnode.dbNode, xlrec->hnode.relNode);
break; break;
} }
case XLOG_BTREE_MARK_PAGE_HALFDEAD: case XLOG_BTREE_MARK_PAGE_HALFDEAD:
{ {
xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) rec; xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) rec;
out_target(buf, &(xlrec->target)); appendStringInfo(buf, "topparent %u; leaf %u; left %u; right %u",
appendStringInfo(buf, "; topparent %u; leaf %u; left %u; right %u",
xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk); xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk);
break; break;
} }
@ -91,22 +72,19 @@ btree_desc(StringInfo buf, XLogRecord *record)
{ {
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec; xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec;
appendStringInfo(buf, "rel %u/%u/%u; ", appendStringInfo(buf, "left %u; right %u; btpo_xact %u; ",
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode); xlrec->leftsib, xlrec->rightsib,
appendStringInfo(buf, "dead %u; left %u; right %u; btpo_xact %u; ", xlrec->btpo_xact);
xlrec->deadblk, xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact); appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u",
appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; topparent %u", xlrec->leafleftsib, xlrec->leafrightsib,
xlrec->leafblk, xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent); xlrec->topparent);
break; break;
} }
case XLOG_BTREE_NEWROOT: case XLOG_BTREE_NEWROOT:
{ {
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec; xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
appendStringInfo(buf, "rel %u/%u/%u; root %u lev %u", appendStringInfo(buf, "lev %u", xlrec->level);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode,
xlrec->rootblk, xlrec->level);
break; break;
} }
case XLOG_BTREE_REUSE_PAGE: case XLOG_BTREE_REUSE_PAGE:
@ -115,7 +93,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u", appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u",
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->latestRemovedXid); xlrec->node.relNode, xlrec->latestRemovedXid);
break; break;
} }
} }

View File

@ -17,10 +17,10 @@
#include "utils/relmapper.h" #include "utils/relmapper.h"
void void
relmap_desc(StringInfo buf, XLogRecord *record) relmap_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_RELMAP_UPDATE) if (info == XLOG_RELMAP_UPDATE)
{ {

View File

@ -18,10 +18,10 @@
void void
seq_desc(StringInfo buf, XLogRecord *record) seq_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
xl_seq_rec *xlrec = (xl_seq_rec *) rec; xl_seq_rec *xlrec = (xl_seq_rec *) rec;
if (info == XLOG_SEQ_LOG) if (info == XLOG_SEQ_LOG)

View File

@ -19,10 +19,10 @@
void void
smgr_desc(StringInfo buf, XLogRecord *record) smgr_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_SMGR_CREATE) if (info == XLOG_SMGR_CREATE)
{ {

View File

@ -16,70 +16,66 @@
#include "access/spgist_private.h" #include "access/spgist_private.h"
static void
out_target(StringInfo buf, RelFileNode node)
{
appendStringInfo(buf, "rel %u/%u/%u ",
node.spcNode, node.dbNode, node.relNode);
}
void void
spg_desc(StringInfo buf, XLogRecord *record) spg_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info) switch (info)
{ {
case XLOG_SPGIST_CREATE_INDEX: case XLOG_SPGIST_CREATE_INDEX:
appendStringInfo(buf, "rel %u/%u/%u",
((RelFileNode *) rec)->spcNode,
((RelFileNode *) rec)->dbNode,
((RelFileNode *) rec)->relNode);
break; break;
case XLOG_SPGIST_ADD_LEAF: case XLOG_SPGIST_ADD_LEAF:
out_target(buf, ((spgxlogAddLeaf *) rec)->node); {
appendStringInfo(buf, "%u", spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
((spgxlogAddLeaf *) rec)->blknoLeaf);
appendStringInfo(buf, "add leaf to page");
appendStringInfo(buf, "; off %u; headoff %u; parentoff %u",
xlrec->offnumLeaf, xlrec->offnumHeadLeaf,
xlrec->offnumParent);
if (xlrec->newPage)
appendStringInfo(buf, " (newpage)");
if (xlrec->storesNulls)
appendStringInfo(buf, " (nulls)");
}
break; break;
case XLOG_SPGIST_MOVE_LEAFS: case XLOG_SPGIST_MOVE_LEAFS:
out_target(buf, ((spgxlogMoveLeafs *) rec)->node); appendStringInfo(buf, "%u leafs",
appendStringInfo(buf, "%u leafs from page %u to page %u", ((spgxlogMoveLeafs *) rec)->nMoves);
((spgxlogMoveLeafs *) rec)->nMoves,
((spgxlogMoveLeafs *) rec)->blknoSrc,
((spgxlogMoveLeafs *) rec)->blknoDst);
break; break;
case XLOG_SPGIST_ADD_NODE: case XLOG_SPGIST_ADD_NODE:
out_target(buf, ((spgxlogAddNode *) rec)->node); appendStringInfo(buf, "off %u",
appendStringInfo(buf, "%u:%u",
((spgxlogAddNode *) rec)->blkno,
((spgxlogAddNode *) rec)->offnum); ((spgxlogAddNode *) rec)->offnum);
break; break;
case XLOG_SPGIST_SPLIT_TUPLE: case XLOG_SPGIST_SPLIT_TUPLE:
out_target(buf, ((spgxlogSplitTuple *) rec)->node); appendStringInfo(buf, "prefix off: %u, postfix off: %u (same %d, new %d)",
appendStringInfo(buf, "%u:%u to %u:%u",
((spgxlogSplitTuple *) rec)->blknoPrefix,
((spgxlogSplitTuple *) rec)->offnumPrefix, ((spgxlogSplitTuple *) rec)->offnumPrefix,
((spgxlogSplitTuple *) rec)->blknoPostfix, ((spgxlogSplitTuple *) rec)->offnumPostfix,
((spgxlogSplitTuple *) rec)->offnumPostfix); ((spgxlogSplitTuple *) rec)->postfixBlkSame,
((spgxlogSplitTuple *) rec)->newPage
);
break; break;
case XLOG_SPGIST_PICKSPLIT: case XLOG_SPGIST_PICKSPLIT:
out_target(buf, ((spgxlogPickSplit *) rec)->node); {
spgxlogPickSplit *xlrec = (spgxlogPickSplit *) rec;
appendStringInfo(buf, "ndel %u; nins %u",
xlrec->nDelete, xlrec->nInsert);
if (xlrec->innerIsParent)
appendStringInfo(buf, " (innerIsParent)");
if (xlrec->isRootSplit)
appendStringInfo(buf, " (isRootSplit)");
}
break; break;
case XLOG_SPGIST_VACUUM_LEAF: case XLOG_SPGIST_VACUUM_LEAF:
out_target(buf, ((spgxlogVacuumLeaf *) rec)->node); /* no further information */
appendStringInfo(buf, "page %u",
((spgxlogVacuumLeaf *) rec)->blkno);
break; break;
case XLOG_SPGIST_VACUUM_ROOT: case XLOG_SPGIST_VACUUM_ROOT:
out_target(buf, ((spgxlogVacuumRoot *) rec)->node); /* no further information */
appendStringInfo(buf, "page %u",
((spgxlogVacuumRoot *) rec)->blkno);
break; break;
case XLOG_SPGIST_VACUUM_REDIRECT: case XLOG_SPGIST_VACUUM_REDIRECT:
out_target(buf, ((spgxlogVacuumRedirect *) rec)->node); appendStringInfo(buf, "newest XID %u",
appendStringInfo(buf, "page %u, newest XID %u",
((spgxlogVacuumRedirect *) rec)->blkno,
((spgxlogVacuumRedirect *) rec)->newestRedirectXid); ((spgxlogVacuumRedirect *) rec)->newestRedirectXid);
break; break;
} }

View File

@ -37,10 +37,10 @@ standby_desc_running_xacts(StringInfo buf, xl_running_xacts *xlrec)
} }
void void
standby_desc(StringInfo buf, XLogRecord *record) standby_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_STANDBY_LOCK) if (info == XLOG_STANDBY_LOCK)
{ {

View File

@ -18,10 +18,10 @@
void void
tblspc_desc(StringInfo buf, XLogRecord *record) tblspc_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_TBLSPC_CREATE) if (info == XLOG_TBLSPC_CREATE)
{ {

View File

@ -137,10 +137,10 @@ xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec)
} }
void void
xact_desc(StringInfo buf, XLogRecord *record) xact_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_XACT_COMMIT_COMPACT) if (info == XLOG_XACT_COMMIT_COMPACT)
{ {

View File

@ -32,10 +32,10 @@ const struct config_enum_entry wal_level_options[] = {
}; };
void void
xlog_desc(StringInfo buf, XLogRecord *record) xlog_desc(StringInfo buf, XLogReaderState *record)
{ {
char *rec = XLogRecGetData(record); char *rec = XLogRecGetData(record);
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_CHECKPOINT_SHUTDOWN || if (info == XLOG_CHECKPOINT_SHUTDOWN ||
info == XLOG_CHECKPOINT_ONLINE) info == XLOG_CHECKPOINT_ONLINE)
@ -76,11 +76,7 @@ xlog_desc(StringInfo buf, XLogRecord *record)
} }
else if (info == XLOG_FPI) else if (info == XLOG_FPI)
{ {
BkpBlock *bkp = (BkpBlock *) rec; /* no further information to print */
appendStringInfo(buf, "%s block %u",
relpathperm(bkp->node, bkp->fork),
bkp->block);
} }
else if (info == XLOG_BACKUP_END) else if (info == XLOG_BACKUP_END)
{ {

View File

@ -16,8 +16,8 @@
#include "postgres.h" #include "postgres.h"
#include "access/genam.h" #include "access/genam.h"
#include "access/xloginsert.h"
#include "access/spgist_private.h" #include "access/spgist_private.h"
#include "access/xloginsert.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "utils/rel.h" #include "utils/rel.h"
@ -202,25 +202,17 @@ static void
addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple, addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew) SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
{ {
XLogRecData rdata[4];
spgxlogAddLeaf xlrec; spgxlogAddLeaf xlrec;
xlrec.node = index->rd_node;
xlrec.blknoLeaf = current->blkno;
xlrec.newPage = isNew; xlrec.newPage = isNew;
xlrec.storesNulls = isNulls; xlrec.storesNulls = isNulls;
/* these will be filled below as needed */ /* these will be filled below as needed */
xlrec.offnumLeaf = InvalidOffsetNumber; xlrec.offnumLeaf = InvalidOffsetNumber;
xlrec.offnumHeadLeaf = InvalidOffsetNumber; xlrec.offnumHeadLeaf = InvalidOffsetNumber;
xlrec.blknoParent = InvalidBlockNumber;
xlrec.offnumParent = InvalidOffsetNumber; xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0; xlrec.nodeI = 0;
ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
ACCEPT_RDATA_DATA(leafTuple, leafTuple->size, 1);
ACCEPT_RDATA_BUFFER(current->buffer, 2);
START_CRIT_SECTION(); START_CRIT_SECTION();
if (current->offnum == InvalidOffsetNumber || if (current->offnum == InvalidOffsetNumber ||
@ -237,13 +229,10 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
/* Must update parent's downlink if any */ /* Must update parent's downlink if any */
if (parent->buffer != InvalidBuffer) if (parent->buffer != InvalidBuffer)
{ {
xlrec.blknoParent = parent->blkno;
xlrec.offnumParent = parent->offnum; xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node; xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum); saveNodeLink(index, parent, current->blkno, current->offnum);
ACCEPT_RDATA_BUFFER(parent->buffer, 3);
} }
} }
else else
@ -303,12 +292,20 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, rdata); XLogBeginInsert();
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
XLogRegisterData((char *) leafTuple, leafTuple->size);
XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
if (xlrec.offnumParent != InvalidOffsetNumber)
XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
PageSetLSN(current->page, recptr); PageSetLSN(current->page, recptr);
/* update parent only if we actually changed it */ /* update parent only if we actually changed it */
if (xlrec.blknoParent != InvalidBlockNumber) if (xlrec.offnumParent != InvalidOffsetNumber)
{ {
PageSetLSN(parent->page, recptr); PageSetLSN(parent->page, recptr);
} }
@ -399,7 +396,6 @@ moveLeafs(Relation index, SpGistState *state,
OffsetNumber *toDelete; OffsetNumber *toDelete;
OffsetNumber *toInsert; OffsetNumber *toInsert;
BlockNumber nblkno; BlockNumber nblkno;
XLogRecData rdata[7];
spgxlogMoveLeafs xlrec; spgxlogMoveLeafs xlrec;
char *leafdata, char *leafdata,
*leafptr; *leafptr;
@ -455,20 +451,6 @@ moveLeafs(Relation index, SpGistState *state,
nblkno = BufferGetBlockNumber(nbuf); nblkno = BufferGetBlockNumber(nbuf);
Assert(nblkno != current->blkno); Assert(nblkno != current->blkno);
/* prepare WAL info */
xlrec.node = index->rd_node;
STORE_STATE(state, xlrec.stateSrc);
xlrec.blknoSrc = current->blkno;
xlrec.blknoDst = nblkno;
xlrec.nMoves = nDelete;
xlrec.replaceDead = replaceDead;
xlrec.storesNulls = isNulls;
xlrec.blknoParent = parent->blkno;
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
leafdata = leafptr = palloc(size); leafdata = leafptr = palloc(size);
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -533,15 +515,29 @@ moveLeafs(Relation index, SpGistState *state,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogMoveLeafs, 0); /* prepare WAL info */
ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * nDelete, 1); STORE_STATE(state, xlrec.stateSrc);
ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nInsert, 2);
ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, 3);
ACCEPT_RDATA_BUFFER(current->buffer, 4);
ACCEPT_RDATA_BUFFER(nbuf, 5);
ACCEPT_RDATA_BUFFER(parent->buffer, 6);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, rdata); xlrec.nMoves = nDelete;
xlrec.replaceDead = replaceDead;
xlrec.storesNulls = isNulls;
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogMoveLeafs);
XLogRegisterData((char *) toDelete,
sizeof(OffsetNumber) * nDelete);
XLogRegisterData((char *) toInsert,
sizeof(OffsetNumber) * nInsert);
XLogRegisterData((char *) leafdata, leafptr - leafdata);
XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? REGBUF_WILL_INIT : 0));
XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS);
PageSetLSN(current->page, recptr); PageSetLSN(current->page, recptr);
PageSetLSN(npage, recptr); PageSetLSN(npage, recptr);
@ -701,8 +697,6 @@ doPickSplit(Relation index, SpGistState *state,
int currentFreeSpace; int currentFreeSpace;
int totalLeafSizes; int totalLeafSizes;
bool allTheSame; bool allTheSame;
XLogRecData rdata[10];
int nRdata;
spgxlogPickSplit xlrec; spgxlogPickSplit xlrec;
char *leafdata, char *leafdata,
*leafptr; *leafptr;
@ -725,7 +719,6 @@ doPickSplit(Relation index, SpGistState *state,
newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n); newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n);
leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n); leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n);
xlrec.node = index->rd_node;
STORE_STATE(state, xlrec.stateSrc); STORE_STATE(state, xlrec.stateSrc);
/* /*
@ -971,10 +964,6 @@ doPickSplit(Relation index, SpGistState *state,
} }
/* /*
* Because a WAL record can't involve more than four buffers, we can only
* afford to deal with two leaf pages in each picksplit action, ie the
* current page and at most one other.
*
* The new leaf tuples converted from the existing ones should require the * The new leaf tuples converted from the existing ones should require the
* same or less space, and therefore should all fit onto one page * same or less space, and therefore should all fit onto one page
* (although that's not necessarily the current page, since we can't * (although that's not necessarily the current page, since we can't
@ -1108,17 +1097,13 @@ doPickSplit(Relation index, SpGistState *state,
} }
/* Start preparing WAL record */ /* Start preparing WAL record */
xlrec.blknoSrc = current->blkno;
xlrec.blknoDest = InvalidBlockNumber;
xlrec.nDelete = 0; xlrec.nDelete = 0;
xlrec.initSrc = isNew; xlrec.initSrc = isNew;
xlrec.storesNulls = isNulls; xlrec.storesNulls = isNulls;
xlrec.isRootSplit = SpGistBlockIsRoot(current->blkno);
leafdata = leafptr = (char *) palloc(totalLeafSizes); leafdata = leafptr = (char *) palloc(totalLeafSizes);
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogPickSplit, 0);
nRdata = 1;
/* Here we begin making the changes to the target pages */ /* Here we begin making the changes to the target pages */
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -1150,12 +1135,6 @@ doPickSplit(Relation index, SpGistState *state,
else else
{ {
xlrec.nDelete = nToDelete; xlrec.nDelete = nToDelete;
ACCEPT_RDATA_DATA(toDelete,
sizeof(OffsetNumber) * nToDelete,
nRdata);
nRdata++;
ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
nRdata++;
if (!state->isBuild) if (!state->isBuild)
{ {
@ -1240,25 +1219,8 @@ doPickSplit(Relation index, SpGistState *state,
if (newLeafBuffer != InvalidBuffer) if (newLeafBuffer != InvalidBuffer)
{ {
MarkBufferDirty(newLeafBuffer); MarkBufferDirty(newLeafBuffer);
/* also save block number for WAL */
xlrec.blknoDest = BufferGetBlockNumber(newLeafBuffer);
if (!xlrec.initDest)
{
ACCEPT_RDATA_BUFFER(newLeafBuffer, nRdata);
nRdata++;
}
} }
xlrec.nInsert = nToInsert;
ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nToInsert, nRdata);
nRdata++;
ACCEPT_RDATA_DATA(leafPageSelect, sizeof(uint8) * nToInsert, nRdata);
nRdata++;
ACCEPT_RDATA_DATA(innerTuple, innerTuple->size, nRdata);
nRdata++;
ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, nRdata);
nRdata++;
/* Remember current buffer, since we're about to change "current" */ /* Remember current buffer, since we're about to change "current" */
saveCurrent = *current; saveCurrent = *current;
@ -1276,7 +1238,6 @@ doPickSplit(Relation index, SpGistState *state,
current->blkno = parent->blkno; current->blkno = parent->blkno;
current->buffer = parent->buffer; current->buffer = parent->buffer;
current->page = parent->page; current->page = parent->page;
xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum = xlrec.offnumInner = current->offnum =
SpGistPageAddNewItem(state, current->page, SpGistPageAddNewItem(state, current->page,
(Item) innerTuple, innerTuple->size, (Item) innerTuple, innerTuple->size,
@ -1285,14 +1246,11 @@ doPickSplit(Relation index, SpGistState *state,
/* /*
* Update parent node link and mark parent page dirty * Update parent node link and mark parent page dirty
*/ */
xlrec.blknoParent = parent->blkno; xlrec.innerIsParent = true;
xlrec.offnumParent = parent->offnum; xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node; xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum); saveNodeLink(index, parent, current->blkno, current->offnum);
ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
nRdata++;
/* /*
* Update redirection link (in old current buffer) * Update redirection link (in old current buffer)
*/ */
@ -1314,7 +1272,6 @@ doPickSplit(Relation index, SpGistState *state,
current->buffer = newInnerBuffer; current->buffer = newInnerBuffer;
current->blkno = BufferGetBlockNumber(current->buffer); current->blkno = BufferGetBlockNumber(current->buffer);
current->page = BufferGetPage(current->buffer); current->page = BufferGetPage(current->buffer);
xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum = xlrec.offnumInner = current->offnum =
SpGistPageAddNewItem(state, current->page, SpGistPageAddNewItem(state, current->page,
(Item) innerTuple, innerTuple->size, (Item) innerTuple, innerTuple->size,
@ -1326,16 +1283,11 @@ doPickSplit(Relation index, SpGistState *state,
/* /*
* Update parent node link and mark parent page dirty * Update parent node link and mark parent page dirty
*/ */
xlrec.blknoParent = parent->blkno; xlrec.innerIsParent = (parent->buffer == current->buffer);
xlrec.offnumParent = parent->offnum; xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node; xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum); saveNodeLink(index, parent, current->blkno, current->offnum);
ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
nRdata++;
ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
nRdata++;
/* /*
* Update redirection link (in old current buffer) * Update redirection link (in old current buffer)
*/ */
@ -1357,8 +1309,8 @@ doPickSplit(Relation index, SpGistState *state,
SpGistInitBuffer(current->buffer, (isNulls ? SPGIST_NULLS : 0)); SpGistInitBuffer(current->buffer, (isNulls ? SPGIST_NULLS : 0));
xlrec.initInner = true; xlrec.initInner = true;
xlrec.innerIsParent = false;
xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum = xlrec.offnumInner = current->offnum =
PageAddItem(current->page, (Item) innerTuple, innerTuple->size, PageAddItem(current->page, (Item) innerTuple, innerTuple->size,
InvalidOffsetNumber, false, false); InvalidOffsetNumber, false, false);
@ -1367,7 +1319,6 @@ doPickSplit(Relation index, SpGistState *state,
innerTuple->size); innerTuple->size);
/* No parent link to update, nor redirection to do */ /* No parent link to update, nor redirection to do */
xlrec.blknoParent = InvalidBlockNumber;
xlrec.offnumParent = InvalidOffsetNumber; xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0; xlrec.nodeI = 0;
@ -1381,9 +1332,46 @@ doPickSplit(Relation index, SpGistState *state,
if (RelationNeedsWAL(index)) if (RelationNeedsWAL(index))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
int flags;
XLogBeginInsert();
xlrec.nInsert = nToInsert;
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogPickSplit);
XLogRegisterData((char *) toDelete,
sizeof(OffsetNumber) * xlrec.nDelete);
XLogRegisterData((char *) toInsert,
sizeof(OffsetNumber) * xlrec.nInsert);
XLogRegisterData((char *) leafPageSelect,
sizeof(uint8) * xlrec.nInsert);
XLogRegisterData((char *) innerTuple, innerTuple->size);
XLogRegisterData(leafdata, leafptr - leafdata);
flags = REGBUF_STANDARD;
if (xlrec.initSrc)
flags |= REGBUF_WILL_INIT;
if (BufferIsValid(saveCurrent.buffer))
XLogRegisterBuffer(0, saveCurrent.buffer, flags);
if (BufferIsValid(newLeafBuffer))
{
flags = REGBUF_STANDARD;
if (xlrec.initDest)
flags |= REGBUF_WILL_INIT;
XLogRegisterBuffer(1, newLeafBuffer, flags);
}
XLogRegisterBuffer(2, current->buffer, REGBUF_STANDARD);
if (parent->buffer != InvalidBuffer)
{
if (parent->buffer != current->buffer)
XLogRegisterBuffer(3, parent->buffer, REGBUF_STANDARD);
else
Assert(xlrec.innerIsParent);
}
/* Issue the WAL record */ /* Issue the WAL record */
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, rdata); recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT);
/* Update page LSNs on all affected pages */ /* Update page LSNs on all affected pages */
if (newLeafBuffer != InvalidBuffer) if (newLeafBuffer != InvalidBuffer)
@ -1489,7 +1477,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
int nodeN, Datum nodeLabel) int nodeN, Datum nodeLabel)
{ {
SpGistInnerTuple newInnerTuple; SpGistInnerTuple newInnerTuple;
XLogRecData rdata[5];
spgxlogAddNode xlrec; spgxlogAddNode xlrec;
/* Should not be applied to nulls */ /* Should not be applied to nulls */
@ -1499,25 +1486,18 @@ spgAddNodeAction(Relation index, SpGistState *state,
newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN); newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN);
/* Prepare WAL record */ /* Prepare WAL record */
xlrec.node = index->rd_node;
STORE_STATE(state, xlrec.stateSrc); STORE_STATE(state, xlrec.stateSrc);
xlrec.blkno = current->blkno;
xlrec.offnum = current->offnum; xlrec.offnum = current->offnum;
/* we don't fill these unless we need to change the parent downlink */ /* we don't fill these unless we need to change the parent downlink */
xlrec.blknoParent = InvalidBlockNumber; xlrec.parentBlk = -1;
xlrec.offnumParent = InvalidOffsetNumber; xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0; xlrec.nodeI = 0;
/* we don't fill these unless tuple has to be moved */ /* we don't fill these unless tuple has to be moved */
xlrec.blknoNew = InvalidBlockNumber;
xlrec.offnumNew = InvalidOffsetNumber; xlrec.offnumNew = InvalidOffsetNumber;
xlrec.newPage = false; xlrec.newPage = false;
ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
ACCEPT_RDATA_DATA(newInnerTuple, newInnerTuple->size, 1);
ACCEPT_RDATA_BUFFER(current->buffer, 2);
if (PageGetExactFreeSpace(current->page) >= if (PageGetExactFreeSpace(current->page) >=
newInnerTuple->size - innerTuple->size) newInnerTuple->size - innerTuple->size)
{ {
@ -1539,7 +1519,13 @@ spgAddNodeAction(Relation index, SpGistState *state,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata); XLogBeginInsert();
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
PageSetLSN(current->page, recptr); PageSetLSN(current->page, recptr);
} }
@ -1565,7 +1551,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
saveCurrent = *current; saveCurrent = *current;
xlrec.blknoParent = parent->blkno;
xlrec.offnumParent = parent->offnum; xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node; xlrec.nodeI = parent->node;
@ -1580,8 +1565,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
current->blkno = BufferGetBlockNumber(current->buffer); current->blkno = BufferGetBlockNumber(current->buffer);
current->page = BufferGetPage(current->buffer); current->page = BufferGetPage(current->buffer);
xlrec.blknoNew = current->blkno;
/* /*
* Let's just make real sure new current isn't same as old. Right now * Let's just make real sure new current isn't same as old. Right now
* that's impossible, but if SpGistGetBuffer ever got smart enough to * that's impossible, but if SpGistGetBuffer ever got smart enough to
@ -1590,17 +1573,19 @@ spgAddNodeAction(Relation index, SpGistState *state,
* replay would be subtly wrong, so I think a mere assert isn't enough * replay would be subtly wrong, so I think a mere assert isn't enough
* here. * here.
*/ */
if (xlrec.blknoNew == xlrec.blkno) if (current->blkno == saveCurrent.blkno)
elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer"); elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");
/* /*
* New current and parent buffer will both be modified; but note that * New current and parent buffer will both be modified; but note that
* parent buffer could be same as either new or old current. * parent buffer could be same as either new or old current.
*/ */
ACCEPT_RDATA_BUFFER(current->buffer, 3); if (parent->buffer == saveCurrent.buffer)
if (parent->buffer != current->buffer && xlrec.parentBlk = 0;
parent->buffer != saveCurrent.buffer) else if (parent->buffer == current->buffer)
ACCEPT_RDATA_BUFFER(parent->buffer, 4); xlrec.parentBlk = 1;
else
xlrec.parentBlk = 2;
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -1647,7 +1632,20 @@ spgAddNodeAction(Relation index, SpGistState *state,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata); XLogBeginInsert();
/* orig page */
XLogRegisterBuffer(0, saveCurrent.buffer, REGBUF_STANDARD);
/* new page */
XLogRegisterBuffer(1, current->buffer, REGBUF_STANDARD);
/* parent page (if different from orig and new) */
if (xlrec.parentBlk == 2)
XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
/* we don't bother to check if any of these are redundant */ /* we don't bother to check if any of these are redundant */
PageSetLSN(current->page, recptr); PageSetLSN(current->page, recptr);
@ -1682,7 +1680,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
BlockNumber postfixBlkno; BlockNumber postfixBlkno;
OffsetNumber postfixOffset; OffsetNumber postfixOffset;
int i; int i;
XLogRecData rdata[5];
spgxlogSplitTuple xlrec; spgxlogSplitTuple xlrec;
Buffer newBuffer = InvalidBuffer; Buffer newBuffer = InvalidBuffer;
@ -1725,14 +1722,8 @@ spgSplitNodeAction(Relation index, SpGistState *state,
postfixTuple->allTheSame = innerTuple->allTheSame; postfixTuple->allTheSame = innerTuple->allTheSame;
/* prep data for WAL record */ /* prep data for WAL record */
xlrec.node = index->rd_node;
xlrec.newPage = false; xlrec.newPage = false;
ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
ACCEPT_RDATA_DATA(prefixTuple, prefixTuple->size, 1);
ACCEPT_RDATA_DATA(postfixTuple, postfixTuple->size, 2);
ACCEPT_RDATA_BUFFER(current->buffer, 3);
/* /*
* If we can't fit both tuples on the current page, get a new page for the * If we can't fit both tuples on the current page, get a new page for the
* postfix tuple. In particular, can't split to the root page. * postfix tuple. In particular, can't split to the root page.
@ -1752,7 +1743,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
GBUF_INNER_PARITY(current->blkno + 1), GBUF_INNER_PARITY(current->blkno + 1),
postfixTuple->size + sizeof(ItemIdData), postfixTuple->size + sizeof(ItemIdData),
&xlrec.newPage); &xlrec.newPage);
ACCEPT_RDATA_BUFFER(newBuffer, 4);
} }
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -1767,27 +1757,28 @@ spgSplitNodeAction(Relation index, SpGistState *state,
if (xlrec.offnumPrefix != current->offnum) if (xlrec.offnumPrefix != current->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page", elog(ERROR, "failed to add item of size %u to SPGiST index page",
prefixTuple->size); prefixTuple->size);
xlrec.blknoPrefix = current->blkno;
/* /*
* put postfix tuple into appropriate page * put postfix tuple into appropriate page
*/ */
if (newBuffer == InvalidBuffer) if (newBuffer == InvalidBuffer)
{ {
xlrec.blknoPostfix = postfixBlkno = current->blkno; postfixBlkno = current->blkno;
xlrec.offnumPostfix = postfixOffset = xlrec.offnumPostfix = postfixOffset =
SpGistPageAddNewItem(state, current->page, SpGistPageAddNewItem(state, current->page,
(Item) postfixTuple, postfixTuple->size, (Item) postfixTuple, postfixTuple->size,
NULL, false); NULL, false);
xlrec.postfixBlkSame = true;
} }
else else
{ {
xlrec.blknoPostfix = postfixBlkno = BufferGetBlockNumber(newBuffer); postfixBlkno = BufferGetBlockNumber(newBuffer);
xlrec.offnumPostfix = postfixOffset = xlrec.offnumPostfix = postfixOffset =
SpGistPageAddNewItem(state, BufferGetPage(newBuffer), SpGistPageAddNewItem(state, BufferGetPage(newBuffer),
(Item) postfixTuple, postfixTuple->size, (Item) postfixTuple, postfixTuple->size,
NULL, false); NULL, false);
MarkBufferDirty(newBuffer); MarkBufferDirty(newBuffer);
xlrec.postfixBlkSame = false;
} }
/* /*
@ -1808,7 +1799,23 @@ spgSplitNodeAction(Relation index, SpGistState *state,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, rdata); XLogBeginInsert();
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
XLogRegisterData((char *) prefixTuple, prefixTuple->size);
XLogRegisterData((char *) postfixTuple, postfixTuple->size);
XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
if (newBuffer != InvalidBuffer)
{
int flags;
flags = REGBUF_STANDARD;
if (xlrec.newPage)
flags |= REGBUF_WILL_INIT;
XLogRegisterBuffer(1, newBuffer, flags);
}
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE);
PageSetLSN(current->page, recptr); PageSetLSN(current->page, recptr);

View File

@ -105,15 +105,18 @@ spgbuild(PG_FUNCTION_ARGS)
if (RelationNeedsWAL(index)) if (RelationNeedsWAL(index))
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata;
/* WAL data is just the relfilenode */ XLogBeginInsert();
rdata.data = (char *) &(index->rd_node);
rdata.len = sizeof(RelFileNode);
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, &rdata); /*
* Replay will re-initialize the pages, so don't take full-page
* images. No other data to log.
*/
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
PageSetLSN(BufferGetPage(metabuffer), recptr); PageSetLSN(BufferGetPage(metabuffer), recptr);
PageSetLSN(BufferGetPage(rootbuffer), recptr); PageSetLSN(BufferGetPage(rootbuffer), recptr);

View File

@ -127,7 +127,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
{ {
Page page = BufferGetPage(buffer); Page page = BufferGetPage(buffer);
spgxlogVacuumLeaf xlrec; spgxlogVacuumLeaf xlrec;
XLogRecData rdata[8];
OffsetNumber toDead[MaxIndexTuplesPerPage]; OffsetNumber toDead[MaxIndexTuplesPerPage];
OffsetNumber toPlaceholder[MaxIndexTuplesPerPage]; OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
OffsetNumber moveSrc[MaxIndexTuplesPerPage]; OffsetNumber moveSrc[MaxIndexTuplesPerPage];
@ -323,20 +322,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove) if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
elog(ERROR, "inconsistent counts of deletable tuples"); elog(ERROR, "inconsistent counts of deletable tuples");
/* Prepare WAL record */
xlrec.node = index->rd_node;
xlrec.blkno = BufferGetBlockNumber(buffer);
STORE_STATE(&bds->spgstate, xlrec.stateSrc);
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumLeaf, 0);
ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1);
ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2);
ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3);
ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4);
ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5);
ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6);
ACCEPT_RDATA_BUFFER(buffer, 7);
/* Do the updates */ /* Do the updates */
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -389,7 +374,22 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata); XLogBeginInsert();
STORE_STATE(&bds->spgstate, xlrec.stateSrc);
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -407,12 +407,10 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
{ {
Page page = BufferGetPage(buffer); Page page = BufferGetPage(buffer);
spgxlogVacuumRoot xlrec; spgxlogVacuumRoot xlrec;
XLogRecData rdata[3];
OffsetNumber toDelete[MaxIndexTuplesPerPage]; OffsetNumber toDelete[MaxIndexTuplesPerPage];
OffsetNumber i, OffsetNumber i,
max = PageGetMaxOffsetNumber(page); max = PageGetMaxOffsetNumber(page);
xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.nDelete = 0; xlrec.nDelete = 0;
/* Scan page, identify tuples to delete, accumulate stats */ /* Scan page, identify tuples to delete, accumulate stats */
@ -448,15 +446,6 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
if (xlrec.nDelete == 0) if (xlrec.nDelete == 0)
return; /* nothing more to do */ return; /* nothing more to do */
/* Prepare WAL record */
xlrec.node = index->rd_node;
STORE_STATE(&bds->spgstate, xlrec.stateSrc);
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRoot, 0);
/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * xlrec.nDelete, 1);
ACCEPT_RDATA_BUFFER(buffer, 2);
/* Do the update */ /* Do the update */
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -469,7 +458,19 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, rdata); XLogBeginInsert();
/* Prepare WAL record */
STORE_STATE(&bds->spgstate, xlrec.stateSrc);
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
XLogRegisterData((char *) toDelete,
sizeof(OffsetNumber) * xlrec.nDelete);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -499,10 +500,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage]; OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
OffsetNumber itemnos[MaxIndexTuplesPerPage]; OffsetNumber itemnos[MaxIndexTuplesPerPage];
spgxlogVacuumRedirect xlrec; spgxlogVacuumRedirect xlrec;
XLogRecData rdata[3];
xlrec.node = index->rd_node;
xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.nToPlaceholder = 0; xlrec.nToPlaceholder = 0;
xlrec.newestRedirectXid = InvalidTransactionId; xlrec.newestRedirectXid = InvalidTransactionId;
@ -585,11 +583,15 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRedirect, 0); XLogBeginInsert();
ACCEPT_RDATA_DATA(itemToPlaceholder, sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1);
ACCEPT_RDATA_BUFFER(buffer, 2);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata); XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
XLogRegisterData((char *) itemToPlaceholder,
sizeof(OffsetNumber) * xlrec.nToPlaceholder);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }

View File

@ -71,33 +71,30 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
} }
static void static void
spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) spgRedoCreateIndex(XLogReaderState *record)
{ {
RelFileNode *node = (RelFileNode *) XLogRecGetData(record); XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer; Buffer buffer;
Page page; Page page;
/* Backup blocks are not used in create_index records */ buffer = XLogInitBufferForRedo(record, 0);
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
SpGistInitMetapage(page); SpGistInitMetapage(page);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
buffer = XLogReadBuffer(*node, SPGIST_ROOT_BLKNO, true); buffer = XLogInitBufferForRedo(record, 1);
Assert(BufferIsValid(buffer)); Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
SpGistInitBuffer(buffer, SPGIST_LEAF); SpGistInitBuffer(buffer, SPGIST_LEAF);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
buffer = XLogReadBuffer(*node, SPGIST_NULL_BLKNO, true); buffer = XLogInitBufferForRedo(record, 2);
Assert(BufferIsValid(buffer)); Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS); SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
@ -106,8 +103,9 @@ spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record) spgRedoAddLeaf(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record); char *ptr = XLogRecGetData(record);
spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr; spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr;
char *leafTuple; char *leafTuple;
@ -128,15 +126,13 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
*/ */
if (xldata->newPage) if (xldata->newPage)
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true); buffer = XLogInitBufferForRedo(record, 0);
SpGistInitBuffer(buffer, SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO; action = BLK_NEEDS_REDO;
} }
else else
action = XLogReadBufferForRedo(lsn, record, 0, action = XLogReadBufferForRedo(record, 0, &buffer);
xldata->node, xldata->blknoLeaf,
&buffer);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
@ -164,7 +160,8 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
{ {
/* replacing a DEAD tuple */ /* replacing a DEAD tuple */
PageIndexTupleDelete(page, xldata->offnumLeaf); PageIndexTupleDelete(page, xldata->offnumLeaf);
if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size, if (PageAddItem(page,
(Item) leafTuple, leafTupleHdr.size,
xldata->offnumLeaf, false, false) != xldata->offnumLeaf) xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
elog(ERROR, "failed to add item of size %u to SPGiST index page", elog(ERROR, "failed to add item of size %u to SPGiST index page",
leafTupleHdr.size); leafTupleHdr.size);
@ -177,13 +174,14 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
/* update parent downlink if necessary */ /* update parent downlink if necessary */
if (xldata->blknoParent != InvalidBlockNumber) if (xldata->offnumParent != InvalidOffsetNumber)
{ {
if (XLogReadBufferForRedo(lsn, record, 1, if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
xldata->node, xldata->blknoParent,
&buffer) == BLK_NEEDS_REDO)
{ {
SpGistInnerTuple tuple; SpGistInnerTuple tuple;
BlockNumber blknoLeaf;
XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
@ -191,7 +189,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(tuple, xldata->nodeI, spgUpdateNodeLink(tuple, xldata->nodeI,
xldata->blknoLeaf, xldata->offnumLeaf); blknoLeaf, xldata->offnumLeaf);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
@ -202,8 +200,9 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record) spgRedoMoveLeafs(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record); char *ptr = XLogRecGetData(record);
spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr; spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr;
SpGistState state; SpGistState state;
@ -213,6 +212,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
Buffer buffer; Buffer buffer;
Page page; Page page;
XLogRedoAction action; XLogRedoAction action;
BlockNumber blknoDst;
XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst);
fillFakeState(&state, xldata->stateSrc); fillFakeState(&state, xldata->stateSrc);
@ -235,15 +237,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
/* Insert tuples on the dest page (do first, so redirect is valid) */ /* Insert tuples on the dest page (do first, so redirect is valid) */
if (xldata->newPage) if (xldata->newPage)
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true); buffer = XLogInitBufferForRedo(record, 1);
SpGistInitBuffer(buffer, SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO; action = BLK_NEEDS_REDO;
} }
else else
action = XLogReadBufferForRedo(lsn, record, 1, action = XLogReadBufferForRedo(record, 1, &buffer);
xldata->node, xldata->blknoDst,
&buffer);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
int i; int i;
@ -260,7 +261,8 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
* field. * field.
*/ */
leafTuple = ptr; leafTuple = ptr;
memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); memcpy(&leafTupleHdr, leafTuple,
sizeof(SpGistLeafTupleData));
addOrReplaceTuple(page, (Item) leafTuple, addOrReplaceTuple(page, (Item) leafTuple,
leafTupleHdr.size, toInsert[i]); leafTupleHdr.size, toInsert[i]);
@ -274,14 +276,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
/* Delete tuples from the source page, inserting a redirection pointer */ /* Delete tuples from the source page, inserting a redirection pointer */
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves, spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT, state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
xldata->blknoDst, blknoDst,
toInsert[nInsert - 1]); toInsert[nInsert - 1]);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
@ -291,8 +293,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
/* And update the parent downlink */ /* And update the parent downlink */
if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent, if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
SpGistInnerTuple tuple; SpGistInnerTuple tuple;
@ -302,7 +303,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(tuple, xldata->nodeI, spgUpdateNodeLink(tuple, xldata->nodeI,
xldata->blknoDst, toInsert[nInsert - 1]); blknoDst, toInsert[nInsert - 1]);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
@ -312,8 +313,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record) spgRedoAddNode(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record); char *ptr = XLogRecGetData(record);
spgxlogAddNode *xldata = (spgxlogAddNode *) ptr; spgxlogAddNode *xldata = (spgxlogAddNode *) ptr;
char *innerTuple; char *innerTuple;
@ -321,7 +323,6 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
SpGistState state; SpGistState state;
Buffer buffer; Buffer buffer;
Page page; Page page;
int bbi;
XLogRedoAction action; XLogRedoAction action;
ptr += sizeof(spgxlogAddNode); ptr += sizeof(spgxlogAddNode);
@ -331,17 +332,18 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
fillFakeState(&state, xldata->stateSrc); fillFakeState(&state, xldata->stateSrc);
if (xldata->blknoNew == InvalidBlockNumber) if (!XLogRecHasBlockRef(record, 1))
{ {
/* update in place */ /* update in place */
Assert(xldata->blknoParent == InvalidBlockNumber); Assert(xldata->parentBlk == -1);
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
PageIndexTupleDelete(page, xldata->offnum); PageIndexTupleDelete(page, xldata->offnum);
if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size, if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
xldata->offnum, false, false) != xldata->offnum) xldata->offnum,
false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page", elog(ERROR, "failed to add item of size %u to SPGiST index page",
innerTupleHdr.size); innerTupleHdr.size);
@ -353,30 +355,30 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
} }
else else
{ {
BlockNumber blkno;
BlockNumber blknoNew;
XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno);
XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew);
/* /*
* In normal operation we would have all three pages (source, dest, * In normal operation we would have all three pages (source, dest,
* and parent) locked simultaneously; but in WAL replay it should be * and parent) locked simultaneously; but in WAL replay it should be
* safe to update them one at a time, as long as we do it in the right * safe to update them one at a time, as long as we do it in the right
* order. * order. We must insert the new tuple before replacing the old tuple
* * with the redirect tuple.
* The logic here depends on the assumption that blkno != blknoNew,
* else we can't tell which BKP bit goes with which page, and the LSN
* checks could go wrong too.
*/ */
Assert(xldata->blkno != xldata->blknoNew);
/* Install new tuple first so redirect is valid */ /* Install new tuple first so redirect is valid */
if (xldata->newPage) if (xldata->newPage)
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true);
/* AddNode is not used for nulls pages */ /* AddNode is not used for nulls pages */
buffer = XLogInitBufferForRedo(record, 1);
SpGistInitBuffer(buffer, 0); SpGistInitBuffer(buffer, 0);
action = BLK_NEEDS_REDO; action = BLK_NEEDS_REDO;
} }
else else
action = XLogReadBufferForRedo(lsn, record, 1, action = XLogReadBufferForRedo(record, 1, &buffer);
xldata->node, xldata->blknoNew,
&buffer);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
@ -385,22 +387,26 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
innerTupleHdr.size, xldata->offnumNew); innerTupleHdr.size, xldata->offnumNew);
/* /*
* If parent is in this same page, don't advance LSN; doing so * If parent is in this same page, update it now.
* would fool us into not applying the parent downlink update
* below. We'll update the LSN when we fix the parent downlink.
*/ */
if (xldata->blknoParent != xldata->blknoNew) if (xldata->parentBlk == 1)
{ {
PageSetLSN(page, lsn); SpGistInnerTuple parentTuple;
parentTuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parentTuple, xldata->nodeI,
blknoNew, xldata->offnumNew);
} }
PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer)) if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
/* Delete old tuple, replacing it with redirect or placeholder tuple */ /* Delete old tuple, replacing it with redirect or placeholder tuple */
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
SpGistDeadTuple dt; SpGistDeadTuple dt;
@ -412,11 +418,12 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
InvalidOffsetNumber); InvalidOffsetNumber);
else else
dt = spgFormDeadTuple(&state, SPGIST_REDIRECT, dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
xldata->blknoNew, blknoNew,
xldata->offnumNew); xldata->offnumNew);
PageIndexTupleDelete(page, xldata->offnum); PageIndexTupleDelete(page, xldata->offnum);
if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum, if (PageAddItem(page, (Item) dt, dt->size,
xldata->offnum,
false, false) != xldata->offnum) false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page", elog(ERROR, "failed to add item of size %u to SPGiST index page",
dt->size); dt->size);
@ -427,67 +434,55 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
SpGistPageGetOpaque(page)->nRedirection++; SpGistPageGetOpaque(page)->nRedirection++;
/* /*
* If parent is in this same page, don't advance LSN; doing so * If parent is in this same page, update it now.
* would fool us into not applying the parent downlink update
* below. We'll update the LSN when we fix the parent downlink.
*/ */
if (xldata->blknoParent != xldata->blkno) if (xldata->parentBlk == 0)
{ {
PageSetLSN(page, lsn); SpGistInnerTuple parentTuple;
parentTuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parentTuple, xldata->nodeI,
blknoNew, xldata->offnumNew);
} }
PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer)) if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
/* /*
* Update parent downlink. Since parent could be in either of the * Update parent downlink (if we didn't do it as part of the source or
* previous two buffers, it's a bit tricky to determine which BKP bit * destination page update already).
* applies.
*/ */
if (xldata->blknoParent == xldata->blkno) if (xldata->parentBlk == 2)
bbi = 0;
else if (xldata->blknoParent == xldata->blknoNew)
bbi = 1;
else
bbi = 2;
if (record->xl_info & XLR_BKP_BLOCK(bbi))
{ {
if (bbi == 2) /* else we already did it */ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
(void) RestoreBackupBlock(lsn, record, bbi, false, false); {
action = BLK_RESTORED; SpGistInnerTuple parentTuple;
buffer = InvalidBuffer;
}
else
{
action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
xldata->blknoParent, &buffer);
Assert(action != BLK_RESTORED);
}
if (action == BLK_NEEDS_REDO)
{
SpGistInnerTuple innerTuple;
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
innerTuple = (SpGistInnerTuple) PageGetItem(page, parentTuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(innerTuple, xldata->nodeI, spgUpdateNodeLink(parentTuple, xldata->nodeI,
xldata->blknoNew, xldata->offnumNew); blknoNew, xldata->offnumNew);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
} }
static void static void
spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record) spgRedoSplitTuple(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record); char *ptr = XLogRecGetData(record);
spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr; spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr;
char *prefixTuple; char *prefixTuple;
@ -496,6 +491,7 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
SpGistInnerTupleData postfixTupleHdr; SpGistInnerTupleData postfixTupleHdr;
Buffer buffer; Buffer buffer;
Page page; Page page;
XLogRedoAction action;
ptr += sizeof(spgxlogSplitTuple); ptr += sizeof(spgxlogSplitTuple);
prefixTuple = ptr; prefixTuple = ptr;
@ -513,22 +509,17 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
*/ */
/* insert postfix tuple first to avoid dangling link */ /* insert postfix tuple first to avoid dangling link */
if (xldata->blknoPostfix != xldata->blknoPrefix) if (!xldata->postfixBlkSame)
{ {
XLogRedoAction action;
if (xldata->newPage) if (xldata->newPage)
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true); buffer = XLogInitBufferForRedo(record, 1);
/* SplitTuple is not used for nulls pages */ /* SplitTuple is not used for nulls pages */
SpGistInitBuffer(buffer, 0); SpGistInitBuffer(buffer, 0);
action = BLK_NEEDS_REDO; action = BLK_NEEDS_REDO;
} }
else else
action = XLogReadBufferForRedo(lsn, record, 1, action = XLogReadBufferForRedo(record, 1, &buffer);
xldata->node, xldata->blknoPostfix,
&buffer);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
@ -544,18 +535,19 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
} }
/* now handle the original page */ /* now handle the original page */
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoPrefix, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
PageIndexTupleDelete(page, xldata->offnumPrefix); PageIndexTupleDelete(page, xldata->offnumPrefix);
if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size, if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
xldata->offnumPrefix, false, false) != xldata->offnumPrefix) xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
elog(ERROR, "failed to add item of size %u to SPGiST index page", elog(ERROR, "failed to add item of size %u to SPGiST index page",
prefixTupleHdr.size); prefixTupleHdr.size);
if (xldata->blknoPostfix == xldata->blknoPrefix) if (xldata->postfixBlkSame)
addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size, addOrReplaceTuple(page, (Item) postfixTuple,
postfixTupleHdr.size,
xldata->offnumPostfix); xldata->offnumPostfix);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
@ -566,8 +558,9 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) spgRedoPickSplit(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record); char *ptr = XLogRecGetData(record);
spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr; spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr;
char *innerTuple; char *innerTuple;
@ -578,14 +571,16 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
uint8 *leafPageSelect; uint8 *leafPageSelect;
Buffer srcBuffer; Buffer srcBuffer;
Buffer destBuffer; Buffer destBuffer;
Buffer innerBuffer;
Page srcPage; Page srcPage;
Page destPage; Page destPage;
Buffer innerBuffer;
Page page; Page page;
int bbi;
int i; int i;
BlockNumber blknoInner;
XLogRedoAction action; XLogRedoAction action;
XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner);
fillFakeState(&state, xldata->stateSrc); fillFakeState(&state, xldata->stateSrc);
ptr += SizeOfSpgxlogPickSplit; ptr += SizeOfSpgxlogPickSplit;
@ -603,13 +598,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
/* now ptr points to the list of leaf tuples */ /* now ptr points to the list of leaf tuples */
/* if (xldata->isRootSplit)
* It's a bit tricky to identify which pages have been handled as
* full-page images, so we explicitly count each referenced buffer.
*/
bbi = 0;
if (SpGistBlockIsRoot(xldata->blknoSrc))
{ {
/* when splitting root, we touch it only in the guise of new inner */ /* when splitting root, we touch it only in the guise of new inner */
srcBuffer = InvalidBuffer; srcBuffer = InvalidBuffer;
@ -618,8 +607,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
else if (xldata->initSrc) else if (xldata->initSrc)
{ {
/* just re-init the source page */ /* just re-init the source page */
srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true); srcBuffer = XLogInitBufferForRedo(record, 0);
Assert(BufferIsValid(srcBuffer));
srcPage = (Page) BufferGetPage(srcBuffer); srcPage = (Page) BufferGetPage(srcBuffer);
SpGistInitBuffer(srcBuffer, SpGistInitBuffer(srcBuffer,
@ -634,9 +622,8 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
* inserting leaf tuples and the new inner tuple, else the added * inserting leaf tuples and the new inner tuple, else the added
* redirect tuple will be a dangling link.) * redirect tuple will be a dangling link.)
*/ */
if (XLogReadBufferForRedo(lsn, record, bbi, srcPage = NULL;
xldata->node, xldata->blknoSrc, if (XLogReadBufferForRedo(record, 0, &srcBuffer) == BLK_NEEDS_REDO)
&srcBuffer) == BLK_NEEDS_REDO)
{ {
srcPage = BufferGetPage(srcBuffer); srcPage = BufferGetPage(srcBuffer);
@ -650,7 +637,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
toDelete, xldata->nDelete, toDelete, xldata->nDelete,
SPGIST_REDIRECT, SPGIST_REDIRECT,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
xldata->blknoInner, blknoInner,
xldata->offnumInner); xldata->offnumInner);
else else
spgPageIndexMultiDelete(&state, srcPage, spgPageIndexMultiDelete(&state, srcPage,
@ -662,15 +649,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
/* don't update LSN etc till we're done with it */ /* don't update LSN etc till we're done with it */
} }
else
{
srcPage = NULL; /* don't do any page updates */
}
bbi++;
} }
/* try to access dest page if any */ /* try to access dest page if any */
if (xldata->blknoDest == InvalidBlockNumber) if (!XLogRecHasBlockRef(record, 1))
{ {
destBuffer = InvalidBuffer; destBuffer = InvalidBuffer;
destPage = NULL; destPage = NULL;
@ -678,8 +660,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
else if (xldata->initDest) else if (xldata->initDest)
{ {
/* just re-init the dest page */ /* just re-init the dest page */
destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true); destBuffer = XLogInitBufferForRedo(record, 1);
Assert(BufferIsValid(destBuffer));
destPage = (Page) BufferGetPage(destBuffer); destPage = (Page) BufferGetPage(destBuffer);
SpGistInitBuffer(destBuffer, SpGistInitBuffer(destBuffer,
@ -692,17 +673,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
* We could probably release the page lock immediately in the * We could probably release the page lock immediately in the
* full-page-image case, but for safety let's hold it till later. * full-page-image case, but for safety let's hold it till later.
*/ */
if (XLogReadBufferForRedo(lsn, record, bbi, if (XLogReadBufferForRedo(record, 1, &destBuffer) == BLK_NEEDS_REDO)
xldata->node, xldata->blknoDest,
&destBuffer) == BLK_NEEDS_REDO)
{
destPage = (Page) BufferGetPage(destBuffer); destPage = (Page) BufferGetPage(destBuffer);
}
else else
{
destPage = NULL; /* don't do any page updates */ destPage = NULL; /* don't do any page updates */
}
bbi++;
} }
/* restore leaf tuples to src and/or dest page */ /* restore leaf tuples to src and/or dest page */
@ -739,14 +713,12 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
/* restore new inner tuple */ /* restore new inner tuple */
if (xldata->initInner) if (xldata->initInner)
{ {
innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true); innerBuffer = XLogInitBufferForRedo(record, 2);
SpGistInitBuffer(innerBuffer, SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0));
(xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO; action = BLK_NEEDS_REDO;
} }
else else
action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node, action = XLogReadBufferForRedo(record, 2, &innerBuffer);
xldata->blknoInner, &innerBuffer);
if (action == BLK_NEEDS_REDO) if (action == BLK_NEEDS_REDO)
{ {
@ -756,14 +728,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
xldata->offnumInner); xldata->offnumInner);
/* if inner is also parent, update link while we're here */ /* if inner is also parent, update link while we're here */
if (xldata->blknoInner == xldata->blknoParent) if (xldata->innerIsParent)
{ {
SpGistInnerTuple parent; SpGistInnerTuple parent;
parent = (SpGistInnerTuple) PageGetItem(page, parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI, spgUpdateNodeLink(parent, xldata->nodeI,
xldata->blknoInner, xldata->offnumInner); blknoInner, xldata->offnumInner);
} }
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
@ -771,7 +743,6 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
} }
if (BufferIsValid(innerBuffer)) if (BufferIsValid(innerBuffer))
UnlockReleaseBuffer(innerBuffer); UnlockReleaseBuffer(innerBuffer);
bbi++;
/* /*
* Now we can release the leaf-page locks. It's okay to do this before * Now we can release the leaf-page locks. It's okay to do this before
@ -783,18 +754,11 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(destBuffer); UnlockReleaseBuffer(destBuffer);
/* update parent downlink, unless we did it above */ /* update parent downlink, unless we did it above */
if (xldata->blknoParent == InvalidBlockNumber) if (XLogRecHasBlockRef(record, 3))
{
/* no parent cause we split the root */
Assert(SpGistBlockIsRoot(xldata->blknoInner));
}
else if (xldata->blknoInner != xldata->blknoParent)
{ {
Buffer parentBuffer; Buffer parentBuffer;
if (XLogReadBufferForRedo(lsn, record, bbi, if (XLogReadBufferForRedo(record, 3, &parentBuffer) == BLK_NEEDS_REDO)
xldata->node, xldata->blknoParent,
&parentBuffer) == BLK_NEEDS_REDO)
{ {
SpGistInnerTuple parent; SpGistInnerTuple parent;
@ -803,7 +767,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
parent = (SpGistInnerTuple) PageGetItem(page, parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI, spgUpdateNodeLink(parent, xldata->nodeI,
xldata->blknoInner, xldata->offnumInner); blknoInner, xldata->offnumInner);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(parentBuffer); MarkBufferDirty(parentBuffer);
@ -811,11 +775,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
if (BufferIsValid(parentBuffer)) if (BufferIsValid(parentBuffer))
UnlockReleaseBuffer(parentBuffer); UnlockReleaseBuffer(parentBuffer);
} }
else
Assert(xldata->innerIsParent || xldata->isRootSplit);
} }
static void static void
spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record) spgRedoVacuumLeaf(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record); char *ptr = XLogRecGetData(record);
spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr; spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr;
OffsetNumber *toDead; OffsetNumber *toDead;
@ -844,8 +811,7 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
ptr += sizeof(OffsetNumber) * xldata->nChain; ptr += sizeof(OffsetNumber) * xldata->nChain;
chainDest = (OffsetNumber *) ptr; chainDest = (OffsetNumber *) ptr;
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
@ -897,8 +863,9 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record) spgRedoVacuumRoot(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record); char *ptr = XLogRecGetData(record);
spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) ptr; spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) ptr;
OffsetNumber *toDelete; OffsetNumber *toDelete;
@ -907,8 +874,7 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
toDelete = xldata->offsets; toDelete = xldata->offsets;
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
@ -923,8 +889,9 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
} }
static void static void
spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record) spgRedoVacuumRedirect(XLogReaderState *record)
{ {
XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record); char *ptr = XLogRecGetData(record);
spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr; spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr;
OffsetNumber *itemToPlaceholder; OffsetNumber *itemToPlaceholder;
@ -939,12 +906,16 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
if (InHotStandby) if (InHotStandby)
{ {
if (TransactionIdIsValid(xldata->newestRedirectXid)) if (TransactionIdIsValid(xldata->newestRedirectXid))
{
RelFileNode node;
XLogRecGetBlockTag(record, 0, &node, NULL, NULL);
ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid, ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid,
xldata->node); node);
}
} }
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
&buffer) == BLK_NEEDS_REDO)
{ {
Page page = BufferGetPage(buffer); Page page = BufferGetPage(buffer);
SpGistPageOpaque opaque = SpGistPageGetOpaque(page); SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
@ -995,40 +966,40 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
} }
void void
spg_redo(XLogRecPtr lsn, XLogRecord *record) spg_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
MemoryContext oldCxt; MemoryContext oldCxt;
oldCxt = MemoryContextSwitchTo(opCtx); oldCxt = MemoryContextSwitchTo(opCtx);
switch (info) switch (info)
{ {
case XLOG_SPGIST_CREATE_INDEX: case XLOG_SPGIST_CREATE_INDEX:
spgRedoCreateIndex(lsn, record); spgRedoCreateIndex(record);
break; break;
case XLOG_SPGIST_ADD_LEAF: case XLOG_SPGIST_ADD_LEAF:
spgRedoAddLeaf(lsn, record); spgRedoAddLeaf(record);
break; break;
case XLOG_SPGIST_MOVE_LEAFS: case XLOG_SPGIST_MOVE_LEAFS:
spgRedoMoveLeafs(lsn, record); spgRedoMoveLeafs(record);
break; break;
case XLOG_SPGIST_ADD_NODE: case XLOG_SPGIST_ADD_NODE:
spgRedoAddNode(lsn, record); spgRedoAddNode(record);
break; break;
case XLOG_SPGIST_SPLIT_TUPLE: case XLOG_SPGIST_SPLIT_TUPLE:
spgRedoSplitTuple(lsn, record); spgRedoSplitTuple(record);
break; break;
case XLOG_SPGIST_PICKSPLIT: case XLOG_SPGIST_PICKSPLIT:
spgRedoPickSplit(lsn, record); spgRedoPickSplit(record);
break; break;
case XLOG_SPGIST_VACUUM_LEAF: case XLOG_SPGIST_VACUUM_LEAF:
spgRedoVacuumLeaf(lsn, record); spgRedoVacuumLeaf(record);
break; break;
case XLOG_SPGIST_VACUUM_ROOT: case XLOG_SPGIST_VACUUM_ROOT:
spgRedoVacuumRoot(lsn, record); spgRedoVacuumRoot(record);
break; break;
case XLOG_SPGIST_VACUUM_REDIRECT: case XLOG_SPGIST_VACUUM_REDIRECT:
spgRedoVacuumRedirect(lsn, record); spgRedoVacuumRedirect(record);
break; break;
default: default:
elog(PANIC, "spg_redo: unknown op code %u", info); elog(PANIC, "spg_redo: unknown op code %u", info);

View File

@ -440,96 +440,164 @@ happen before the WAL record is inserted; see notes in SyncOneBuffer().)
Note that marking a buffer dirty with MarkBufferDirty() should happen if and
only if you write a WAL record; see Writing Hints below.
Old version:

5. If the relation requires WAL-logging, build a WAL log record and pass it
to XLogInsert(); then update the page's LSN using the returned XLOG
location.  For instance,

		recptr = XLogInsert(rmgr_id, info, rdata);
		PageSetLSN(dp, recptr);
		// Note that we no longer do PageSetTLI() from 9.3 onwards
		// since that field on a page has now changed its meaning.

New version:

5. If the relation requires WAL-logging, build a WAL record using
XLogBeginInsert and XLogRegister* functions, and insert it.  (See
"Constructing a WAL record" below.)  Then update the page's LSN using the
returned XLOG location.  For instance,

		XLogBeginInsert();
		XLogRegisterBuffer(...)
		XLogRegisterData(...)
		recptr = XLogInsert(rmgr_id, info);
		PageSetLSN(dp, recptr);
6. END_CRIT_SECTION() 6. END_CRIT_SECTION()
7. Unlock and unpin the buffer(s). 7. Unlock and unpin the buffer(s).
XLogInsert's "rdata" argument is an array of pointer/size items identifying Complex changes (such as a multilevel index insertion) normally need to be
chunks of data to be written in the XLOG record, plus optional shared-buffer described by a series of atomic-action WAL records. The intermediate states
IDs for chunks that are in shared buffers rather than temporary variables. must be self-consistent, so that if the replay is interrupted between any
The "rdata" array must mention (at least once) each of the shared buffers two actions, the system is fully functional. In btree indexes, for example,
being modified, unless the action is such that the WAL replay routine can a page split requires a new page to be allocated, and an insertion of a new
reconstruct the entire page contents. XLogInsert includes the logic that key in the parent btree level, but for locking reasons this has to be
tests to see whether a shared buffer has been modified since the last reflected by two separate WAL records. Replaying the first record, to
checkpoint. If not, the entire page contents are logged rather than just the allocate the new page and move tuples to it, sets a flag on the page to
portion(s) pointed to by "rdata". indicate that the key has not been inserted to the parent yet. Replaying the
second record clears the flag. This intermediate state is never seen by
other backends during normal operation, because the lock on the child page
is held across the two actions, but will be seen if the operation is
interrupted before writing the second WAL record. The search algorithm works
with the intermediate state as normal, but if an insertion encounters a page
with the incomplete-split flag set, it will finish the interrupted split by
inserting the key to the parent, before proceeding.
Because XLogInsert drops the rdata components associated with buffers it
chooses to log in full, the WAL replay routines normally need to test to see
which buffers were handled that way --- otherwise they may be misled about
what the XLOG record actually contains. XLOG records that describe multi-page
changes therefore require some care to design: you must be certain that you
know what data is indicated by each "BKP" bit. An example of the trickiness
is that in a HEAP_UPDATE record, BKP(0) normally is associated with the source
page and BKP(1) is associated with the destination page --- but if these are
the same page, only BKP(0) would have been set.
For this reason as well as the risk of deadlocking on buffer locks, it's best Constructing a WAL record
to design WAL records so that they reflect small atomic actions involving just -------------------------
one or a few pages. The current XLOG infrastructure cannot handle WAL records
involving references to more than four shared buffers, anyway.
In the case where the WAL record contains enough information to re-generate A WAL record consists of a header common to all WAL record types,
the entire contents of a page, do *not* show that page's buffer ID in the record-specific data, and information about the data blocks modified. Each
rdata array, even if some of the rdata items point into the buffer. This is modified data block is identified by an ID number, and can optionally have
because you don't want XLogInsert to log the whole page contents. The more record-specific data associated with the block. If XLogInsert decides
standard replay-routine pattern for this case is that a full-page image of a block needs to be taken, the data associated
with that block is not included.
buffer = XLogReadBuffer(rnode, blkno, true); The API for constructing a WAL record consists of five functions:
Assert(BufferIsValid(buffer)); XLogBeginInsert, XLogRegisterBuffer, XLogRegisterData, XLogRegisterBufData,
page = (Page) BufferGetPage(buffer); and XLogInsert. First, call XLogBeginInsert(). Then register all the buffers
modified, and data needed to replay the changes, using XLogRegister*
functions. Finally, insert the constructed record to the WAL by calling
XLogInsert().
... initialize the page ... XLogBeginInsert();
PageSetLSN(page, lsn); /* register buffers modified as part of this WAL-logged action */
MarkBufferDirty(buffer); XLogRegisterBuffer(0, lbuffer, REGBUF_STANDARD);
UnlockReleaseBuffer(buffer); XLogRegisterBuffer(1, rbuffer, REGBUF_STANDARD);
In the case where the WAL record provides only enough information to /* register data that is always included in the WAL record */
incrementally update the page, the rdata array *must* mention the buffer XLogRegisterData(&xlrec, SizeOfFictionalAction);
ID at least once; otherwise there is no defense against torn-page problems.
The standard replay-routine pattern for this case is
if (XLogReadBufferForRedo(lsn, record, N, rnode, blkno, &buffer) == BLK_NEEDS_REDO) /*
{ * register data associated with a buffer. This will not be included
page = (Page) BufferGetPage(buffer); * in the record if a full-page image is taken.
*/
XLogRegisterBufData(0, tuple->data, tuple->len);
... apply the change ... /* more data associated with the buffer */
XLogRegisterBufData(0, data2, len2);
PageSetLSN(page, lsn); /*
MarkBufferDirty(buffer); * Ok, all the data and buffers to include in the WAL record have
} * been registered. Insert the record.
if (BufferIsValid(buffer)) */
UnlockReleaseBuffer(buffer); recptr = XLogInsert(RM_FOO_ID, XLOG_FOOBAR_DO_STUFF);
XLogReadBufferForRedo reads the page from disk, and checks what action needs to Details of the API functions:
be taken to the page. If the XLR_BKP_BLOCK(N) flag is set, it restores the
full page image and returns BLK_RESTORED. If there is no full page image, but
page cannot be found or if the change has already been replayed (i.e. the
page's LSN >= the record we're replaying), it returns BLK_NOTFOUND or BLK_DONE,
respectively. Usually, the redo routine only needs to pay attention to the
BLK_NEEDS_REDO return code, which means that the routine should apply the
incremental change. In any case, the caller is responsible for unlocking and
releasing the buffer. Note that XLogReadBufferForRedo returns the buffer
locked even if no redo is required, unless the page does not exist.
As noted above, for a multi-page update you need to be able to determine void XLogBeginInsert(void)
which XLR_BKP_BLOCK(N) flag applies to each page. If a WAL record reflects
a combination of fully-rewritable and incremental updates, then the rewritable Must be called before XLogRegisterBuffer and XLogRegisterData.
pages don't count for the XLR_BKP_BLOCK(N) numbering. (XLR_BKP_BLOCK(N) is
associated with the N'th distinct buffer ID seen in the "rdata" array, and void XLogResetInsertion(void)
per the above discussion, fully-rewritable buffers shouldn't be mentioned in
"rdata".) Clear any currently registered data and buffers from the WAL record
construction workspace. This is only needed if you have already called
XLogBeginInsert(), but decide to not insert the record after all.
void XLogEnsureRecordSpace(int max_block_id, int nrdatas)
Normally, the WAL record construction buffers have the following limits:
* highest block ID that can be used is 4 (allowing five block references)
* Max 20 chunks of registered data
These default limits are enough for most record types that change some
on-disk structures. For the odd case that requires more data, or needs to
modify more buffers, these limits can be raised by calling
XLogEnsureRecordSpace(). XLogEnsureRecordSpace() must be called before
XLogBeginInsert(), and outside a critical section.
void XLogRegisterBuffer(uint8 block_id, Buffer buf, uint8 flags);
XLogRegisterBuffer adds information about a data block to the WAL record.
block_id is an arbitrary number used to identify this page reference in
the redo routine. The information needed to re-find the page at redo -
relfilenode, fork, and block number - is included in the WAL record.
XLogInsert will automatically include a full copy of the page contents, if
this is the first modification of the buffer since the last checkpoint.
It is important to register every buffer modified by the action with
XLogRegisterBuffer, to avoid torn-page hazards.
The flags control when and how the buffer contents are included in the
WAL record. Normally, a full-page image is taken only if the page has not
been modified since the last checkpoint, and only if full_page_writes=on
or an online backup is in progress. The REGBUF_FORCE_IMAGE flag can be
used to force a full-page image to always be included; that is useful
e.g. for an operation that rewrites most of the page, so that tracking the
details is not worth it. For the rare case where it is not necessary to
protect from torn pages, the REGBUF_NO_IMAGE flag can be used to suppress
the full-page image. REGBUF_WILL_INIT also suppresses a full
page image, but the redo routine must re-generate the page from scratch,
without looking at the old page contents. Re-initializing the page
protects from torn page hazards like a full page image does.
The REGBUF_STANDARD flag can be specified together with the other flags to
indicate that the page follows the standard page layout. It causes the
area between pd_lower and pd_upper to be left out from the image, reducing
WAL volume.
If the REGBUF_KEEP_DATA flag is given, any per-buffer data registered with
XLogRegisterBufData() is included in the WAL record even if a full-page
image is taken.
void XLogRegisterData(char *data, int len);
XLogRegisterData is used to include arbitrary data in the WAL record. If
XLogRegisterData() is called multiple times, the data are appended, and
will be made available to the redo routine as one contiguous chunk.
void XLogRegisterBufData(uint8 block_id, char *data, int len);
XLogRegisterBufData is used to include data associated with a particular
buffer that was registered earlier with XLogRegisterBuffer(). If
XLogRegisterBufData() is called multiple times with the same block ID, the
data are appended, and will be made available to the redo routine as one
contiguous chunk.
If a full-page image of the buffer is taken at insertion, the data is not
included in the WAL record, unless the REGBUF_KEEP_DATA flag is used.
Writing a REDO routine
----------------------
A REDO routine uses the data and page references included in the WAL record
to reconstruct the new state of the page. The record decoding functions
and macros in xlogreader.c/h can be used to extract the data from the record.
When replaying a WAL record that describes changes on multiple pages, you When replaying a WAL record that describes changes on multiple pages, you
must be careful to lock the pages properly to prevent concurrent Hot Standby must be careful to lock the pages properly to prevent concurrent Hot Standby
@ -545,23 +613,6 @@ either an exclusive buffer lock or a shared lock plus buffer header lock,
or be writing the data block directly rather than through shared buffers or be writing the data block directly rather than through shared buffers
while holding AccessExclusiveLock on the relation. while holding AccessExclusiveLock on the relation.
Due to all these constraints, complex changes (such as a multilevel index
insertion) normally need to be described by a series of atomic-action WAL
records. The intermediate states must be self-consistent, so that if the
replay is interrupted between any two actions, the system is fully
functional. In btree indexes, for example, a page split requires a new page
to be allocated, and an insertion of a new key in the parent btree level,
but for locking reasons this has to be reflected by two separate WAL
records. Replaying the first record, to allocate the new page and move
tuples to it, sets a flag on the page to indicate that the key has not been
inserted to the parent yet. Replaying the second record clears the flag.
This intermediate state is never seen by other backends during normal
operation, because the lock on the child page is held across the two
actions, but will be seen if the operation is interrupted before writing
the second WAL record. The search algorithm works with the intermediate
state as normal, but if an insertion encounters a page with the
incomplete-split flag set, it will finish the interrupted split by
inserting the key to the parent, before proceeding.
Writing Hints Writing Hints
------------- -------------

View File

@ -699,13 +699,9 @@ CLOGPagePrecedes(int page1, int page2)
static void static void
WriteZeroPageXlogRec(int pageno) WriteZeroPageXlogRec(int pageno)
{ {
XLogRecData rdata; XLogBeginInsert();
XLogRegisterData((char *) (&pageno), sizeof(int));
rdata.data = (char *) (&pageno); (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
rdata.len = sizeof(int);
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
} }
/* /*
@ -717,14 +713,11 @@ WriteZeroPageXlogRec(int pageno)
static void static void
WriteTruncateXlogRec(int pageno) WriteTruncateXlogRec(int pageno)
{ {
XLogRecData rdata;
XLogRecPtr recptr; XLogRecPtr recptr;
rdata.data = (char *) (&pageno); XLogBeginInsert();
rdata.len = sizeof(int); XLogRegisterData((char *) (&pageno), sizeof(int));
rdata.buffer = InvalidBuffer; recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
rdata.next = NULL;
recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
XLogFlush(recptr); XLogFlush(recptr);
} }
@ -732,12 +725,12 @@ WriteTruncateXlogRec(int pageno)
* CLOG resource manager's routines * CLOG resource manager's routines
*/ */
void void
clog_redo(XLogRecPtr lsn, XLogRecord *record) clog_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in clog records */ /* Backup blocks are not used in clog records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record));
if (info == CLOG_ZEROPAGE) if (info == CLOG_ZEROPAGE)
{ {

View File

@ -720,7 +720,6 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
{ {
MultiXactId multi; MultiXactId multi;
MultiXactOffset offset; MultiXactOffset offset;
XLogRecData rdata[2];
xl_multixact_create xlrec; xl_multixact_create xlrec;
debug_elog3(DEBUG2, "Create: %s", debug_elog3(DEBUG2, "Create: %s",
@ -796,17 +795,11 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
* the status flags in one XLogRecData, then all the xids in another one? * the status flags in one XLogRecData, then all the xids in another one?
* Not clear that it's worth the trouble though. * Not clear that it's worth the trouble though.
*/ */
rdata[0].data = (char *) (&xlrec); XLogBeginInsert();
rdata[0].len = SizeOfMultiXactCreate; XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate);
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember));
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) members; (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
rdata[1].len = nmembers * sizeof(MultiXactMember);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
/* Now enter the information into the OFFSETs and MEMBERs logs */ /* Now enter the information into the OFFSETs and MEMBERs logs */
RecordNewMultiXact(multi, offset, nmembers, members); RecordNewMultiXact(multi, offset, nmembers, members);
@ -2705,25 +2698,21 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
static void static void
WriteMZeroPageXlogRec(int pageno, uint8 info) WriteMZeroPageXlogRec(int pageno, uint8 info)
{ {
XLogRecData rdata; XLogBeginInsert();
XLogRegisterData((char *) (&pageno), sizeof(int));
rdata.data = (char *) (&pageno); (void) XLogInsert(RM_MULTIXACT_ID, info);
rdata.len = sizeof(int);
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
(void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
} }
/* /*
* MULTIXACT resource manager's routines * MULTIXACT resource manager's routines
*/ */
void void
multixact_redo(XLogRecPtr lsn, XLogRecord *record) multixact_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in multixact records */ /* Backup blocks are not used in multixact records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE) if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
{ {
@ -2775,7 +2764,7 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
* should be unnecessary, since any XID found here ought to have other * should be unnecessary, since any XID found here ought to have other
* evidence in the XLOG, but let's be safe. * evidence in the XLOG, but let's be safe.
*/ */
max_xid = record->xl_xid; max_xid = XLogRecGetXid(record);
for (i = 0; i < xlrec->nmembers; i++) for (i = 0; i < xlrec->nmembers; i++)
{ {
if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid)) if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))

View File

@ -889,14 +889,21 @@ typedef struct TwoPhaseRecordOnDisk
/* /*
* During prepare, the state file is assembled in memory before writing it * During prepare, the state file is assembled in memory before writing it
* to WAL and the actual state file. We use a chain of XLogRecData blocks * to WAL and the actual state file. We use a chain of StateFileChunk blocks
* so that we will be able to pass the state file contents directly to * for that.
* XLogInsert.
*/ */
typedef struct StateFileChunk
{
char *data;
uint32 len;
struct StateFileChunk *next;
} StateFileChunk;
static struct xllist static struct xllist
{ {
XLogRecData *head; /* first data block in the chain */ StateFileChunk *head; /* first data block in the chain */
XLogRecData *tail; /* last block in chain */ StateFileChunk *tail; /* last block in chain */
uint32 num_chunks;
uint32 bytes_free; /* free bytes left in tail block */ uint32 bytes_free; /* free bytes left in tail block */
uint32 total_len; /* total data bytes in chain */ uint32 total_len; /* total data bytes in chain */
} records; } records;
@ -917,11 +924,11 @@ save_state_data(const void *data, uint32 len)
if (padlen > records.bytes_free) if (padlen > records.bytes_free)
{ {
records.tail->next = palloc0(sizeof(XLogRecData)); records.tail->next = palloc0(sizeof(StateFileChunk));
records.tail = records.tail->next; records.tail = records.tail->next;
records.tail->buffer = InvalidBuffer;
records.tail->len = 0; records.tail->len = 0;
records.tail->next = NULL; records.tail->next = NULL;
records.num_chunks++;
records.bytes_free = Max(padlen, 512); records.bytes_free = Max(padlen, 512);
records.tail->data = palloc(records.bytes_free); records.tail->data = palloc(records.bytes_free);
@ -951,8 +958,7 @@ StartPrepare(GlobalTransaction gxact)
SharedInvalidationMessage *invalmsgs; SharedInvalidationMessage *invalmsgs;
/* Initialize linked list */ /* Initialize linked list */
records.head = palloc0(sizeof(XLogRecData)); records.head = palloc0(sizeof(StateFileChunk));
records.head->buffer = InvalidBuffer;
records.head->len = 0; records.head->len = 0;
records.head->next = NULL; records.head->next = NULL;
@ -960,6 +966,7 @@ StartPrepare(GlobalTransaction gxact)
records.head->data = palloc(records.bytes_free); records.head->data = palloc(records.bytes_free);
records.tail = records.head; records.tail = records.head;
records.num_chunks = 1;
records.total_len = 0; records.total_len = 0;
@ -1019,7 +1026,7 @@ EndPrepare(GlobalTransaction gxact)
TransactionId xid = pgxact->xid; TransactionId xid = pgxact->xid;
TwoPhaseFileHeader *hdr; TwoPhaseFileHeader *hdr;
char path[MAXPGPATH]; char path[MAXPGPATH];
XLogRecData *record; StateFileChunk *record;
pg_crc32 statefile_crc; pg_crc32 statefile_crc;
pg_crc32 bogus_crc; pg_crc32 bogus_crc;
int fd; int fd;
@ -1117,12 +1124,16 @@ EndPrepare(GlobalTransaction gxact)
* We save the PREPARE record's location in the gxact for later use by * We save the PREPARE record's location in the gxact for later use by
* CheckPointTwoPhase. * CheckPointTwoPhase.
*/ */
XLogEnsureRecordSpace(0, records.num_chunks);
START_CRIT_SECTION(); START_CRIT_SECTION();
MyPgXact->delayChkpt = true; MyPgXact->delayChkpt = true;
gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE, XLogBeginInsert();
records.head); for (record = records.head; record != NULL; record = record->next)
XLogRegisterData(record->data, record->len);
gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
XLogFlush(gxact->prepare_lsn); XLogFlush(gxact->prepare_lsn);
/* If we crash now, we have prepared: WAL replay will fix things */ /* If we crash now, we have prepared: WAL replay will fix things */
@ -1180,6 +1191,7 @@ EndPrepare(GlobalTransaction gxact)
SyncRepWaitForLSN(gxact->prepare_lsn); SyncRepWaitForLSN(gxact->prepare_lsn);
records.tail = records.head = NULL; records.tail = records.head = NULL;
records.num_chunks = 0;
} }
/* /*
@ -2071,8 +2083,6 @@ RecordTransactionCommitPrepared(TransactionId xid,
SharedInvalidationMessage *invalmsgs, SharedInvalidationMessage *invalmsgs,
bool initfileinval) bool initfileinval)
{ {
XLogRecData rdata[4];
int lastrdata = 0;
xl_xact_commit_prepared xlrec; xl_xact_commit_prepared xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
@ -2094,39 +2104,24 @@ RecordTransactionCommitPrepared(TransactionId xid,
xlrec.crec.nsubxacts = nchildren; xlrec.crec.nsubxacts = nchildren;
xlrec.crec.nmsgs = ninvalmsgs; xlrec.crec.nmsgs = ninvalmsgs;
rdata[0].data = (char *) (&xlrec); XLogBeginInsert();
rdata[0].len = MinSizeOfXactCommitPrepared; XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitPrepared);
rdata[0].buffer = InvalidBuffer;
/* dump rels to delete */ /* dump rels to delete */
if (nrels > 0) if (nrels > 0)
{ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
/* dump committed child Xids */ /* dump committed child Xids */
if (nchildren > 0) if (nchildren > 0)
{ XLogRegisterData((char *) children,
rdata[lastrdata].next = &(rdata[2]); nchildren * sizeof(TransactionId));
rdata[2].data = (char *) children;
rdata[2].len = nchildren * sizeof(TransactionId);
rdata[2].buffer = InvalidBuffer;
lastrdata = 2;
}
/* dump cache invalidation messages */ /* dump cache invalidation messages */
if (ninvalmsgs > 0) if (ninvalmsgs > 0)
{ XLogRegisterData((char *) invalmsgs,
rdata[lastrdata].next = &(rdata[3]); ninvalmsgs * sizeof(SharedInvalidationMessage));
rdata[3].data = (char *) invalmsgs;
rdata[3].len = ninvalmsgs * sizeof(SharedInvalidationMessage);
rdata[3].buffer = InvalidBuffer;
lastrdata = 3;
}
rdata[lastrdata].next = NULL;
recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata); recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED);
/* /*
* We don't currently try to sleep before flush here ... nor is there any * We don't currently try to sleep before flush here ... nor is there any
@ -2169,8 +2164,6 @@ RecordTransactionAbortPrepared(TransactionId xid,
int nrels, int nrels,
RelFileNode *rels) RelFileNode *rels)
{ {
XLogRecData rdata[3];
int lastrdata = 0;
xl_xact_abort_prepared xlrec; xl_xact_abort_prepared xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
@ -2189,30 +2182,20 @@ RecordTransactionAbortPrepared(TransactionId xid,
xlrec.arec.xact_time = GetCurrentTimestamp(); xlrec.arec.xact_time = GetCurrentTimestamp();
xlrec.arec.nrels = nrels; xlrec.arec.nrels = nrels;
xlrec.arec.nsubxacts = nchildren; xlrec.arec.nsubxacts = nchildren;
rdata[0].data = (char *) (&xlrec);
rdata[0].len = MinSizeOfXactAbortPrepared; XLogBeginInsert();
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbortPrepared);
/* dump rels to delete */ /* dump rels to delete */
if (nrels > 0) if (nrels > 0)
{ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
/* dump committed child Xids */ /* dump committed child Xids */
if (nchildren > 0) if (nchildren > 0)
{ XLogRegisterData((char *) children,
rdata[lastrdata].next = &(rdata[2]); nchildren * sizeof(TransactionId));
rdata[2].data = (char *) children;
rdata[2].len = nchildren * sizeof(TransactionId);
rdata[2].buffer = InvalidBuffer;
lastrdata = 2;
}
rdata[lastrdata].next = NULL;
recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata); recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED);
/* Always flush, since we're about to remove the 2PC state file */ /* Always flush, since we're about to remove the 2PC state file */
XLogFlush(recptr); XLogFlush(recptr);

View File

@ -571,7 +571,6 @@ AssignTransactionId(TransactionState s)
if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS || if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS ||
log_unknown_top) log_unknown_top)
{ {
XLogRecData rdata[2];
xl_xact_assignment xlrec; xl_xact_assignment xlrec;
/* /*
@ -582,17 +581,12 @@ AssignTransactionId(TransactionState s)
Assert(TransactionIdIsValid(xlrec.xtop)); Assert(TransactionIdIsValid(xlrec.xtop));
xlrec.nsubxacts = nUnreportedXids; xlrec.nsubxacts = nUnreportedXids;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = MinSizeOfXactAssignment; XLogRegisterData((char *) &xlrec, MinSizeOfXactAssignment);
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) unreportedXids,
rdata[0].next = &rdata[1]; nUnreportedXids * sizeof(TransactionId));
rdata[1].data = (char *) unreportedXids; (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT);
rdata[1].len = nUnreportedXids * sizeof(TransactionId);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
nUnreportedXids = 0; nUnreportedXids = 0;
/* mark top, not current xact as having been logged */ /* mark top, not current xact as having been logged */
@ -1087,8 +1081,6 @@ RecordTransactionCommit(void)
if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit || if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
XLogLogicalInfoActive()) XLogLogicalInfoActive())
{ {
XLogRecData rdata[4];
int lastrdata = 0;
xl_xact_commit xlrec; xl_xact_commit xlrec;
/* /*
@ -1107,63 +1099,38 @@ RecordTransactionCommit(void)
xlrec.nrels = nrels; xlrec.nrels = nrels;
xlrec.nsubxacts = nchildren; xlrec.nsubxacts = nchildren;
xlrec.nmsgs = nmsgs; xlrec.nmsgs = nmsgs;
rdata[0].data = (char *) (&xlrec);
rdata[0].len = MinSizeOfXactCommit; XLogBeginInsert();
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommit);
/* dump rels to delete */ /* dump rels to delete */
if (nrels > 0) if (nrels > 0)
{ XLogRegisterData((char *) rels,
rdata[0].next = &(rdata[1]); nrels * sizeof(RelFileNode));
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
/* dump committed child Xids */ /* dump committed child Xids */
if (nchildren > 0) if (nchildren > 0)
{ XLogRegisterData((char *) children,
rdata[lastrdata].next = &(rdata[2]); nchildren * sizeof(TransactionId));
rdata[2].data = (char *) children;
rdata[2].len = nchildren * sizeof(TransactionId);
rdata[2].buffer = InvalidBuffer;
lastrdata = 2;
}
/* dump shared cache invalidation messages */ /* dump shared cache invalidation messages */
if (nmsgs > 0) if (nmsgs > 0)
{ XLogRegisterData((char *) invalMessages,
rdata[lastrdata].next = &(rdata[3]); nmsgs * sizeof(SharedInvalidationMessage));
rdata[3].data = (char *) invalMessages; (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT);
rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
rdata[3].buffer = InvalidBuffer;
lastrdata = 3;
}
rdata[lastrdata].next = NULL;
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
} }
else else
{ {
XLogRecData rdata[2];
int lastrdata = 0;
xl_xact_commit_compact xlrec; xl_xact_commit_compact xlrec;
xlrec.xact_time = xactStopTimestamp; xlrec.xact_time = xactStopTimestamp;
xlrec.nsubxacts = nchildren; xlrec.nsubxacts = nchildren;
rdata[0].data = (char *) (&xlrec);
rdata[0].len = MinSizeOfXactCommitCompact; XLogBeginInsert();
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitCompact);
/* dump committed child Xids */ /* dump committed child Xids */
if (nchildren > 0) if (nchildren > 0)
{ XLogRegisterData((char *) children,
rdata[0].next = &(rdata[1]); nchildren * sizeof(TransactionId));
rdata[1].data = (char *) children;
rdata[1].len = nchildren * sizeof(TransactionId);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
rdata[lastrdata].next = NULL;
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata); (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT);
} }
} }
@ -1436,8 +1403,6 @@ RecordTransactionAbort(bool isSubXact)
RelFileNode *rels; RelFileNode *rels;
int nchildren; int nchildren;
TransactionId *children; TransactionId *children;
XLogRecData rdata[3];
int lastrdata = 0;
xl_xact_abort xlrec; xl_xact_abort xlrec;
/* /*
@ -1486,30 +1451,20 @@ RecordTransactionAbort(bool isSubXact)
} }
xlrec.nrels = nrels; xlrec.nrels = nrels;
xlrec.nsubxacts = nchildren; xlrec.nsubxacts = nchildren;
rdata[0].data = (char *) (&xlrec);
rdata[0].len = MinSizeOfXactAbort; XLogBeginInsert();
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbort);
/* dump rels to delete */ /* dump rels to delete */
if (nrels > 0) if (nrels > 0)
{ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
/* dump committed child Xids */ /* dump committed child Xids */
if (nchildren > 0) if (nchildren > 0)
{ XLogRegisterData((char *) children,
rdata[lastrdata].next = &(rdata[2]); nchildren * sizeof(TransactionId));
rdata[2].data = (char *) children;
rdata[2].len = nchildren * sizeof(TransactionId);
rdata[2].buffer = InvalidBuffer;
lastrdata = 2;
}
rdata[lastrdata].next = NULL;
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT);
/* /*
* Report the latest async abort LSN, so that the WAL writer knows to * Report the latest async abort LSN, so that the WAL writer knows to
@ -2351,6 +2306,9 @@ AbortTransaction(void)
AbortBufferIO(); AbortBufferIO();
UnlockBuffers(); UnlockBuffers();
/* Reset WAL record construction state */
XLogResetInsertion();
/* /*
* Also clean up any open wait for lock, since the lock manager will choke * Also clean up any open wait for lock, since the lock manager will choke
* if we try to wait for another lock before doing this. * if we try to wait for another lock before doing this.
@ -4299,6 +4257,9 @@ AbortSubTransaction(void)
AbortBufferIO(); AbortBufferIO();
UnlockBuffers(); UnlockBuffers();
/* Reset WAL record construction state */
XLogResetInsertion();
/* /*
* Also clean up any open wait for lock, since the lock manager will choke * Also clean up any open wait for lock, since the lock manager will choke
* if we try to wait for another lock before doing this. * if we try to wait for another lock before doing this.
@ -4938,42 +4899,42 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
} }
void void
xact_redo(XLogRecPtr lsn, XLogRecord *record) xact_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in xact records */ /* Backup blocks are not used in xact records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_XACT_COMMIT_COMPACT) if (info == XLOG_XACT_COMMIT_COMPACT)
{ {
xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record); xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record);
xact_redo_commit_compact(xlrec, record->xl_xid, lsn); xact_redo_commit_compact(xlrec, XLogRecGetXid(record), record->EndRecPtr);
} }
else if (info == XLOG_XACT_COMMIT) else if (info == XLOG_XACT_COMMIT)
{ {
xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record); xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
xact_redo_commit(xlrec, record->xl_xid, lsn); xact_redo_commit(xlrec, XLogRecGetXid(record), record->EndRecPtr);
} }
else if (info == XLOG_XACT_ABORT) else if (info == XLOG_XACT_ABORT)
{ {
xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record); xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
xact_redo_abort(xlrec, record->xl_xid); xact_redo_abort(xlrec, XLogRecGetXid(record));
} }
else if (info == XLOG_XACT_PREPARE) else if (info == XLOG_XACT_PREPARE)
{ {
/* the record contents are exactly the 2PC file */ /* the record contents are exactly the 2PC file */
RecreateTwoPhaseFile(record->xl_xid, RecreateTwoPhaseFile(XLogRecGetXid(record),
XLogRecGetData(record), record->xl_len); XLogRecGetData(record), XLogRecGetDataLen(record));
} }
else if (info == XLOG_XACT_COMMIT_PREPARED) else if (info == XLOG_XACT_COMMIT_PREPARED)
{ {
xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record); xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
xact_redo_commit(&xlrec->crec, xlrec->xid, lsn); xact_redo_commit(&xlrec->crec, xlrec->xid, record->EndRecPtr);
RemoveTwoPhaseFile(xlrec->xid, false); RemoveTwoPhaseFile(xlrec->xid, false);
} }
else if (info == XLOG_XACT_ABORT_PREPARED) else if (info == XLOG_XACT_ABORT_PREPARED)

View File

@ -757,10 +757,10 @@ static MemoryContext walDebugCxt = NULL;
static void readRecoveryCommandFile(void); static void readRecoveryCommandFile(void);
static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo); static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
static bool recoveryStopsBefore(XLogRecord *record); static bool recoveryStopsBefore(XLogReaderState *record);
static bool recoveryStopsAfter(XLogRecord *record); static bool recoveryStopsAfter(XLogReaderState *record);
static void recoveryPausesHere(void); static void recoveryPausesHere(void);
static bool recoveryApplyDelay(XLogRecord *record); static bool recoveryApplyDelay(XLogReaderState *record);
static void SetLatestXTime(TimestampTz xtime); static void SetLatestXTime(TimestampTz xtime);
static void SetCurrentChunkStartTime(TimestampTz xtime); static void SetCurrentChunkStartTime(TimestampTz xtime);
static void CheckRequiredParameterValues(void); static void CheckRequiredParameterValues(void);
@ -807,9 +807,9 @@ static char *str_time(pg_time_t tnow);
static bool CheckForStandbyTrigger(void); static bool CheckForStandbyTrigger(void);
#ifdef WAL_DEBUG #ifdef WAL_DEBUG
static void xlog_outrec(StringInfo buf, XLogRecord *record); static void xlog_outrec(StringInfo buf, XLogReaderState *record);
#endif #endif
static void xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record); static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
static void pg_start_backup_callback(int code, Datum arg); static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc, static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby); bool *backupEndRequired, bool *backupFromStandby);
@ -861,7 +861,6 @@ XLogRecPtr
XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
{ {
XLogCtlInsert *Insert = &XLogCtl->Insert; XLogCtlInsert *Insert = &XLogCtl->Insert;
XLogRecData *rdt;
pg_crc32 rdata_crc; pg_crc32 rdata_crc;
bool inserted; bool inserted;
XLogRecord *rechdr = (XLogRecord *) rdata->data; XLogRecord *rechdr = (XLogRecord *) rdata->data;
@ -870,28 +869,13 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
XLogRecPtr StartPos; XLogRecPtr StartPos;
XLogRecPtr EndPos; XLogRecPtr EndPos;
/* we assume that all of the record header is in the first chunk */
Assert(rdata->len >= SizeOfXLogRecord);
/* cross-check on whether we should be here or not */ /* cross-check on whether we should be here or not */
if (!XLogInsertAllowed()) if (!XLogInsertAllowed())
elog(ERROR, "cannot make new WAL entries during recovery"); elog(ERROR, "cannot make new WAL entries during recovery");
/*
* Calculate CRC of the data, including all the backup blocks
*
* Note that the record header isn't added into the CRC initially since we
* don't know the prev-link yet. Thus, the CRC will represent the CRC of
* the whole record in the order: rdata, then backup blocks, then record
* header.
*/
INIT_CRC32C(rdata_crc);
for (rdt = rdata->next; rdt != NULL; rdt = rdt->next)
COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
/*
* Calculate CRC of the header, except for prev-link, because we don't
* know it yet. It will be added later.
*/
COMP_CRC32C(rdata_crc, ((char *) rechdr), offsetof(XLogRecord, xl_prev));
/*---------- /*----------
* *
* We have now done all the preparatory work we can without holding a * We have now done all the preparatory work we can without holding a
@ -976,10 +960,11 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
if (inserted) if (inserted)
{ {
/* /*
* Now that xl_prev has been filled in, finish CRC calculation of the * Now that xl_prev has been filled in, calculate CRC of the record
* record header. * header.
*/ */
COMP_CRC32C(rdata_crc, ((char *) &rechdr->xl_prev), sizeof(XLogRecPtr)); rdata_crc = rechdr->xl_crc;
COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(rdata_crc); FIN_CRC32C(rdata_crc);
rechdr->xl_crc = rdata_crc; rechdr->xl_crc = rdata_crc;
@ -1053,34 +1038,47 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
#ifdef WAL_DEBUG #ifdef WAL_DEBUG
if (XLOG_DEBUG) if (XLOG_DEBUG)
{ {
static XLogReaderState *debug_reader = NULL;
StringInfoData buf; StringInfoData buf;
MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt); StringInfoData recordBuf;
char *errormsg = NULL;
MemoryContext oldCxt;
oldCxt = MemoryContextSwitchTo(walDebugCxt);
initStringInfo(&buf); initStringInfo(&buf);
appendStringInfo(&buf, "INSERT @ %X/%X: ", appendStringInfo(&buf, "INSERT @ %X/%X: ",
(uint32) (EndPos >> 32), (uint32) EndPos); (uint32) (EndPos >> 32), (uint32) EndPos);
xlog_outrec(&buf, rechdr);
if (rdata->data != NULL) /*
* We have to piece together the WAL record data from the XLogRecData
* entries, so that we can pass it to the rm_desc function as one
* contiguous chunk.
*/
initStringInfo(&recordBuf);
for (; rdata != NULL; rdata = rdata->next)
appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
if (!debug_reader)
debug_reader = XLogReaderAllocate(NULL, NULL);
if (!debug_reader ||
!DecodeXLogRecord(debug_reader, (XLogRecord *) recordBuf.data,
&errormsg))
{
appendStringInfo(&buf, "error decoding record: %s",
errormsg ? errormsg : "no error message");
}
else
{ {
StringInfoData recordbuf;
/*
* We have to piece together the WAL record data from the
* XLogRecData entries, so that we can pass it to the rm_desc
* function as one contiguous chunk.
*/
initStringInfo(&recordbuf);
appendBinaryStringInfo(&recordbuf, (char *) rechdr, sizeof(XLogRecord));
for (; rdata != NULL; rdata = rdata->next)
appendBinaryStringInfo(&recordbuf, rdata->data, rdata->len);
appendStringInfoString(&buf, " - "); appendStringInfoString(&buf, " - ");
xlog_outdesc(&buf, rechdr->xl_rmid, (XLogRecord *) recordbuf.data); xlog_outdesc(&buf, debug_reader);
} }
elog(LOG, "%s", buf.data); elog(LOG, "%s", buf.data);
pfree(buf.data);
pfree(recordBuf.data);
MemoryContextSwitchTo(oldCxt); MemoryContextSwitchTo(oldCxt);
MemoryContextReset(walDebugCxt);
} }
#endif #endif
@ -1170,7 +1168,7 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
uint64 startbytepos; uint64 startbytepos;
uint64 endbytepos; uint64 endbytepos;
uint64 prevbytepos; uint64 prevbytepos;
uint32 size = SizeOfXLogRecord; uint32 size = MAXALIGN(SizeOfXLogRecord);
XLogRecPtr ptr; XLogRecPtr ptr;
uint32 segleft; uint32 segleft;
@ -1234,9 +1232,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
XLogRecPtr CurrPos; XLogRecPtr CurrPos;
XLogPageHeader pagehdr; XLogPageHeader pagehdr;
/* The first chunk is the record header */
Assert(rdata->len == SizeOfXLogRecord);
/* /*
* Get a pointer to the right place in the right WAL buffer to start * Get a pointer to the right place in the right WAL buffer to start
* inserting to. * inserting to.
@ -1309,9 +1304,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
} }
Assert(written == write_len); Assert(written == write_len);
/* Align the end position, so that the next record starts aligned */
CurrPos = MAXALIGN64(CurrPos);
/* /*
* If this was an xlog-switch, it's not enough to write the switch record, * If this was an xlog-switch, it's not enough to write the switch record,
* we also have to consume all the remaining space in the WAL segment. We * we also have to consume all the remaining space in the WAL segment. We
@ -1341,6 +1333,11 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
CurrPos += XLOG_BLCKSZ; CurrPos += XLOG_BLCKSZ;
} }
} }
else
{
/* Align the end position, so that the next record starts aligned */
CurrPos = MAXALIGN64(CurrPos);
}
if (CurrPos != EndPos) if (CurrPos != EndPos)
elog(PANIC, "space reserved for WAL record does not match what was written"); elog(PANIC, "space reserved for WAL record does not match what was written");
@ -4470,6 +4467,7 @@ BootStrapXLOG(void)
XLogPageHeader page; XLogPageHeader page;
XLogLongPageHeader longpage; XLogLongPageHeader longpage;
XLogRecord *record; XLogRecord *record;
char *recptr;
bool use_existent; bool use_existent;
uint64 sysidentifier; uint64 sysidentifier;
struct timeval tv; struct timeval tv;
@ -4541,17 +4539,23 @@ BootStrapXLOG(void)
longpage->xlp_xlog_blcksz = XLOG_BLCKSZ; longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
/* Insert the initial checkpoint record */ /* Insert the initial checkpoint record */
record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD); recptr = ((char *) page + SizeOfXLogLongPHD);
record = (XLogRecord *) recptr;
record->xl_prev = 0; record->xl_prev = 0;
record->xl_xid = InvalidTransactionId; record->xl_xid = InvalidTransactionId;
record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint); record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
record->xl_len = sizeof(checkPoint);
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
record->xl_rmid = RM_XLOG_ID; record->xl_rmid = RM_XLOG_ID;
memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint)); recptr += SizeOfXLogRecord;
/* fill the XLogRecordDataHeaderShort struct */
*(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
*(recptr++) = sizeof(checkPoint);
memcpy(recptr, &checkPoint, sizeof(checkPoint));
recptr += sizeof(checkPoint);
Assert(recptr - (char *) record == record->xl_tot_len);
INIT_CRC32C(crc); INIT_CRC32C(crc);
COMP_CRC32C(crc, &checkPoint, sizeof(checkPoint)); COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc)); COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc); FIN_CRC32C(crc);
record->xl_crc = crc; record->xl_crc = crc;
@ -4984,36 +4988,37 @@ exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo)
* timestamps. * timestamps.
*/ */
static bool static bool
getRecordTimestamp(XLogRecord *record, TimestampTz *recordXtime) getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
{ {
uint8 record_info = record->xl_info & ~XLR_INFO_MASK; uint8 record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
uint8 rmid = XLogRecGetRmid(record);
if (record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT) if (rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
{ {
*recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time; *recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time;
return true; return true;
} }
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT) if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT)
{ {
*recordXtime = ((xl_xact_commit_compact *) XLogRecGetData(record))->xact_time; *recordXtime = ((xl_xact_commit_compact *) XLogRecGetData(record))->xact_time;
return true; return true;
} }
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT) if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
{ {
*recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time; *recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
return true; return true;
} }
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED) if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
{ {
*recordXtime = ((xl_xact_commit_prepared *) XLogRecGetData(record))->crec.xact_time; *recordXtime = ((xl_xact_commit_prepared *) XLogRecGetData(record))->crec.xact_time;
return true; return true;
} }
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT) if (rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
{ {
*recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time; *recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
return true; return true;
} }
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED) if (rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
{ {
*recordXtime = ((xl_xact_abort_prepared *) XLogRecGetData(record))->arec.xact_time; *recordXtime = ((xl_xact_abort_prepared *) XLogRecGetData(record))->arec.xact_time;
return true; return true;
@ -5030,7 +5035,7 @@ getRecordTimestamp(XLogRecord *record, TimestampTz *recordXtime)
* new timeline's history file. * new timeline's history file.
*/ */
static bool static bool
recoveryStopsBefore(XLogRecord *record) recoveryStopsBefore(XLogReaderState *record)
{ {
bool stopsHere = false; bool stopsHere = false;
uint8 record_info; uint8 record_info;
@ -5052,14 +5057,14 @@ recoveryStopsBefore(XLogRecord *record)
} }
/* Otherwise we only consider stopping before COMMIT or ABORT records. */ /* Otherwise we only consider stopping before COMMIT or ABORT records. */
if (record->xl_rmid != RM_XACT_ID) if (XLogRecGetRmid(record) != RM_XACT_ID)
return false; return false;
record_info = record->xl_info & ~XLR_INFO_MASK; record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (record_info == XLOG_XACT_COMMIT_COMPACT || record_info == XLOG_XACT_COMMIT) if (record_info == XLOG_XACT_COMMIT_COMPACT || record_info == XLOG_XACT_COMMIT)
{ {
isCommit = true; isCommit = true;
recordXid = record->xl_xid; recordXid = XLogRecGetXid(record);
} }
else if (record_info == XLOG_XACT_COMMIT_PREPARED) else if (record_info == XLOG_XACT_COMMIT_PREPARED)
{ {
@ -5069,7 +5074,7 @@ recoveryStopsBefore(XLogRecord *record)
else if (record_info == XLOG_XACT_ABORT) else if (record_info == XLOG_XACT_ABORT)
{ {
isCommit = false; isCommit = false;
recordXid = record->xl_xid; recordXid = XLogRecGetXid(record);
} }
else if (record_info == XLOG_XACT_ABORT_PREPARED) else if (record_info == XLOG_XACT_ABORT_PREPARED)
{ {
@ -5140,19 +5145,21 @@ recoveryStopsBefore(XLogRecord *record)
* record in XLogCtl->recoveryLastXTime. * record in XLogCtl->recoveryLastXTime.
*/ */
static bool static bool
recoveryStopsAfter(XLogRecord *record) recoveryStopsAfter(XLogReaderState *record)
{ {
uint8 record_info; uint8 record_info;
uint8 rmid;
TimestampTz recordXtime; TimestampTz recordXtime;
record_info = record->xl_info & ~XLR_INFO_MASK; record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
rmid = XLogRecGetRmid(record);
/* /*
* There can be many restore points that share the same name; we stop at * There can be many restore points that share the same name; we stop at
* the first one. * the first one.
*/ */
if (recoveryTarget == RECOVERY_TARGET_NAME && if (recoveryTarget == RECOVERY_TARGET_NAME &&
record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT) rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
{ {
xl_restore_point *recordRestorePointData; xl_restore_point *recordRestorePointData;
@ -5173,7 +5180,7 @@ recoveryStopsAfter(XLogRecord *record)
} }
} }
if (record->xl_rmid == RM_XACT_ID && if (rmid == RM_XACT_ID &&
(record_info == XLOG_XACT_COMMIT_COMPACT || (record_info == XLOG_XACT_COMMIT_COMPACT ||
record_info == XLOG_XACT_COMMIT || record_info == XLOG_XACT_COMMIT ||
record_info == XLOG_XACT_COMMIT_PREPARED || record_info == XLOG_XACT_COMMIT_PREPARED ||
@ -5192,7 +5199,7 @@ recoveryStopsAfter(XLogRecord *record)
else if (record_info == XLOG_XACT_ABORT_PREPARED) else if (record_info == XLOG_XACT_ABORT_PREPARED)
recordXid = ((xl_xact_abort_prepared *) XLogRecGetData(record))->xid; recordXid = ((xl_xact_abort_prepared *) XLogRecGetData(record))->xid;
else else
recordXid = record->xl_xid; recordXid = XLogRecGetXid(record);
/* /*
* There can be only one transaction end record with this exact * There can be only one transaction end record with this exact
@ -5307,7 +5314,7 @@ SetRecoveryPause(bool recoveryPause)
* usability. * usability.
*/ */
static bool static bool
recoveryApplyDelay(XLogRecord *record) recoveryApplyDelay(XLogReaderState *record)
{ {
uint8 record_info; uint8 record_info;
TimestampTz xtime; TimestampTz xtime;
@ -5326,8 +5333,8 @@ recoveryApplyDelay(XLogRecord *record)
* so there is already opportunity for issues caused by early conflicts on * so there is already opportunity for issues caused by early conflicts on
* standbys. * standbys.
*/ */
record_info = record->xl_info & ~XLR_INFO_MASK; record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (!(record->xl_rmid == RM_XACT_ID && if (!(XLogRecGetRmid(record) == RM_XACT_ID &&
(record_info == XLOG_XACT_COMMIT_COMPACT || (record_info == XLOG_XACT_COMMIT_COMPACT ||
record_info == XLOG_XACT_COMMIT || record_info == XLOG_XACT_COMMIT ||
record_info == XLOG_XACT_COMMIT_PREPARED))) record_info == XLOG_XACT_COMMIT_PREPARED)))
@ -5696,7 +5703,7 @@ StartupXLOG(void)
record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true); record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
if (record != NULL) if (record != NULL)
{ {
memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint)); memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN); wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
ereport(DEBUG1, ereport(DEBUG1,
(errmsg("checkpoint record is at %X/%X", (errmsg("checkpoint record is at %X/%X",
@ -5793,7 +5800,7 @@ StartupXLOG(void)
ereport(PANIC, ereport(PANIC,
(errmsg("could not locate a valid checkpoint record"))); (errmsg("could not locate a valid checkpoint record")));
} }
memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint)); memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN); wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
} }
@ -6230,9 +6237,9 @@ StartupXLOG(void)
appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ", appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
(uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr, (uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
(uint32) (EndRecPtr >> 32), (uint32) EndRecPtr); (uint32) (EndRecPtr >> 32), (uint32) EndRecPtr);
xlog_outrec(&buf, record); xlog_outrec(&buf, xlogreader);
appendStringInfoString(&buf, " - "); appendStringInfoString(&buf, " - ");
xlog_outdesc(&buf, record->xl_rmid, record); xlog_outdesc(&buf, xlogreader);
elog(LOG, "%s", buf.data); elog(LOG, "%s", buf.data);
pfree(buf.data); pfree(buf.data);
} }
@ -6260,7 +6267,7 @@ StartupXLOG(void)
/* /*
* Have we reached our recovery target? * Have we reached our recovery target?
*/ */
if (recoveryStopsBefore(record)) if (recoveryStopsBefore(xlogreader))
{ {
reachedStopPoint = true; /* see below */ reachedStopPoint = true; /* see below */
break; break;
@ -6270,7 +6277,7 @@ StartupXLOG(void)
* If we've been asked to lag the master, wait on latch until * If we've been asked to lag the master, wait on latch until
* enough time has passed. * enough time has passed.
*/ */
if (recoveryApplyDelay(record)) if (recoveryApplyDelay(xlogreader))
{ {
/* /*
* We test for paused recovery again here. If user sets * We test for paused recovery again here. If user sets
@ -6285,7 +6292,7 @@ StartupXLOG(void)
/* Setup error traceback support for ereport() */ /* Setup error traceback support for ereport() */
errcallback.callback = rm_redo_error_callback; errcallback.callback = rm_redo_error_callback;
errcallback.arg = (void *) record; errcallback.arg = (void *) xlogreader;
errcallback.previous = error_context_stack; errcallback.previous = error_context_stack;
error_context_stack = &errcallback; error_context_stack = &errcallback;
@ -6324,7 +6331,7 @@ StartupXLOG(void)
{ {
CheckPoint checkPoint; CheckPoint checkPoint;
memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint)); memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
newTLI = checkPoint.ThisTimeLineID; newTLI = checkPoint.ThisTimeLineID;
prevTLI = checkPoint.PrevTimeLineID; prevTLI = checkPoint.PrevTimeLineID;
} }
@ -6332,7 +6339,7 @@ StartupXLOG(void)
{ {
xl_end_of_recovery xlrec; xl_end_of_recovery xlrec;
memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery)); memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
newTLI = xlrec.ThisTimeLineID; newTLI = xlrec.ThisTimeLineID;
prevTLI = xlrec.PrevTimeLineID; prevTLI = xlrec.PrevTimeLineID;
} }
@ -6366,7 +6373,7 @@ StartupXLOG(void)
RecordKnownAssignedTransactionIds(record->xl_xid); RecordKnownAssignedTransactionIds(record->xl_xid);
/* Now apply the WAL record itself */ /* Now apply the WAL record itself */
RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record); RmgrTable[record->xl_rmid].rm_redo(xlogreader);
/* Pop the error context stack */ /* Pop the error context stack */
error_context_stack = errcallback.previous; error_context_stack = errcallback.previous;
@ -6394,7 +6401,7 @@ StartupXLOG(void)
WalSndWakeup(); WalSndWakeup();
/* Exit loop if we reached inclusive recovery target */ /* Exit loop if we reached inclusive recovery target */
if (recoveryStopsAfter(record)) if (recoveryStopsAfter(xlogreader))
{ {
reachedStopPoint = true; reachedStopPoint = true;
break; break;
@ -7148,8 +7155,7 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
} }
return NULL; return NULL;
} }
if (record->xl_len != sizeof(CheckPoint) || if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint))
{ {
switch (whichChkpt) switch (whichChkpt)
{ {
@ -7194,6 +7200,9 @@ InitXLOGAccess(void)
(void) GetRedoRecPtr(); (void) GetRedoRecPtr();
/* Also update our copy of doPageWrites. */ /* Also update our copy of doPageWrites. */
doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites); doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
/* Also initialize the working areas for constructing WAL records */
InitXLogInsert();
} }
/* /*
@ -7490,7 +7499,6 @@ CreateCheckPoint(int flags)
CheckPoint checkPoint; CheckPoint checkPoint;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogCtlInsert *Insert = &XLogCtl->Insert; XLogCtlInsert *Insert = &XLogCtl->Insert;
XLogRecData rdata;
uint32 freespace; uint32 freespace;
XLogSegNo _logSegNo; XLogSegNo _logSegNo;
XLogRecPtr curInsert; XLogRecPtr curInsert;
@ -7760,15 +7768,11 @@ CreateCheckPoint(int flags)
/* /*
* Now insert the checkpoint record into XLOG. * Now insert the checkpoint record into XLOG.
*/ */
rdata.data = (char *) (&checkPoint); XLogBeginInsert();
rdata.len = sizeof(checkPoint); XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
recptr = XLogInsert(RM_XLOG_ID, recptr = XLogInsert(RM_XLOG_ID,
shutdown ? XLOG_CHECKPOINT_SHUTDOWN : shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_ONLINE);
&rdata);
XLogFlush(recptr); XLogFlush(recptr);
@ -7908,7 +7912,6 @@ static void
CreateEndOfRecoveryRecord(void) CreateEndOfRecoveryRecord(void)
{ {
xl_end_of_recovery xlrec; xl_end_of_recovery xlrec;
XLogRecData rdata;
XLogRecPtr recptr; XLogRecPtr recptr;
/* sanity check */ /* sanity check */
@ -7926,12 +7929,9 @@ CreateEndOfRecoveryRecord(void)
START_CRIT_SECTION(); START_CRIT_SECTION();
rdata.data = (char *) &xlrec; XLogBeginInsert();
rdata.len = sizeof(xl_end_of_recovery); XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
rdata.buffer = InvalidBuffer; recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
rdata.next = NULL;
recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
XLogFlush(recptr); XLogFlush(recptr);
@ -8307,13 +8307,9 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
void void
XLogPutNextOid(Oid nextOid) XLogPutNextOid(Oid nextOid)
{ {
XLogRecData rdata; XLogBeginInsert();
XLogRegisterData((char *) (&nextOid), sizeof(Oid));
rdata.data = (char *) (&nextOid); (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
rdata.len = sizeof(Oid);
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
(void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata);
/* /*
* We need not flush the NEXTOID record immediately, because any of the * We need not flush the NEXTOID record immediately, because any of the
@ -8349,15 +8345,10 @@ XLogRecPtr
RequestXLogSwitch(void) RequestXLogSwitch(void)
{ {
XLogRecPtr RecPtr; XLogRecPtr RecPtr;
XLogRecData rdata;
/* XLOG SWITCH, alone among xlog record types, has no data */ /* XLOG SWITCH has no data */
rdata.buffer = InvalidBuffer; XLogBeginInsert();
rdata.data = NULL; RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
rdata.len = 0;
rdata.next = NULL;
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, &rdata);
return RecPtr; return RecPtr;
} }
@ -8369,18 +8360,15 @@ XLogRecPtr
XLogRestorePoint(const char *rpName) XLogRestorePoint(const char *rpName)
{ {
XLogRecPtr RecPtr; XLogRecPtr RecPtr;
XLogRecData rdata;
xl_restore_point xlrec; xl_restore_point xlrec;
xlrec.rp_time = GetCurrentTimestamp(); xlrec.rp_time = GetCurrentTimestamp();
strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN); strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
rdata.buffer = InvalidBuffer; XLogBeginInsert();
rdata.data = (char *) &xlrec; XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
rdata.len = sizeof(xl_restore_point);
rdata.next = NULL;
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, &rdata); RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
ereport(LOG, ereport(LOG,
(errmsg("restore point \"%s\" created at %X/%X", (errmsg("restore point \"%s\" created at %X/%X",
@ -8412,7 +8400,6 @@ XLogReportParameters(void)
*/ */
if (wal_level != ControlFile->wal_level || XLogIsNeeded()) if (wal_level != ControlFile->wal_level || XLogIsNeeded())
{ {
XLogRecData rdata;
xl_parameter_change xlrec; xl_parameter_change xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
@ -8423,12 +8410,10 @@ XLogReportParameters(void)
xlrec.wal_level = wal_level; xlrec.wal_level = wal_level;
xlrec.wal_log_hints = wal_log_hints; xlrec.wal_log_hints = wal_log_hints;
rdata.buffer = InvalidBuffer; XLogBeginInsert();
rdata.data = (char *) &xlrec; XLogRegisterData((char *) &xlrec, sizeof(xlrec));
rdata.len = sizeof(xlrec);
rdata.next = NULL;
recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, &rdata); recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
XLogFlush(recptr); XLogFlush(recptr);
} }
@ -8486,14 +8471,10 @@ UpdateFullPageWrites(void)
*/ */
if (XLogStandbyInfoActive() && !RecoveryInProgress()) if (XLogStandbyInfoActive() && !RecoveryInProgress())
{ {
XLogRecData rdata; XLogBeginInsert();
XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
rdata.data = (char *) (&fullPageWrites); XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
rdata.len = sizeof(bool);
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata);
} }
if (!fullPageWrites) if (!fullPageWrites)
@ -8558,12 +8539,13 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI)
* not all record types are related to control file updates. * not all record types are related to control file updates.
*/ */
void void
xlog_redo(XLogRecPtr lsn, XLogRecord *record) xlog_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
XLogRecPtr lsn = record->EndRecPtr;
/* Backup blocks are not used by XLOG rmgr */ /* in XLOG rmgr, backup blocks are only used by XLOG_FPI records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record) || info == XLOG_FPI);
if (info == XLOG_NEXTOID) if (info == XLOG_NEXTOID)
{ {
@ -8750,14 +8732,12 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
} }
else if (info == XLOG_FPI) else if (info == XLOG_FPI)
{ {
char *data; Buffer buffer;
BkpBlock bkpb;
/* /*
* Full-page image (FPI) records contain a backup block stored * Full-page image (FPI) records contain nothing else but a backup
* "inline" in the normal data since the locking when writing hint * block. The block reference must include a full-page image -
* records isn't sufficient to use the normal backup block mechanism, * otherwise there would be no point in this record.
* which assumes exclusive lock on the buffer supplied.
* *
* Since the only change in these backup block are hint bits, there * Since the only change in these backup block are hint bits, there
* are no recovery conflicts generated. * are no recovery conflicts generated.
@ -8766,11 +8746,9 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
* smgr implementation has no need to implement anything. Which means * smgr implementation has no need to implement anything. Which means
* nothing is needed in md.c etc * nothing is needed in md.c etc
*/ */
data = XLogRecGetData(record); if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
memcpy(&bkpb, data, sizeof(BkpBlock)); elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
data += sizeof(BkpBlock); UnlockReleaseBuffer(buffer);
RestoreBackupBlockContents(lsn, bkpb, data, false, false);
} }
else if (info == XLOG_BACKUP_END) else if (info == XLOG_BACKUP_END)
{ {
@ -8867,22 +8845,42 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
#ifdef WAL_DEBUG #ifdef WAL_DEBUG
static void static void
xlog_outrec(StringInfo buf, XLogRecord *record) xlog_outrec(StringInfo buf, XLogReaderState *record)
{ {
int i; int block_id;
appendStringInfo(buf, "prev %X/%X; xid %u", appendStringInfo(buf, "prev %X/%X; xid %u",
(uint32) (record->xl_prev >> 32), (uint32) (XLogRecGetPrev(record) >> 32),
(uint32) record->xl_prev, (uint32) XLogRecGetPrev(record),
record->xl_xid); XLogRecGetXid(record));
appendStringInfo(buf, "; len %u", appendStringInfo(buf, "; len %u",
record->xl_len); XLogRecGetDataLen(record));
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) /* decode block references */
for (block_id = 0; block_id <= record->max_block_id; block_id++)
{ {
if (record->xl_info & XLR_BKP_BLOCK(i)) RelFileNode rnode;
appendStringInfo(buf, "; bkpb%d", i); ForkNumber forknum;
BlockNumber blk;
if (!XLogRecHasBlockRef(record, block_id))
continue;
XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
if (forknum != MAIN_FORKNUM)
appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, fork %u, blk %u",
block_id,
rnode.spcNode, rnode.dbNode, rnode.relNode,
forknum,
blk);
else
appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, blk %u",
block_id,
rnode.spcNode, rnode.dbNode, rnode.relNode,
blk);
if (XLogRecHasBlockImage(record, block_id))
appendStringInfo(buf, " FPW");
} }
} }
#endif /* WAL_DEBUG */ #endif /* WAL_DEBUG */
@ -8892,17 +8890,18 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
* optionally followed by a colon, a space, and a further description. * optionally followed by a colon, a space, and a further description.
*/ */
static void static void
xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record) xlog_outdesc(StringInfo buf, XLogReaderState *record)
{ {
RmgrId rmid = XLogRecGetRmid(record);
uint8 info = XLogRecGetInfo(record);
const char *id; const char *id;
appendStringInfoString(buf, RmgrTable[rmid].rm_name); appendStringInfoString(buf, RmgrTable[rmid].rm_name);
appendStringInfoChar(buf, '/'); appendStringInfoChar(buf, '/');
id = RmgrTable[rmid].rm_identify(record->xl_info); id = RmgrTable[rmid].rm_identify(info);
if (id == NULL) if (id == NULL)
appendStringInfo(buf, "UNKNOWN (%X): ", appendStringInfo(buf, "UNKNOWN (%X): ", info & ~XLR_INFO_MASK);
record->xl_info & ~XLR_INFO_MASK);
else else
appendStringInfo(buf, "%s: ", id); appendStringInfo(buf, "%s: ", id);
@ -9411,7 +9410,6 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
XLogRecPtr startpoint; XLogRecPtr startpoint;
XLogRecPtr stoppoint; XLogRecPtr stoppoint;
TimeLineID stoptli; TimeLineID stoptli;
XLogRecData rdata;
pg_time_t stamp_time; pg_time_t stamp_time;
char strfbuf[128]; char strfbuf[128];
char histfilepath[MAXPGPATH]; char histfilepath[MAXPGPATH];
@ -9618,11 +9616,9 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
/* /*
* Write the backup-end xlog record * Write the backup-end xlog record
*/ */
rdata.data = (char *) (&startpoint); XLogBeginInsert();
rdata.len = sizeof(startpoint); XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
rdata.buffer = InvalidBuffer; stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
rdata.next = NULL;
stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, &rdata);
stoptli = ThisTimeLineID; stoptli = ThisTimeLineID;
/* /*
@ -9930,15 +9926,13 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
static void static void
rm_redo_error_callback(void *arg) rm_redo_error_callback(void *arg)
{ {
XLogRecord *record = (XLogRecord *) arg; XLogReaderState *record = (XLogReaderState *) arg;
StringInfoData buf; StringInfoData buf;
initStringInfo(&buf); initStringInfo(&buf);
xlog_outdesc(&buf, record->xl_rmid, record); xlog_outdesc(&buf, record);
/* don't bother emitting empty description */ errcontext("xlog redo %s", buf.data);
if (buf.len > 0)
errcontext("xlog redo %s", buf.data);
pfree(buf.data); pfree(buf.data);
} }

File diff suppressed because it is too large Load Diff

View File

@ -37,6 +37,8 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...)
the supplied arguments. */ the supplied arguments. */
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3))); __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
static void ResetDecoder(XLogReaderState *state);
/* size of the buffer allocated for error message. */ /* size of the buffer allocated for error message. */
#define MAX_ERRORMSG_LEN 1000 #define MAX_ERRORMSG_LEN 1000
@ -59,46 +61,33 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...)
/* /*
* Allocate and initialize a new XLogReader. * Allocate and initialize a new XLogReader.
* *
* Returns NULL if the xlogreader couldn't be allocated. * The returned XLogReader is palloc'd. (In FRONTEND code, that means that
* running out-of-memory causes an immediate exit(1).)
*/ */
XLogReaderState * XLogReaderState *
XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data) XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
{ {
XLogReaderState *state; XLogReaderState *state;
AssertArg(pagereadfunc != NULL); state = (XLogReaderState *) palloc0(sizeof(XLogReaderState));
state = (XLogReaderState *) malloc(sizeof(XLogReaderState)); state->max_block_id = -1;
if (!state)
return NULL;
MemSet(state, 0, sizeof(XLogReaderState));
/* /*
* Permanently allocate readBuf. We do it this way, rather than just * Permanently allocate readBuf. We do it this way, rather than just
* making a static array, for two reasons: (1) no need to waste the * making a static array, for two reasons: (1) no need to waste the
* storage in most instantiations of the backend; (2) a static char array * storage in most instantiations of the backend; (2) a static char array
* isn't guaranteed to have any particular alignment, whereas malloc() * isn't guaranteed to have any particular alignment, whereas palloc()
* will provide MAXALIGN'd storage. * will provide MAXALIGN'd storage.
*/ */
state->readBuf = (char *) malloc(XLOG_BLCKSZ); state->readBuf = (char *) palloc(XLOG_BLCKSZ);
if (!state->readBuf)
{
free(state);
return NULL;
}
state->read_page = pagereadfunc; state->read_page = pagereadfunc;
/* system_identifier initialized to zeroes above */ /* system_identifier initialized to zeroes above */
state->private_data = private_data; state->private_data = private_data;
/* ReadRecPtr and EndRecPtr initialized to zeroes above */ /* ReadRecPtr and EndRecPtr initialized to zeroes above */
/* readSegNo, readOff, readLen, readPageTLI initialized to zeroes above */ /* readSegNo, readOff, readLen, readPageTLI initialized to zeroes above */
state->errormsg_buf = malloc(MAX_ERRORMSG_LEN + 1); state->errormsg_buf = palloc(MAX_ERRORMSG_LEN + 1);
if (!state->errormsg_buf)
{
free(state->readBuf);
free(state);
return NULL;
}
state->errormsg_buf[0] = '\0'; state->errormsg_buf[0] = '\0';
/* /*
@ -107,9 +96,9 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
*/ */
if (!allocate_recordbuf(state, 0)) if (!allocate_recordbuf(state, 0))
{ {
free(state->errormsg_buf); pfree(state->errormsg_buf);
free(state->readBuf); pfree(state->readBuf);
free(state); pfree(state);
return NULL; return NULL;
} }
@ -119,11 +108,24 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
/*
 * Free an XLogReader and every buffer it owns.
 *
 * The per-block data buffers are kept across records for reuse, while
 * ResetDecoder() clears in_use and resets max_block_id.  We must therefore
 * look at every slot of blocks[], not just the ones used by the most
 * recently decoded record, or buffers allocated for earlier records would
 * be leaked.
 */
void
XLogReaderFree(XLogReaderState *state)
{
	int			block_id;

	for (block_id = 0; block_id <= XLR_MAX_BLOCK_ID; block_id++)
	{
		if (state->blocks[block_id].data)
			pfree(state->blocks[block_id].data);
	}
	if (state->main_data)
		pfree(state->main_data);

	pfree(state->errormsg_buf);
	if (state->readRecordBuf)
		pfree(state->readRecordBuf);
	pfree(state->readBuf);
	pfree(state);
}
/* /*
@ -146,14 +148,8 @@ allocate_recordbuf(XLogReaderState *state, uint32 reclength)
newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ)); newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
if (state->readRecordBuf) if (state->readRecordBuf)
free(state->readRecordBuf); pfree(state->readRecordBuf);
state->readRecordBuf = (char *) malloc(newSize); state->readRecordBuf = (char *) palloc(newSize);
if (!state->readRecordBuf)
{
state->readRecordBufSize = 0;
return false;
}
state->readRecordBufSize = newSize; state->readRecordBufSize = newSize;
return true; return true;
} }
@ -191,6 +187,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
*errormsg = NULL; *errormsg = NULL;
state->errormsg_buf[0] = '\0'; state->errormsg_buf[0] = '\0';
ResetDecoder(state);
if (RecPtr == InvalidXLogRecPtr) if (RecPtr == InvalidXLogRecPtr)
{ {
RecPtr = state->EndRecPtr; RecPtr = state->EndRecPtr;
@ -440,7 +438,10 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
state->EndRecPtr -= state->EndRecPtr % XLogSegSize; state->EndRecPtr -= state->EndRecPtr % XLogSegSize;
} }
return record; if (DecodeXLogRecord(state, record, errormsg))
return record;
else
return NULL;
err: err:
@ -579,30 +580,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
XLogRecPtr PrevRecPtr, XLogRecord *record, XLogRecPtr PrevRecPtr, XLogRecord *record,
bool randAccess) bool randAccess)
{ {
/* if (record->xl_tot_len < SizeOfXLogRecord)
* xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
* required.
*/
if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
{
if (record->xl_len != 0)
{
report_invalid_record(state,
"invalid xlog switch record at %X/%X",
(uint32) (RecPtr >> 32), (uint32) RecPtr);
return false;
}
}
else if (record->xl_len == 0)
{
report_invalid_record(state,
"record with zero length at %X/%X",
(uint32) (RecPtr >> 32), (uint32) RecPtr);
return false;
}
if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
{ {
report_invalid_record(state, report_invalid_record(state,
"invalid record length at %X/%X", "invalid record length at %X/%X",
@ -663,79 +641,17 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
* We assume all of the record (that is, xl_tot_len bytes) has been read * We assume all of the record (that is, xl_tot_len bytes) has been read
* into memory at *record. Also, ValidXLogRecordHeader() has accepted the * into memory at *record. Also, ValidXLogRecordHeader() has accepted the
* record's header, which means in particular that xl_tot_len is at least * record's header, which means in particular that xl_tot_len is at least
* SizeOfXlogRecord, so it is safe to fetch xl_len. * SizeOfXlogRecord.
*/ */
static bool static bool
ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr) ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
{ {
pg_crc32 crc; pg_crc32 crc;
int i;
uint32 len = record->xl_len;
BkpBlock bkpb;
char *blk;
size_t remaining = record->xl_tot_len;
/* First the rmgr data */ /* Calculate the CRC */
if (remaining < SizeOfXLogRecord + len)
{
/* ValidXLogRecordHeader() should've caught this already... */
report_invalid_record(state, "invalid record length at %X/%X",
(uint32) (recptr >> 32), (uint32) recptr);
return false;
}
remaining -= SizeOfXLogRecord + len;
INIT_CRC32C(crc); INIT_CRC32C(crc);
COMP_CRC32C(crc, XLogRecGetData(record), len); COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
/* include the record header last */
/* Add in the backup blocks, if any */
blk = (char *) XLogRecGetData(record) + len;
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
{
uint32 blen;
if (!(record->xl_info & XLR_BKP_BLOCK(i)))
continue;
if (remaining < sizeof(BkpBlock))
{
report_invalid_record(state,
"invalid backup block size in record at %X/%X",
(uint32) (recptr >> 32), (uint32) recptr);
return false;
}
memcpy(&bkpb, blk, sizeof(BkpBlock));
if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
{
report_invalid_record(state,
"incorrect hole size in record at %X/%X",
(uint32) (recptr >> 32), (uint32) recptr);
return false;
}
blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
if (remaining < blen)
{
report_invalid_record(state,
"invalid backup block size in record at %X/%X",
(uint32) (recptr >> 32), (uint32) recptr);
return false;
}
remaining -= blen;
COMP_CRC32C(crc, blk, blen);
blk += blen;
}
/* Check that xl_tot_len agrees with our calculation */
if (remaining != 0)
{
report_invalid_record(state,
"incorrect total length in record at %X/%X",
(uint32) (recptr >> 32), (uint32) recptr);
return false;
}
/* Finally include the record header */
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc)); COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc); FIN_CRC32C(crc);
@ -985,3 +901,321 @@ out:
} }
#endif /* FRONTEND */ #endif /* FRONTEND */
/* ----------------------------------------
* Functions for decoding the data and block references in a record.
* ----------------------------------------
*/
/*
 * Forget everything decoded from the previous record.
 *
 * Only the flags of the block slots used by that record are cleared; the
 * data buffers attached to the slots are left in place.
 */
static void
ResetDecoder(XLogReaderState *state)
{
	int			last_used = state->max_block_id;
	int			i;

	state->decoded_record = NULL;
	state->main_data_len = 0;

	for (i = 0; i <= last_used; i++)
	{
		DecodedBkpBlock *blk = &state->blocks[i];

		blk->in_use = false;
		blk->has_image = false;
		blk->has_data = false;
	}

	state->max_block_id = -1;
}
/*
 * Decode the previously read record.
 *
 * Walks the fragment headers that follow the fixed-size XLogRecord header
 * (block reference headers, terminated by the main data header), validates
 * them, and then copies each block's data and the main data into separate
 * buffers hanging off 'state'.  Full-page images are not copied;
 * bkp_image is left pointing into 'record'.
 *
 * On error, a human-readable error message is returned in *errormsg, and
 * the return value is false.
 */
bool
DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
{
	/*
	 * read next _size bytes from record buffer, but check for overrun first.
	 */
#define COPY_HEADER_FIELD(_dst, _size)			\
	do {										\
		if (remaining < (_size))				\
			goto shortdata_err;					\
		memcpy((_dst), ptr, (_size));			\
		ptr += (_size);							\
		remaining -= (_size);					\
	} while(0)

	char	   *ptr;
	uint32		remaining;
	uint32		datatotal;
	RelFileNode *rnode = NULL;
	uint8		block_id;

	ResetDecoder(state);

	state->decoded_record = record;

	ptr = (char *) record;
	ptr += SizeOfXLogRecord;
	remaining = record->xl_tot_len - SizeOfXLogRecord;

	/* Decode the headers */
	datatotal = 0;
	while (remaining > datatotal)
	{
		COPY_HEADER_FIELD(&block_id, sizeof(uint8));

		if (block_id == XLR_BLOCK_ID_DATA_SHORT)
		{
			/* XLogRecordDataHeaderShort */
			uint8		main_data_len;

			COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));

			state->main_data_len = main_data_len;
			datatotal += main_data_len;
			break;				/* by convention, the main data fragment is
								 * always last */
		}
		else if (block_id == XLR_BLOCK_ID_DATA_LONG)
		{
			/* XLogRecordDataHeaderLong */
			uint32		main_data_len;

			COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
			state->main_data_len = main_data_len;
			datatotal += main_data_len;
			break;				/* by convention, the main data fragment is
								 * always last */
		}
		else if (block_id <= XLR_MAX_BLOCK_ID)
		{
			/* XLogRecordBlockHeader */
			DecodedBkpBlock *blk;
			uint8		fork_flags;

			/* block references must appear in strictly increasing ID order */
			if (block_id <= state->max_block_id)
			{
				report_invalid_record(state,
									  "out-of-order block_id %u at %X/%X",
									  block_id,
									  (uint32) (state->ReadRecPtr >> 32),
									  (uint32) state->ReadRecPtr);
				goto err;
			}
			state->max_block_id = block_id;

			blk = &state->blocks[block_id];
			blk->in_use = true;

			COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
			blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
			blk->flags = fork_flags;
			blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
			blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);

			COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));

			/*
			 * cross-check that the HAS_DATA flag is set iff data_len > 0.
			 * A mismatch means the record is invalid, so stop decoding
			 * rather than just recording the message.
			 */
			if (blk->has_data && blk->data_len == 0)
			{
				report_invalid_record(state,
									  "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
									  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
				goto err;
			}
			if (!blk->has_data && blk->data_len != 0)
			{
				report_invalid_record(state,
									  "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
									  (unsigned int) blk->data_len,
									  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
				goto err;
			}
			datatotal += blk->data_len;

			if (blk->has_image)
			{
				COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
				COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));

				/*
				 * The hole must lie within the page; otherwise the image
				 * length computed below, and the copies done later when
				 * the image is restored, would be out of bounds.
				 */
				if (blk->hole_offset + blk->hole_length > BLCKSZ)
				{
					report_invalid_record(state,
										  "incorrect hole size in record at %X/%X",
										  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
					goto err;
				}
				datatotal += BLCKSZ - blk->hole_length;
			}
			if (!(fork_flags & BKPBLOCK_SAME_REL))
			{
				COPY_HEADER_FIELD(&blk->rnode, sizeof(RelFileNode));
				rnode = &blk->rnode;
			}
			else
			{
				/* SAME_REL reuses the relation of the preceding block ref */
				if (rnode == NULL)
				{
					report_invalid_record(state,
										  "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
										  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
					goto err;
				}

				blk->rnode = *rnode;
			}
			COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
		}
		else
		{
			report_invalid_record(state,
								  "invalid block_id %u at %X/%X",
								  block_id,
								  (uint32) (state->ReadRecPtr >> 32),
								  (uint32) state->ReadRecPtr);
			goto err;
		}
	}

	if (remaining != datatotal)
		goto shortdata_err;

	/*
	 * Ok, we've parsed the fragment headers, and verified that the total
	 * length of the payload in the fragments is equal to the amount of data
	 * left. Copy the data of each fragment to a separate buffer.
	 *
	 * We could just set up pointers into readRecordBuf, but we want to align
	 * the data for the convenience of the callers. Backup images are not
	 * copied, however; they don't need alignment.
	 */

	/* block data first */
	for (block_id = 0; block_id <= state->max_block_id; block_id++)
	{
		DecodedBkpBlock *blk = &state->blocks[block_id];

		if (!blk->in_use)
			continue;
		if (blk->has_image)
		{
			blk->bkp_image = ptr;
			ptr += BLCKSZ - blk->hole_length;
		}
		if (blk->has_data)
		{
			/* reuse the slot's buffer unless it is missing or too small */
			if (!blk->data || blk->data_len > blk->data_bufsz)
			{
				if (blk->data)
					pfree(blk->data);
				blk->data_bufsz = blk->data_len;
				blk->data = palloc(blk->data_bufsz);
			}
			memcpy(blk->data, ptr, blk->data_len);
			ptr += blk->data_len;
		}
	}

	/* and finally, the main data */
	if (state->main_data_len > 0)
	{
		if (!state->main_data || state->main_data_len > state->main_data_bufsz)
		{
			if (state->main_data)
				pfree(state->main_data);
			state->main_data_bufsz = state->main_data_len;
			state->main_data = palloc(state->main_data_bufsz);
		}
		memcpy(state->main_data, ptr, state->main_data_len);
		ptr += state->main_data_len;
	}

	return true;

shortdata_err:
	report_invalid_record(state,
						  "record with invalid length at %X/%X",
						  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
err:
	*errormsg = state->errormsg_buf;

	return false;
}
/*
 * Look up the relation, fork and block number of a block reference.
 *
 * Returns true and fills in *rnode, *forknum and *blknum (each only if the
 * pointer is not NULL) when the decoded record has a block reference with
 * the given ID; returns false, touching nothing, when it does not.
 */
bool
XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
				   RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
{
	DecodedBkpBlock *ref = &record->blocks[block_id];

	if (ref->in_use)
	{
		if (rnode != NULL)
			*rnode = ref->rnode;
		if (forknum != NULL)
			*forknum = ref->forknum;
		if (blknum != NULL)
			*blknum = ref->blkno;
		return true;
	}

	return false;
}
/*
 * Returns the data associated with a block reference, or NULL if there is
 * no data (e.g. because a full-page image was taken instead). The returned
 * pointer points to a MAXALIGNed buffer.
 *
 * If 'len' is not NULL, *len is set to the length of the returned data, or
 * to 0 whenever NULL is returned -- including when the record has no block
 * reference with the given ID, so callers never see an unset length.
 */
char *
XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
{
	DecodedBkpBlock *bkpb = &record->blocks[block_id];

	if (!bkpb->in_use || !bkpb->has_data)
	{
		if (len)
			*len = 0;
		return NULL;
	}

	if (len)
		*len = bkpb->data_len;
	return bkpb->data;
}
/*
 * Reconstruct a full page from the full-page image attached to a block
 * reference, writing BLCKSZ bytes to 'page'.
 *
 * Returns false if the record has no block reference with the given ID or
 * that reference carries no image; otherwise fills 'page' and returns true.
 */
bool
RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
{
	DecodedBkpBlock *blk = &record->blocks[block_id];

	if (!blk->in_use)
		return false;
	if (!blk->has_image)
		return false;

	if (blk->hole_length != 0)
	{
		int			hole_end = blk->hole_offset + blk->hole_length;

		/* part before the hole */
		memcpy(page, blk->bkp_image, blk->hole_offset);
		/* must zero-fill the hole */
		MemSet(page + blk->hole_offset, 0, blk->hole_length);
		/* part after the hole; the image itself does not store the hole */
		memcpy(page + hole_end,
			   blk->bkp_image + blk->hole_offset,
			   BLCKSZ - hole_end);
	}
	else
	{
		memcpy(page, blk->bkp_image, BLCKSZ);
	}

	return true;
}

View File

@ -253,9 +253,8 @@ XLogCheckInvalidPages(void)
* *
* 'lsn' is the LSN of the record being replayed. It is compared with the * 'lsn' is the LSN of the record being replayed. It is compared with the
* page's LSN to determine if the record has already been replayed. * page's LSN to determine if the record has already been replayed.
* 'rnode' and 'blkno' point to the block being replayed (main fork number * 'block_id' is the ID number the block was registered with, when the WAL
* is implied, use XLogReadBufferForRedoExtended for other forks). * record was created.
* 'block_index' identifies the backup block in the record for the page.
* *
* Returns one of the following: * Returns one of the following:
* *
@ -272,15 +271,36 @@ XLogCheckInvalidPages(void)
* single-process crash recovery, but some subroutines such as MarkBufferDirty * single-process crash recovery, but some subroutines such as MarkBufferDirty
* will complain if we don't have the lock. In hot standby mode it's * will complain if we don't have the lock. In hot standby mode it's
* definitely necessary.) * definitely necessary.)
*
* Note: when a backup block is available in XLOG, we restore it
* unconditionally, even if the page in the database appears newer. This is
* to protect ourselves against database pages that were partially or
* incorrectly written during a crash. We assume that the XLOG data must be
* good because it has passed a CRC check, while the database page might not
* be. This will force us to replay all subsequent modifications of the page
* that appear in XLOG, rather than possibly ignoring them as already
* applied, but that's not a huge drawback.
*/ */
XLogRedoAction XLogRedoAction
XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index, XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id,
RelFileNode rnode, BlockNumber blkno,
Buffer *buf) Buffer *buf)
{ {
return XLogReadBufferForRedoExtended(lsn, record, block_index, return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
rnode, MAIN_FORKNUM, blkno, false, buf);
RBM_NORMAL, false, buf); }
/*
* Pin and lock a buffer referenced by a WAL record, for the purpose of
* re-initializing it.
*/
Buffer
XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
{
Buffer buf;
XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK, false,
&buf);
return buf;
} }
/* /*
@ -299,21 +319,54 @@ XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
* using LockBufferForCleanup(), instead of a regular exclusive lock. * using LockBufferForCleanup(), instead of a regular exclusive lock.
*/ */
XLogRedoAction XLogRedoAction
XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record, XLogReadBufferForRedoExtended(XLogReaderState *record,
int block_index, RelFileNode rnode, uint8 block_id,
ForkNumber forkno, BlockNumber blkno,
ReadBufferMode mode, bool get_cleanup_lock, ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf) Buffer *buf)
{ {
if (record->xl_info & XLR_BKP_BLOCK(block_index)) XLogRecPtr lsn = record->EndRecPtr;
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blkno;
Page page;
if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
{ {
*buf = RestoreBackupBlock(lsn, record, block_index, /* Caller specified a bogus block_id */
get_cleanup_lock, true); elog(PANIC, "failed to locate backup block with ID %d", block_id);
}
/* If it's a full-page image, restore it. */
if (XLogRecHasBlockImage(record, block_id))
{
*buf = XLogReadBufferExtended(rnode, forknum, blkno,
get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
page = BufferGetPage(*buf);
if (!RestoreBlockImage(record, block_id, page))
elog(ERROR, "failed to restore block image");
/*
* The page may be uninitialized. If so, we can't set the LSN because
* that would corrupt the page.
*/
if (!PageIsNew(page))
{
PageSetLSN(page, lsn);
}
MarkBufferDirty(*buf);
return BLK_RESTORED; return BLK_RESTORED;
} }
else else
{ {
*buf = XLogReadBufferExtended(rnode, forkno, blkno, mode); if ((record->blocks[block_id].flags & BKPBLOCK_WILL_INIT) != 0 &&
mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
{
elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
}
*buf = XLogReadBufferExtended(rnode, forknum, blkno, mode);
if (BufferIsValid(*buf)) if (BufferIsValid(*buf))
{ {
if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK) if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
@ -333,37 +386,6 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
} }
} }
/*
* XLogReadBuffer
* Read a page during XLOG replay.
*
* This is a shorthand of XLogReadBufferExtended() followed by
* LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), for reading from the main
* fork.
*
* (Getting the buffer lock is not really necessary during single-process
* crash recovery, but some subroutines such as MarkBufferDirty will complain
* if we don't have the lock. In hot standby mode it's definitely necessary.)
*
* The returned buffer is exclusively-locked.
*
* For historical reasons, instead of a ReadBufferMode argument, this only
* supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
* modes.
*/
Buffer
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{
Buffer buf;
buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
if (BufferIsValid(buf) && !init)
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
return buf;
}
/* /*
* XLogReadBufferExtended * XLogReadBufferExtended
* Read a page during XLOG replay * Read a page during XLOG replay
@ -383,6 +405,11 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
* exist, and we don't check for all-zeroes. Thus, no log entry is made * exist, and we don't check for all-zeroes. Thus, no log entry is made
* to imply that the page should be dropped or truncated later. * to imply that the page should be dropped or truncated later.
*
* NB: A redo function should normally not call this directly. To get a page
* to modify, use XLogReadBufferForRedo instead. It is important that all pages
* modified by a WAL record are registered in the WAL records, or they will be
* invisible to tools that need to know which pages are modified.
*/ */
Buffer Buffer
XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
@ -473,124 +500,6 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
return buffer; return buffer;
} }
/*
* Restore a full-page image from a backup block attached to an XLOG record.
*
* lsn: LSN of the XLOG record being replayed
* record: the complete XLOG record
* block_index: which backup block to restore (0 .. XLR_MAX_BKP_BLOCKS - 1)
* get_cleanup_lock: TRUE to get a cleanup rather than plain exclusive lock
* keep_buffer: TRUE to return the buffer still locked and pinned
*
* Returns the buffer number containing the page. Note this is not terribly
* useful unless keep_buffer is specified as TRUE.
*
* Note: when a backup block is available in XLOG, we restore it
* unconditionally, even if the page in the database appears newer.
* This is to protect ourselves against database pages that were partially
* or incorrectly written during a crash. We assume that the XLOG data
* must be good because it has passed a CRC check, while the database
* page might not be. This will force us to replay all subsequent
* modifications of the page that appear in XLOG, rather than possibly
* ignoring them as already applied, but that's not a huge drawback.
*
* If 'get_cleanup_lock' is true, a cleanup lock is obtained on the buffer,
* else a normal exclusive lock is used. During crash recovery, that's just
* pro forma because there can't be any regular backends in the system, but
* in hot standby mode the distinction is important.
*
* If 'keep_buffer' is true, return without releasing the buffer lock and pin;
* then caller is responsible for doing UnlockReleaseBuffer() later. This
* is needed in some cases when replaying XLOG records that touch multiple
* pages, to prevent inconsistent states from being visible to other backends.
* (Again, that's only important in hot standby mode.)
*/
Buffer
RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
bool get_cleanup_lock, bool keep_buffer)
{
BkpBlock bkpb;
char *blk;
int i;
/* Locate requested BkpBlock in the record */
blk = (char *) XLogRecGetData(record) + record->xl_len;
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
{
if (!(record->xl_info & XLR_BKP_BLOCK(i)))
continue;
memcpy(&bkpb, blk, sizeof(BkpBlock));
blk += sizeof(BkpBlock);
if (i == block_index)
{
/* Found it, apply the update */
return RestoreBackupBlockContents(lsn, bkpb, blk, get_cleanup_lock,
keep_buffer);
}
blk += BLCKSZ - bkpb.hole_length;
}
/* Caller specified a bogus block_index */
elog(ERROR, "failed to restore block_index %d", block_index);
return InvalidBuffer; /* keep compiler quiet */
}
/*
* Workhorse for RestoreBackupBlock usable without an xlog record
*
* Restores a full-page image from BkpBlock and a data pointer.
*/
Buffer
RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
bool get_cleanup_lock, bool keep_buffer)
{
Buffer buffer;
Page page;
buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer);
if (bkpb.hole_length == 0)
{
memcpy((char *) page, blk, BLCKSZ);
}
else
{
memcpy((char *) page, blk, bkpb.hole_offset);
/* must zero-fill the hole */
MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
blk + bkpb.hole_offset,
BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
}
/*
* The checksum value on this page is currently invalid. We don't need to
* reset it here since it will be set before being written.
*/
/*
* The page may be uninitialized. If so, we can't set the LSN because that
* would corrupt the page.
*/
if (!PageIsNew(page))
{
PageSetLSN(page, lsn);
}
MarkBufferDirty(buffer);
if (!keep_buffer)
UnlockReleaseBuffer(buffer);
return buffer;
}
/* /*
* Struct actually returned by XLogFakeRelcacheEntry, though the declared * Struct actually returned by XLogFakeRelcacheEntry, though the declared
* return type is Relation. * return type is Relation.

View File

@ -125,7 +125,6 @@ void
log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum) log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
{ {
xl_smgr_create xlrec; xl_smgr_create xlrec;
XLogRecData rdata;
/* /*
* Make an XLOG entry reporting the file creation. * Make an XLOG entry reporting the file creation.
@ -133,12 +132,9 @@ log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
xlrec.rnode = *rnode; xlrec.rnode = *rnode;
xlrec.forkNum = forkNum; xlrec.forkNum = forkNum;
rdata.data = (char *) &xlrec; XLogBeginInsert();
rdata.len = sizeof(xlrec); XLogRegisterData((char *) &xlrec, sizeof(xlrec));
rdata.buffer = InvalidBuffer; XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
rdata.next = NULL;
XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
} }
/* /*
@ -268,18 +264,16 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
* Make an XLOG entry reporting the file truncation. * Make an XLOG entry reporting the file truncation.
*/ */
XLogRecPtr lsn; XLogRecPtr lsn;
XLogRecData rdata;
xl_smgr_truncate xlrec; xl_smgr_truncate xlrec;
xlrec.blkno = nblocks; xlrec.blkno = nblocks;
xlrec.rnode = rel->rd_node; xlrec.rnode = rel->rd_node;
rdata.data = (char *) &xlrec; XLogBeginInsert();
rdata.len = sizeof(xlrec); XLogRegisterData((char *) &xlrec, sizeof(xlrec));
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata); lsn = XLogInsert(RM_SMGR_ID,
XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
/* /*
* Flush, because otherwise the truncation of the main relation might * Flush, because otherwise the truncation of the main relation might
@ -479,12 +473,13 @@ AtSubAbort_smgr(void)
} }
void void
smgr_redo(XLogRecPtr lsn, XLogRecord *record) smgr_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; XLogRecPtr lsn = record->EndRecPtr;
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in smgr records */ /* Backup blocks are not used in smgr records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_SMGR_CREATE) if (info == XLOG_SMGR_CREATE)
{ {
@ -505,8 +500,8 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
/* /*
* Forcibly create relation if it doesn't exist (which suggests that * Forcibly create relation if it doesn't exist (which suggests that
* it was dropped somewhere later in the WAL sequence). As in * it was dropped somewhere later in the WAL sequence). As in
* XLogReadBuffer, we prefer to recreate the rel and replay the log as * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
* best we can until the drop is seen. * log as best we can until the drop is seen.
*/ */
smgrcreate(reln, MAIN_FORKNUM, true); smgrcreate(reln, MAIN_FORKNUM, true);

View File

@ -619,19 +619,17 @@ createdb(const CreatedbStmt *stmt)
/* Record the filesystem change in XLOG */ /* Record the filesystem change in XLOG */
{ {
xl_dbase_create_rec xlrec; xl_dbase_create_rec xlrec;
XLogRecData rdata[1];
xlrec.db_id = dboid; xlrec.db_id = dboid;
xlrec.tablespace_id = dsttablespace; xlrec.tablespace_id = dsttablespace;
xlrec.src_db_id = src_dboid; xlrec.src_db_id = src_dboid;
xlrec.src_tablespace_id = srctablespace; xlrec.src_tablespace_id = srctablespace;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = sizeof(xl_dbase_create_rec); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
rdata[0].buffer = InvalidBuffer;
rdata[0].next = NULL;
(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata); (void) XLogInsert(RM_DBASE_ID,
XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
} }
} }
heap_endscan(scan); heap_endscan(scan);
@ -1226,19 +1224,17 @@ movedb(const char *dbname, const char *tblspcname)
*/ */
{ {
xl_dbase_create_rec xlrec; xl_dbase_create_rec xlrec;
XLogRecData rdata[1];
xlrec.db_id = db_id; xlrec.db_id = db_id;
xlrec.tablespace_id = dst_tblspcoid; xlrec.tablespace_id = dst_tblspcoid;
xlrec.src_db_id = db_id; xlrec.src_db_id = db_id;
xlrec.src_tablespace_id = src_tblspcoid; xlrec.src_tablespace_id = src_tblspcoid;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = sizeof(xl_dbase_create_rec); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
rdata[0].buffer = InvalidBuffer;
rdata[0].next = NULL;
(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata); (void) XLogInsert(RM_DBASE_ID,
XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
} }
/* /*
@ -1330,17 +1326,15 @@ movedb(const char *dbname, const char *tblspcname)
*/ */
{ {
xl_dbase_drop_rec xlrec; xl_dbase_drop_rec xlrec;
XLogRecData rdata[1];
xlrec.db_id = db_id; xlrec.db_id = db_id;
xlrec.tablespace_id = src_tblspcoid; xlrec.tablespace_id = src_tblspcoid;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = sizeof(xl_dbase_drop_rec); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
rdata[0].buffer = InvalidBuffer;
rdata[0].next = NULL;
(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata); (void) XLogInsert(RM_DBASE_ID,
XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
} }
/* Now it's safe to release the database lock */ /* Now it's safe to release the database lock */
@ -1870,17 +1864,15 @@ remove_dbtablespaces(Oid db_id)
/* Record the filesystem change in XLOG */ /* Record the filesystem change in XLOG */
{ {
xl_dbase_drop_rec xlrec; xl_dbase_drop_rec xlrec;
XLogRecData rdata[1];
xlrec.db_id = db_id; xlrec.db_id = db_id;
xlrec.tablespace_id = dsttablespace; xlrec.tablespace_id = dsttablespace;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = sizeof(xl_dbase_drop_rec); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
rdata[0].buffer = InvalidBuffer;
rdata[0].next = NULL;
(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata); (void) XLogInsert(RM_DBASE_ID,
XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
} }
pfree(dstpath); pfree(dstpath);
@ -2043,12 +2035,12 @@ get_database_name(Oid dbid)
* DATABASE resource manager's routines * DATABASE resource manager's routines
*/ */
void void
dbase_redo(XLogRecPtr lsn, XLogRecord *record) dbase_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in dbase records */ /* Backup blocks are not used in dbase records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_DBASE_CREATE) if (info == XLOG_DBASE_CREATE)
{ {

View File

@ -372,20 +372,16 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
{ {
xl_seq_rec xlrec; xl_seq_rec xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
XLogBeginInsert();
XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
xlrec.node = rel->rd_node; xlrec.node = rel->rd_node;
rdata[0].data = (char *) &xlrec;
rdata[0].len = sizeof(xl_seq_rec);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) tuple->t_data; XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
rdata[1].len = tuple->t_len; XLogRegisterData((char *) tuple->t_data, tuple->t_len);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata); recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -454,21 +450,17 @@ AlterSequence(AlterSeqStmt *stmt)
{ {
xl_seq_rec xlrec; xl_seq_rec xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
Page page = BufferGetPage(buf); Page page = BufferGetPage(buf);
XLogBeginInsert();
XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
xlrec.node = seqrel->rd_node; xlrec.node = seqrel->rd_node;
rdata[0].data = (char *) &xlrec; XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
rdata[0].len = sizeof(xl_seq_rec);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) seqtuple.t_data; XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
rdata[1].len = seqtuple.t_len;
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata); recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -706,7 +698,6 @@ nextval_internal(Oid relid)
{ {
xl_seq_rec xlrec; xl_seq_rec xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
/* /*
* We don't log the current state of the tuple, but rather the state * We don't log the current state of the tuple, but rather the state
@ -714,6 +705,8 @@ nextval_internal(Oid relid)
* that many future WAL records, at the cost that we lose those * that many future WAL records, at the cost that we lose those
* sequence values if we crash. * sequence values if we crash.
*/ */
XLogBeginInsert();
XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
/* set values that will be saved in xlog */ /* set values that will be saved in xlog */
seq->last_value = next; seq->last_value = next;
@ -721,17 +714,11 @@ nextval_internal(Oid relid)
seq->log_cnt = 0; seq->log_cnt = 0;
xlrec.node = seqrel->rd_node; xlrec.node = seqrel->rd_node;
rdata[0].data = (char *) &xlrec;
rdata[0].len = sizeof(xl_seq_rec);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) seqtuple.t_data; XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
rdata[1].len = seqtuple.t_len; XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata); recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -894,21 +881,16 @@ do_setval(Oid relid, int64 next, bool iscalled)
{ {
xl_seq_rec xlrec; xl_seq_rec xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData rdata[2];
Page page = BufferGetPage(buf); Page page = BufferGetPage(buf);
XLogBeginInsert();
XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
xlrec.node = seqrel->rd_node; xlrec.node = seqrel->rd_node;
rdata[0].data = (char *) &xlrec; XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
rdata[0].len = sizeof(xl_seq_rec); XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) seqtuple.t_data; recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
rdata[1].len = seqtuple.t_len;
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
} }
@ -1552,9 +1534,10 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
void void
seq_redo(XLogRecPtr lsn, XLogRecord *record) seq_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; XLogRecPtr lsn = record->EndRecPtr;
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
Buffer buffer; Buffer buffer;
Page page; Page page;
Page localpage; Page localpage;
@ -1563,14 +1546,10 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record); xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
sequence_magic *sm; sequence_magic *sm;
/* Backup blocks are not used in seq records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
if (info != XLOG_SEQ_LOG) if (info != XLOG_SEQ_LOG)
elog(PANIC, "seq_redo: unknown op code %u", info); elog(PANIC, "seq_redo: unknown op code %u", info);
buffer = XLogReadBuffer(xlrec->node, 0, true); buffer = XLogInitBufferForRedo(record, 0);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
/* /*
@ -1589,7 +1568,7 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
sm->magic = SEQ_MAGIC; sm->magic = SEQ_MAGIC;
item = (char *) xlrec + sizeof(xl_seq_rec); item = (char *) xlrec + sizeof(xl_seq_rec);
itemsz = record->xl_len - sizeof(xl_seq_rec); itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec);
if (PageAddItem(localpage, (Item) item, itemsz, if (PageAddItem(localpage, (Item) item, itemsz,
FirstOffsetNumber, false, false) == InvalidOffsetNumber) FirstOffsetNumber, false, false) == InvalidOffsetNumber)

View File

@ -354,20 +354,15 @@ CreateTableSpace(CreateTableSpaceStmt *stmt)
/* Record the filesystem change in XLOG */ /* Record the filesystem change in XLOG */
{ {
xl_tblspc_create_rec xlrec; xl_tblspc_create_rec xlrec;
XLogRecData rdata[2];
xlrec.ts_id = tablespaceoid; xlrec.ts_id = tablespaceoid;
rdata[0].data = (char *) &xlrec;
rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) location; XLogBeginInsert();
rdata[1].len = strlen(location) + 1; XLogRegisterData((char *) &xlrec,
rdata[1].buffer = InvalidBuffer; offsetof(xl_tblspc_create_rec, ts_path));
rdata[1].next = NULL; XLogRegisterData((char *) location, strlen(location) + 1);
(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata); (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE);
} }
/* /*
@ -515,15 +510,13 @@ DropTableSpace(DropTableSpaceStmt *stmt)
/* Record the filesystem change in XLOG */ /* Record the filesystem change in XLOG */
{ {
xl_tblspc_drop_rec xlrec; xl_tblspc_drop_rec xlrec;
XLogRecData rdata[1];
xlrec.ts_id = tablespaceoid; xlrec.ts_id = tablespaceoid;
rdata[0].data = (char *) &xlrec;
rdata[0].len = sizeof(xl_tblspc_drop_rec);
rdata[0].buffer = InvalidBuffer;
rdata[0].next = NULL;
(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata); XLogBeginInsert();
XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec));
(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP);
} }
/* /*
@ -1408,12 +1401,12 @@ get_tablespace_name(Oid spc_oid)
* TABLESPACE resource manager's routines * TABLESPACE resource manager's routines
*/ */
void void
tblspc_redo(XLogRecPtr lsn, XLogRecord *record) tblspc_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in tblspc records */ /* Backup blocks are not used in tblspc records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_TBLSPC_CREATE) if (info == XLOG_TBLSPC_CREATE)
{ {

View File

@ -31,7 +31,9 @@
#include "access/transam.h" #include "access/transam.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog_internal.h" #include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "access/xlogreader.h" #include "access/xlogreader.h"
#include "access/xlogrecord.h"
#include "catalog/pg_control.h" #include "catalog/pg_control.h"
@ -46,8 +48,7 @@ typedef struct XLogRecordBuffer
{ {
XLogRecPtr origptr; XLogRecPtr origptr;
XLogRecPtr endptr; XLogRecPtr endptr;
XLogRecord record; XLogReaderState *record;
char *record_data;
} XLogRecordBuffer; } XLogRecordBuffer;
/* RMGR Handlers */ /* RMGR Handlers */
@ -79,17 +80,16 @@ static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tup);
* context. * context.
*/ */
void void
LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record) LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *record)
{ {
XLogRecordBuffer buf; XLogRecordBuffer buf;
buf.origptr = ctx->reader->ReadRecPtr; buf.origptr = ctx->reader->ReadRecPtr;
buf.endptr = ctx->reader->EndRecPtr; buf.endptr = ctx->reader->EndRecPtr;
buf.record = *record; buf.record = record;
buf.record_data = XLogRecGetData(record);
/* cast so we get a warning when new rmgrs are added */ /* cast so we get a warning when new rmgrs are added */
switch ((RmgrIds) buf.record.xl_rmid) switch ((RmgrIds) XLogRecGetRmid(record))
{ {
/* /*
* Rmgrs we care about for logical decoding. Add new rmgrs in * Rmgrs we care about for logical decoding. Add new rmgrs in
@ -135,7 +135,7 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
case RM_BRIN_ID: case RM_BRIN_ID:
break; break;
case RM_NEXT_ID: case RM_NEXT_ID:
elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid); elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) XLogRecGetRmid(buf.record));
} }
} }
@ -146,7 +146,7 @@ static void
DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
SnapBuild *builder = ctx->snapshot_builder; SnapBuild *builder = ctx->snapshot_builder;
uint8 info = buf->record.xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(buf->record) & ~XLR_INFO_MASK;
switch (info) switch (info)
{ {
@ -185,8 +185,8 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
SnapBuild *builder = ctx->snapshot_builder; SnapBuild *builder = ctx->snapshot_builder;
ReorderBuffer *reorder = ctx->reorder; ReorderBuffer *reorder = ctx->reorder;
XLogRecord *r = &buf->record; XLogReaderState *r = buf->record;
uint8 info = r->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
/* no point in doing anything yet, data could not be decoded anyway */ /* no point in doing anything yet, data could not be decoded anyway */
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT) if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
@ -200,12 +200,12 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
TransactionId *subxacts = NULL; TransactionId *subxacts = NULL;
SharedInvalidationMessage *invals = NULL; SharedInvalidationMessage *invals = NULL;
xlrec = (xl_xact_commit *) buf->record_data; xlrec = (xl_xact_commit *) XLogRecGetData(r);
subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]); invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
DecodeCommit(ctx, buf, r->xl_xid, xlrec->dbId, DecodeCommit(ctx, buf, XLogRecGetXid(r), xlrec->dbId,
xlrec->xact_time, xlrec->xact_time,
xlrec->nsubxacts, subxacts, xlrec->nsubxacts, subxacts,
xlrec->nmsgs, invals); xlrec->nmsgs, invals);
@ -220,7 +220,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
SharedInvalidationMessage *invals = NULL; SharedInvalidationMessage *invals = NULL;
/* Prepared commits contain a normal commit record... */ /* Prepared commits contain a normal commit record... */
prec = (xl_xact_commit_prepared *) buf->record_data; prec = (xl_xact_commit_prepared *) XLogRecGetData(r);
xlrec = &prec->crec; xlrec = &prec->crec;
subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
@ -237,9 +237,9 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
xl_xact_commit_compact *xlrec; xl_xact_commit_compact *xlrec;
xlrec = (xl_xact_commit_compact *) buf->record_data; xlrec = (xl_xact_commit_compact *) XLogRecGetData(r);
DecodeCommit(ctx, buf, r->xl_xid, InvalidOid, DecodeCommit(ctx, buf, XLogRecGetXid(r), InvalidOid,
xlrec->xact_time, xlrec->xact_time,
xlrec->nsubxacts, xlrec->subxacts, xlrec->nsubxacts, xlrec->subxacts,
0, NULL); 0, NULL);
@ -250,11 +250,11 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
xl_xact_abort *xlrec; xl_xact_abort *xlrec;
TransactionId *sub_xids; TransactionId *sub_xids;
xlrec = (xl_xact_abort *) buf->record_data; xlrec = (xl_xact_abort *) XLogRecGetData(r);
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
DecodeAbort(ctx, buf->origptr, r->xl_xid, DecodeAbort(ctx, buf->origptr, XLogRecGetXid(r),
sub_xids, xlrec->nsubxacts); sub_xids, xlrec->nsubxacts);
break; break;
} }
@ -265,7 +265,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
TransactionId *sub_xids; TransactionId *sub_xids;
/* prepared aborts contain a normal abort record... */ /* prepared aborts contain a normal abort record... */
prec = (xl_xact_abort_prepared *) buf->record_data; prec = (xl_xact_abort_prepared *) XLogRecGetData(r);
xlrec = &prec->arec; xlrec = &prec->arec;
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
@ -282,7 +282,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
int i; int i;
TransactionId *sub_xid; TransactionId *sub_xid;
xlrec = (xl_xact_assignment *) buf->record_data; xlrec = (xl_xact_assignment *) XLogRecGetData(r);
sub_xid = &xlrec->xsub[0]; sub_xid = &xlrec->xsub[0];
@ -316,14 +316,14 @@ static void
DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
SnapBuild *builder = ctx->snapshot_builder; SnapBuild *builder = ctx->snapshot_builder;
XLogRecord *r = &buf->record; XLogReaderState *r = buf->record;
uint8 info = r->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
switch (info) switch (info)
{ {
case XLOG_RUNNING_XACTS: case XLOG_RUNNING_XACTS:
{ {
xl_running_xacts *running = (xl_running_xacts *) buf->record_data; xl_running_xacts *running = (xl_running_xacts *) XLogRecGetData(r);
SnapBuildProcessRunningXacts(builder, buf->origptr, running); SnapBuildProcessRunningXacts(builder, buf->origptr, running);
@ -352,8 +352,8 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void static void
DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK; uint8 info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
TransactionId xid = buf->record.xl_xid; TransactionId xid = XLogRecGetXid(buf->record);
SnapBuild *builder = ctx->snapshot_builder; SnapBuild *builder = ctx->snapshot_builder;
/* no point in doing anything yet */ /* no point in doing anything yet */
@ -370,7 +370,7 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
xl_heap_new_cid *xlrec; xl_heap_new_cid *xlrec;
xlrec = (xl_heap_new_cid *) buf->record_data; xlrec = (xl_heap_new_cid *) XLogRecGetData(buf->record);
SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec); SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
break; break;
@ -405,8 +405,8 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void static void
DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK; uint8 info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
TransactionId xid = buf->record.xl_xid; TransactionId xid = XLogRecGetXid(buf->record);
SnapBuild *builder = ctx->snapshot_builder; SnapBuild *builder = ctx->snapshot_builder;
/* no point in doing anything yet */ /* no point in doing anything yet */
@ -576,34 +576,35 @@ DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
static void static void
DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
XLogRecord *r = &buf->record; XLogReaderState *r = buf->record;
xl_heap_insert *xlrec; xl_heap_insert *xlrec;
ReorderBufferChange *change; ReorderBufferChange *change;
RelFileNode target_node;
xlrec = (xl_heap_insert *) buf->record_data; xlrec = (xl_heap_insert *) XLogRecGetData(r);
/* only interested in our database */ /* only interested in our database */
if (xlrec->target.node.dbNode != ctx->slot->data.database) XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
if (target_node.dbNode != ctx->slot->data.database)
return; return;
change = ReorderBufferGetChange(ctx->reorder); change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT; change->action = REORDER_BUFFER_CHANGE_INSERT;
memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode)); memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
{ {
Assert(r->xl_len > (SizeOfHeapInsert + SizeOfHeapHeader)); Size tuplelen;
char *tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder); change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
DecodeXLogTuple((char *) xlrec + SizeOfHeapInsert, DecodeXLogTuple(tupledata, tuplelen, change->data.tp.newtuple);
r->xl_len - SizeOfHeapInsert,
change->data.tp.newtuple);
} }
change->data.tp.clear_toast_afterwards = true; change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change); ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
} }
/* /*
@ -615,62 +616,47 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void static void
DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
XLogRecord *r = &buf->record; XLogReaderState *r = buf->record;
xl_heap_update *xlrec; xl_heap_update *xlrec;
xl_heap_header_len xlhdr;
ReorderBufferChange *change; ReorderBufferChange *change;
char *data; char *data;
Size datalen;
RelFileNode target_node;
xlrec = (xl_heap_update *) buf->record_data; xlrec = (xl_heap_update *) XLogRecGetData(r);
/* only interested in our database */ /* only interested in our database */
if (xlrec->target.node.dbNode != ctx->slot->data.database) XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
if (target_node.dbNode != ctx->slot->data.database)
return; return;
change = ReorderBufferGetChange(ctx->reorder); change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_UPDATE; change->action = REORDER_BUFFER_CHANGE_UPDATE;
memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode)); memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
/* caution, remaining data in record is not aligned */
data = buf->record_data + SizeOfHeapUpdate;
if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
{ {
Assert(r->xl_len > (SizeOfHeapUpdate + SizeOfHeapHeaderLen)); data = XLogRecGetBlockData(r, 0, &datalen);
memcpy(&xlhdr, data, sizeof(xlhdr));
data += offsetof(xl_heap_header_len, header);
change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder); change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
DecodeXLogTuple(data, DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
xlhdr.t_len + SizeOfHeapHeader,
change->data.tp.newtuple);
/* skip over the rest of the tuple header */
data += SizeOfHeapHeader;
/* skip over the tuple data */
data += xlhdr.t_len;
} }
if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD) if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
{ {
memcpy(&xlhdr, data, sizeof(xlhdr)); /* caution, remaining data in record is not aligned */
data += offsetof(xl_heap_header_len, header); data = XLogRecGetData(r) + SizeOfHeapUpdate;
datalen = XLogRecGetDataLen(r) - SizeOfHeapUpdate;
change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder); change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
DecodeXLogTuple(data, DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
xlhdr.t_len + SizeOfHeapHeader,
change->data.tp.oldtuple);
#ifdef NOT_USED
data += SizeOfHeapHeader;
data += xlhdr.t_len;
#endif
} }
change->data.tp.clear_toast_afterwards = true; change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change); ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
} }
/* /*
@ -681,36 +667,38 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void static void
DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
XLogRecord *r = &buf->record; XLogReaderState *r = buf->record;
xl_heap_delete *xlrec; xl_heap_delete *xlrec;
ReorderBufferChange *change; ReorderBufferChange *change;
RelFileNode target_node;
xlrec = (xl_heap_delete *) buf->record_data; xlrec = (xl_heap_delete *) XLogRecGetData(r);
/* only interested in our database */ /* only interested in our database */
if (xlrec->target.node.dbNode != ctx->slot->data.database) XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
if (target_node.dbNode != ctx->slot->data.database)
return; return;
change = ReorderBufferGetChange(ctx->reorder); change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_DELETE; change->action = REORDER_BUFFER_CHANGE_DELETE;
memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode)); memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
/* old primary key stored */ /* old primary key stored */
if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD) if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
{ {
Assert(r->xl_len > (SizeOfHeapDelete + SizeOfHeapHeader)); Assert(XLogRecGetDataLen(r) > (SizeOfHeapDelete + SizeOfHeapHeader));
change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder); change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
DecodeXLogTuple((char *) xlrec + SizeOfHeapDelete, DecodeXLogTuple((char *) xlrec + SizeOfHeapDelete,
r->xl_len - SizeOfHeapDelete, XLogRecGetDataLen(r) - SizeOfHeapDelete,
change->data.tp.oldtuple); change->data.tp.oldtuple);
} }
change->data.tp.clear_toast_afterwards = true; change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change); ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
} }
/* /*
@ -721,27 +709,24 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void static void
DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{ {
XLogRecord *r = &buf->record; XLogReaderState *r = buf->record;
xl_heap_multi_insert *xlrec; xl_heap_multi_insert *xlrec;
int i; int i;
char *data; char *data;
bool isinit = (r->xl_info & XLOG_HEAP_INIT_PAGE) != 0; char *tupledata;
Size tuplelen;
RelFileNode rnode;
xlrec = (xl_heap_multi_insert *) buf->record_data; xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
/* only interested in our database */ /* only interested in our database */
if (xlrec->node.dbNode != ctx->slot->data.database) XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL);
if (rnode.dbNode != ctx->slot->data.database)
return; return;
data = buf->record_data + SizeOfHeapMultiInsert; tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
/*
* OffsetNumbers (which are not of interest to us) are stored when
* XLOG_HEAP_INIT_PAGE is not set -- skip over them.
*/
if (!isinit)
data += sizeof(OffsetNumber) * xlrec->ntuples;
data = tupledata;
for (i = 0; i < xlrec->ntuples; i++) for (i = 0; i < xlrec->ntuples; i++)
{ {
ReorderBufferChange *change; ReorderBufferChange *change;
@ -751,7 +736,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
change = ReorderBufferGetChange(ctx->reorder); change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT; change->action = REORDER_BUFFER_CHANGE_INSERT;
memcpy(&change->data.tp.relnode, &xlrec->node, sizeof(RelFileNode)); memcpy(&change->data.tp.relnode, &rnode, sizeof(RelFileNode));
/* /*
* CONTAINS_NEW_TUPLE will always be set currently as multi_insert * CONTAINS_NEW_TUPLE will always be set currently as multi_insert
@ -806,9 +791,10 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
else else
change->data.tp.clear_toast_afterwards = false; change->data.tp.clear_toast_afterwards = false;
ReorderBufferQueueChange(ctx->reorder, r->xl_xid, ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
buf->origptr, change); buf->origptr, change);
} }
Assert(data == tupledata + tuplelen);
} }
/* /*

View File

@ -34,6 +34,7 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog_internal.h"
#include "replication/decode.h" #include "replication/decode.h"
#include "replication/logical.h" #include "replication/logical.h"
@ -455,12 +456,12 @@ DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
record = XLogReadRecord(ctx->reader, startptr, &err); record = XLogReadRecord(ctx->reader, startptr, &err);
if (err) if (err)
elog(ERROR, "%s", err); elog(ERROR, "%s", err);
if (!record)
Assert(record); elog(ERROR, "no record found"); /* shouldn't happen */
startptr = InvalidXLogRecPtr; startptr = InvalidXLogRecPtr;
LogicalDecodingProcessRecord(ctx, record); LogicalDecodingProcessRecord(ctx, ctx->reader);
/* only continue until we have found a consistent spot */ /* only continue until we have found a consistent spot */
if (DecodingContextReady(ctx)) if (DecodingContextReady(ctx))

View File

@ -21,6 +21,8 @@
#include "funcapi.h" #include "funcapi.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "access/xlog_internal.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "nodes/makefuncs.h" #include "nodes/makefuncs.h"
@ -431,7 +433,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin
* store the description into our tuplestore. * store the description into our tuplestore.
*/ */
if (record != NULL) if (record != NULL)
LogicalDecodingProcessRecord(ctx, record); LogicalDecodingProcessRecord(ctx, ctx->reader);
/* check limits */ /* check limits */
if (upto_lsn != InvalidXLogRecPtr && if (upto_lsn != InvalidXLogRecPtr &&

View File

@ -54,6 +54,7 @@
#include "access/transam.h" #include "access/transam.h"
#include "access/tuptoaster.h" #include "access/tuptoaster.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog_internal.h"
#include "catalog/catalog.h" #include "catalog/catalog.h"
#include "lib/binaryheap.h" #include "lib/binaryheap.h"
#include "miscadmin.h" #include "miscadmin.h"

View File

@ -699,7 +699,7 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn); ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn, ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
xlrec->target.node, xlrec->target.tid, xlrec->target_node, xlrec->target_tid,
xlrec->cmin, xlrec->cmax, xlrec->cmin, xlrec->cmax,
xlrec->combocid); xlrec->combocid);

View File

@ -2444,7 +2444,7 @@ XLogSendLogical(void)
if (record != NULL) if (record != NULL)
{ {
LogicalDecodingProcessRecord(logical_decoding_ctx, record); LogicalDecodingProcessRecord(logical_decoding_ctx, logical_decoding_ctx->reader);
sentPtr = logical_decoding_ctx->reader->EndRecPtr; sentPtr = logical_decoding_ctx->reader->EndRecPtr;
} }

View File

@ -759,12 +759,12 @@ StandbyReleaseOldLocks(int nxids, TransactionId *xids)
*/ */
void void
standby_redo(XLogRecPtr lsn, XLogRecord *record) standby_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in standby records */ /* Backup blocks are not used in standby records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record));
/* Do nothing if we're not in hot standby mode */ /* Do nothing if we're not in hot standby mode */
if (standbyState == STANDBY_DISABLED) if (standbyState == STANDBY_DISABLED)
@ -928,8 +928,6 @@ static XLogRecPtr
LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
{ {
xl_running_xacts xlrec; xl_running_xacts xlrec;
XLogRecData rdata[2];
int lastrdata = 0;
XLogRecPtr recptr; XLogRecPtr recptr;
xlrec.xcnt = CurrRunningXacts->xcnt; xlrec.xcnt = CurrRunningXacts->xcnt;
@ -940,23 +938,15 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid; xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
/* Header */ /* Header */
rdata[0].data = (char *) (&xlrec); XLogBeginInsert();
rdata[0].len = MinSizeOfXactRunningXacts; XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
rdata[0].buffer = InvalidBuffer;
/* array of TransactionIds */ /* array of TransactionIds */
if (xlrec.xcnt > 0) if (xlrec.xcnt > 0)
{ XLogRegisterData((char *) CurrRunningXacts->xids,
rdata[0].next = &(rdata[1]); (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
rdata[1].data = (char *) CurrRunningXacts->xids;
rdata[1].len = (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
rdata[lastrdata].next = NULL; recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS, rdata);
if (CurrRunningXacts->subxid_overflow) if (CurrRunningXacts->subxid_overflow)
elog(trace_recovery(DEBUG2), elog(trace_recovery(DEBUG2),
@ -996,22 +986,15 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
static void static void
LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks) LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
{ {
XLogRecData rdata[2];
xl_standby_locks xlrec; xl_standby_locks xlrec;
xlrec.nlocks = nlocks; xlrec.nlocks = nlocks;
rdata[0].data = (char *) &xlrec; XLogBeginInsert();
rdata[0].len = offsetof(xl_standby_locks, locks); XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
rdata[0].next = &rdata[1];
rdata[1].data = (char *) locks; (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
rdata[1].len = nlocks * sizeof(xl_standby_lock);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
(void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK, rdata);
} }
/* /*

View File

@ -754,7 +754,6 @@ write_relmap_file(bool shared, RelMapFile *newmap,
if (write_wal) if (write_wal)
{ {
xl_relmap_update xlrec; xl_relmap_update xlrec;
XLogRecData rdata[2];
XLogRecPtr lsn; XLogRecPtr lsn;
/* now errors are fatal ... */ /* now errors are fatal ... */
@ -764,16 +763,11 @@ write_relmap_file(bool shared, RelMapFile *newmap,
xlrec.tsid = tsid; xlrec.tsid = tsid;
xlrec.nbytes = sizeof(RelMapFile); xlrec.nbytes = sizeof(RelMapFile);
rdata[0].data = (char *) (&xlrec); XLogBeginInsert();
rdata[0].len = MinSizeOfRelmapUpdate; XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate);
rdata[0].buffer = InvalidBuffer; XLogRegisterData((char *) newmap, sizeof(RelMapFile));
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) newmap;
rdata[1].len = sizeof(RelMapFile);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE, rdata); lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE);
/* As always, WAL must hit the disk before the data update does */ /* As always, WAL must hit the disk before the data update does */
XLogFlush(lsn); XLogFlush(lsn);
@ -907,12 +901,12 @@ perform_relmap_update(bool shared, const RelMapFile *updates)
* RELMAP resource manager's routines * RELMAP resource manager's routines
*/ */
void void
relmap_redo(XLogRecPtr lsn, XLogRecord *record) relmap_redo(XLogReaderState *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in relmap records */ /* Backup blocks are not used in relmap records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_RELMAP_UPDATE) if (info == XLOG_RELMAP_UPDATE)
{ {

View File

@ -1006,6 +1006,7 @@ WriteEmptyXLOG(void)
char path[MAXPGPATH]; char path[MAXPGPATH];
int fd; int fd;
int nbytes; int nbytes;
char *recptr;
/* Use malloc() to ensure buffer is MAXALIGNED */ /* Use malloc() to ensure buffer is MAXALIGNED */
buffer = (char *) pg_malloc(XLOG_BLCKSZ); buffer = (char *) pg_malloc(XLOG_BLCKSZ);
@ -1023,18 +1024,21 @@ WriteEmptyXLOG(void)
longpage->xlp_xlog_blcksz = XLOG_BLCKSZ; longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
/* Insert the initial checkpoint record */ /* Insert the initial checkpoint record */
record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD); recptr = (char *) page + SizeOfXLogLongPHD;
record = (XLogRecord *) recptr;
record->xl_prev = 0; record->xl_prev = 0;
record->xl_xid = InvalidTransactionId; record->xl_xid = InvalidTransactionId;
record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint); record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
record->xl_len = sizeof(CheckPoint);
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
record->xl_rmid = RM_XLOG_ID; record->xl_rmid = RM_XLOG_ID;
memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy, recptr += SizeOfXLogRecord;
*(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
*(recptr++) = sizeof(CheckPoint);
memcpy(recptr, &ControlFile.checkPointCopy,
sizeof(CheckPoint)); sizeof(CheckPoint));
INIT_CRC32C(crc); INIT_CRC32C(crc);
COMP_CRC32C(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint)); COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc)); COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc); FIN_CRC32C(crc);
record->xl_crc = crc; record->xl_crc = crc;

View File

@ -14,7 +14,7 @@
#ifndef BRIN_XLOG_H #ifndef BRIN_XLOG_H
#define BRIN_XLOG_H #define BRIN_XLOG_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/bufpage.h" #include "storage/bufpage.h"
#include "storage/itemptr.h" #include "storage/itemptr.h"
@ -42,59 +42,82 @@
*/ */
#define XLOG_BRIN_INIT_PAGE 0x80 #define XLOG_BRIN_INIT_PAGE 0x80
/* This is what we need to know about a BRIN index create */ /*
* This is what we need to know about a BRIN index create.
*
* Backup block 0: metapage
*/
typedef struct xl_brin_createidx typedef struct xl_brin_createidx
{ {
BlockNumber pagesPerRange; BlockNumber pagesPerRange;
RelFileNode node;
uint16 version; uint16 version;
} xl_brin_createidx; } xl_brin_createidx;
#define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16)) #define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16))
/* /*
* This is what we need to know about a BRIN tuple insert * This is what we need to know about a BRIN tuple insert
*
* Backup block 0: main page, block data is the new BrinTuple.
* Backup block 1: revmap page
*/ */
typedef struct xl_brin_insert typedef struct xl_brin_insert
{ {
RelFileNode node;
BlockNumber heapBlk; BlockNumber heapBlk;
/* extra information needed to update the revmap */ /* extra information needed to update the revmap */
BlockNumber revmapBlk;
BlockNumber pagesPerRange; BlockNumber pagesPerRange;
uint16 tuplen; /* offset number in the main page to insert the tuple to. */
ItemPointerData tid; OffsetNumber offnum;
/* tuple data follows at end of struct */
} xl_brin_insert; } xl_brin_insert;
#define SizeOfBrinInsert (offsetof(xl_brin_insert, tid) + sizeof(ItemPointerData)) #define SizeOfBrinInsert (offsetof(xl_brin_insert, offnum) + sizeof(OffsetNumber))
/* /*
* A cross-page update is the same as an insert, but also store the old tid. * A cross-page update is the same as an insert, but also stores information
* about the old tuple.
*
* Like in xl_brin_insert:
* Backup block 0: new page, block data includes the new BrinTuple.
* Backup block 1: revmap page
*
* And in addition:
* Backup block 2: old page
*/ */
typedef struct xl_brin_update typedef struct xl_brin_update
{ {
ItemPointerData oldtid; /* offset number of old tuple on old page */
OffsetNumber oldOffnum;
xl_brin_insert insert; xl_brin_insert insert;
} xl_brin_update; } xl_brin_update;
#define SizeOfBrinUpdate (offsetof(xl_brin_update, insert) + SizeOfBrinInsert) #define SizeOfBrinUpdate (offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
/* This is what we need to know about a BRIN tuple samepage update */ /*
* This is what we need to know about a BRIN tuple samepage update
*
* Backup block 0: updated page, with new BrinTuple as block data
*/
typedef struct xl_brin_samepage_update typedef struct xl_brin_samepage_update
{ {
RelFileNode node; OffsetNumber offnum;
ItemPointerData tid;
/* tuple data follows at end of struct */
} xl_brin_samepage_update; } xl_brin_samepage_update;
#define SizeOfBrinSamepageUpdate (offsetof(xl_brin_samepage_update, tid) + sizeof(ItemPointerData)) #define SizeOfBrinSamepageUpdate (sizeof(OffsetNumber))
/* This is what we need to know about a revmap extension */ /*
* This is what we need to know about a revmap extension
*
* Backup block 0: metapage
* Backup block 1: new revmap page
*/
typedef struct xl_brin_revmap_extend typedef struct xl_brin_revmap_extend
{ {
RelFileNode node; /*
* XXX: This is actually redundant - the block number is stored as part of
* backup block 1.
*/
BlockNumber targetBlk; BlockNumber targetBlk;
} xl_brin_revmap_extend; } xl_brin_revmap_extend;
@ -102,8 +125,8 @@ typedef struct xl_brin_revmap_extend
sizeof(BlockNumber)) sizeof(BlockNumber))
extern void brin_desc(StringInfo buf, XLogRecord *record); extern void brin_redo(XLogReaderState *record);
extern void brin_redo(XLogRecPtr lsn, XLogRecord *record); extern void brin_desc(StringInfo buf, XLogReaderState *record);
extern const char *brin_identify(uint8 info); extern const char *brin_identify(uint8 info);
#endif /* BRIN_XLOG_H */ #endif /* BRIN_XLOG_H */

View File

@ -11,7 +11,7 @@
#ifndef CLOG_H #ifndef CLOG_H
#define CLOG_H #define CLOG_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
/* /*
@ -48,8 +48,8 @@ extern void TruncateCLOG(TransactionId oldestXact);
#define CLOG_ZEROPAGE 0x00 #define CLOG_ZEROPAGE 0x00
#define CLOG_TRUNCATE 0x10 #define CLOG_TRUNCATE 0x10
extern void clog_redo(XLogRecPtr lsn, XLogRecord *record); extern void clog_redo(XLogReaderState *record);
extern void clog_desc(StringInfo buf, XLogRecord *record); extern void clog_desc(StringInfo buf, XLogReaderState *record);
extern const char *clog_identify(uint8 info); extern const char *clog_identify(uint8 info);
#endif /* CLOG_H */ #endif /* CLOG_H */

View File

@ -10,7 +10,7 @@
#ifndef GIN_H #ifndef GIN_H
#define GIN_H #define GIN_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/block.h" #include "storage/block.h"
#include "utils/relcache.h" #include "utils/relcache.h"
@ -74,8 +74,8 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats); extern void ginUpdateStats(Relation index, const GinStatsData *stats);
/* ginxlog.c */ /* ginxlog.c */
extern void gin_redo(XLogRecPtr lsn, XLogRecord *record); extern void gin_redo(XLogReaderState *record);
extern void gin_desc(StringInfo buf, XLogRecord *record); extern void gin_desc(StringInfo buf, XLogReaderState *record);
extern const char *gin_identify(uint8 info); extern const char *gin_identify(uint8 info);
extern void gin_xlog_startup(void); extern void gin_xlog_startup(void);
extern void gin_xlog_cleanup(void); extern void gin_xlog_cleanup(void);

View File

@ -13,7 +13,6 @@
#include "access/genam.h" #include "access/genam.h"
#include "access/gin.h" #include "access/gin.h"
#include "access/itup.h" #include "access/itup.h"
#include "access/xloginsert.h"
#include "fmgr.h" #include "fmgr.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "utils/rbtree.h" #include "utils/rbtree.h"
@ -397,22 +396,22 @@ typedef struct
typedef struct ginxlogCreatePostingTree typedef struct ginxlogCreatePostingTree
{ {
RelFileNode node;
BlockNumber blkno;
uint32 size; uint32 size;
/* A compressed posting list follows */ /* A compressed posting list follows */
} ginxlogCreatePostingTree; } ginxlogCreatePostingTree;
#define XLOG_GIN_INSERT 0x20
/* /*
* The format of the insertion record varies depending on the page type. * The format of the insertion record varies depending on the page type.
* ginxlogInsert is the common part between all variants. * ginxlogInsert is the common part between all variants.
*
* Backup Blk 0: target page
* Backup Blk 1: left child, if this insertion finishes an incomplete split
*/ */
#define XLOG_GIN_INSERT 0x20
typedef struct typedef struct
{ {
RelFileNode node;
BlockNumber blkno;
uint16 flags; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */ uint16 flags; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
/* /*
@ -477,14 +476,17 @@ typedef struct
PostingItem newitem; PostingItem newitem;
} ginxlogInsertDataInternal; } ginxlogInsertDataInternal;
/*
* Backup Blk 0: new left page (= original page, if not root split)
* Backup Blk 1: new right page
* Backup Blk 2: original page / new root page, if root split
* Backup Blk 3: left child, if this insertion completes an earlier split
*/
#define XLOG_GIN_SPLIT 0x30 #define XLOG_GIN_SPLIT 0x30
typedef struct ginxlogSplit typedef struct ginxlogSplit
{ {
RelFileNode node; RelFileNode node;
BlockNumber lblkno;
BlockNumber rblkno;
BlockNumber rrlink; /* right link, or root's blocknumber if root BlockNumber rrlink; /* right link, or root's blocknumber if root
* split */ * split */
BlockNumber leftChildBlkno; /* valid on a non-leaf split */ BlockNumber leftChildBlkno; /* valid on a non-leaf split */
@ -538,15 +540,6 @@ typedef struct
*/ */
#define XLOG_GIN_VACUUM_PAGE 0x40 #define XLOG_GIN_VACUUM_PAGE 0x40
typedef struct ginxlogVacuumPage
{
RelFileNode node;
BlockNumber blkno;
uint16 hole_offset; /* number of bytes before "hole" */
uint16 hole_length; /* number of bytes in "hole" */
/* entire page contents (minus the hole) follow at end of record */
} ginxlogVacuumPage;
/* /*
* Vacuuming posting tree leaf page is WAL-logged like recompression caused * Vacuuming posting tree leaf page is WAL-logged like recompression caused
* by insertion. * by insertion.
@ -555,26 +548,28 @@ typedef struct ginxlogVacuumPage
typedef struct ginxlogVacuumDataLeafPage typedef struct ginxlogVacuumDataLeafPage
{ {
RelFileNode node;
BlockNumber blkno;
ginxlogRecompressDataLeaf data; ginxlogRecompressDataLeaf data;
} ginxlogVacuumDataLeafPage; } ginxlogVacuumDataLeafPage;
/*
* Backup Blk 0: deleted page
* Backup Blk 1: parent
* Backup Blk 2: left sibling
*/
#define XLOG_GIN_DELETE_PAGE 0x50 #define XLOG_GIN_DELETE_PAGE 0x50
typedef struct ginxlogDeletePage typedef struct ginxlogDeletePage
{ {
RelFileNode node;
BlockNumber blkno;
BlockNumber parentBlkno;
OffsetNumber parentOffset; OffsetNumber parentOffset;
BlockNumber leftBlkno;
BlockNumber rightLink; BlockNumber rightLink;
} ginxlogDeletePage; } ginxlogDeletePage;
#define XLOG_GIN_UPDATE_META_PAGE 0x60 #define XLOG_GIN_UPDATE_META_PAGE 0x60
/*
* Backup Blk 0: metapage
* Backup Blk 1: tail page
*/
typedef struct ginxlogUpdateMeta typedef struct ginxlogUpdateMeta
{ {
RelFileNode node; RelFileNode node;
@ -591,22 +586,29 @@ typedef struct ginxlogUpdateMeta
typedef struct ginxlogInsertListPage typedef struct ginxlogInsertListPage
{ {
RelFileNode node;
BlockNumber blkno;
BlockNumber rightlink; BlockNumber rightlink;
int32 ntuples; int32 ntuples;
/* array of inserted tuples follows */ /* array of inserted tuples follows */
} ginxlogInsertListPage; } ginxlogInsertListPage;
/*
* Backup Blk 0: metapage
* Backup Blk 1 to ndeleted: deleted pages
*/
#define XLOG_GIN_DELETE_LISTPAGE 0x80 #define XLOG_GIN_DELETE_LISTPAGE 0x80
#define GIN_NDELETE_AT_ONCE 16 /*
* The WAL record for deleting list pages must contain a block reference to
* all the deleted pages, so the number of pages that can be deleted in one
* record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
* metapage.)
*/
#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
typedef struct ginxlogDeleteListPages typedef struct ginxlogDeleteListPages
{ {
RelFileNode node;
GinMetaPageData metadata; GinMetaPageData metadata;
int32 ndeleted; int32 ndeleted;
BlockNumber toDelete[GIN_NDELETE_AT_ONCE];
} ginxlogDeleteListPages; } ginxlogDeleteListPages;
@ -673,7 +675,7 @@ typedef struct GinBtreeData
/* insert methods */ /* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber); OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *); GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, Page *, Page *);
void *(*prepareDownlink) (GinBtree, Buffer); void *(*prepareDownlink) (GinBtree, Buffer);
void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page); void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);

View File

@ -16,7 +16,7 @@
#include "access/gist.h" #include "access/gist.h"
#include "access/itup.h" #include "access/itup.h"
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "fmgr.h" #include "fmgr.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/buffile.h" #include "storage/buffile.h"
@ -185,34 +185,33 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
#define XLOG_GIST_CREATE_INDEX 0x50 #define XLOG_GIST_CREATE_INDEX 0x50
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */ /* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
/*
* Backup Blk 0: updated page.
* Backup Blk 1: If this operation completes a page split, by inserting a
* downlink for the split page, the left half of the split
*/
typedef struct gistxlogPageUpdate typedef struct gistxlogPageUpdate
{ {
RelFileNode node;
BlockNumber blkno;
/*
* If this operation completes a page split, by inserting a downlink for
* the split page, leftchild points to the left half of the split.
*/
BlockNumber leftchild;
/* number of deleted offsets */ /* number of deleted offsets */
uint16 ntodelete; uint16 ntodelete;
uint16 ntoinsert;
/* /*
* follow: 1. todelete OffsetNumbers 2. tuples to insert * In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
*/ */
} gistxlogPageUpdate; } gistxlogPageUpdate;
/*
* Backup Blk 0: If this operation completes a page split, by inserting a
* downlink for the split page, the left half of the split
* Backup Blk 1 - npage: split pages (1 is the original page)
*/
typedef struct gistxlogPageSplit typedef struct gistxlogPageSplit
{ {
RelFileNode node;
BlockNumber origblkno; /* splitted page */
BlockNumber origrlink; /* rightlink of the page before split */ BlockNumber origrlink; /* rightlink of the page before split */
GistNSN orignsn; /* NSN of the page before split */ GistNSN orignsn; /* NSN of the page before split */
bool origleaf; /* was splitted page a leaf page? */ bool origleaf; /* was splitted page a leaf page? */
BlockNumber leftchild; /* like in gistxlogPageUpdate */
uint16 npage; /* # of pages in the split */ uint16 npage; /* # of pages in the split */
bool markfollowright; /* set F_FOLLOW_RIGHT flags */ bool markfollowright; /* set F_FOLLOW_RIGHT flags */
@ -451,8 +450,8 @@ extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate); int len, GISTSTATE *giststate);
/* gistxlog.c */ /* gistxlog.c */
extern void gist_redo(XLogRecPtr lsn, XLogRecord *record); extern void gist_redo(XLogReaderState *record);
extern void gist_desc(StringInfo buf, XLogRecord *record); extern void gist_desc(StringInfo buf, XLogReaderState *record);
extern const char *gist_identify(uint8 info); extern const char *gist_identify(uint8 info);
extern void gist_xlog_startup(void); extern void gist_xlog_startup(void);
extern void gist_xlog_cleanup(void); extern void gist_xlog_cleanup(void);

View File

@ -20,7 +20,7 @@
#include "access/genam.h" #include "access/genam.h"
#include "access/itup.h" #include "access/itup.h"
#include "access/sdir.h" #include "access/sdir.h"
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "fmgr.h" #include "fmgr.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
@ -356,8 +356,8 @@ extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value); extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
/* hash.c */ /* hash.c */
extern void hash_redo(XLogRecPtr lsn, XLogRecord *record); extern void hash_redo(XLogReaderState *record);
extern void hash_desc(StringInfo buf, XLogRecord *record); extern void hash_desc(StringInfo buf, XLogReaderState *record);
extern const char *hash_identify(uint8 info); extern const char *hash_identify(uint8 info);
#endif /* HASH_H */ #endif /* HASH_H */

View File

@ -15,7 +15,7 @@
#define HEAPAM_XLOG_H #define HEAPAM_XLOG_H
#include "access/htup.h" #include "access/htup.h"
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/buf.h" #include "storage/buf.h"
#include "storage/bufpage.h" #include "storage/bufpage.h"
@ -78,27 +78,11 @@
#define XLOG_HEAP_CONTAINS_OLD \ #define XLOG_HEAP_CONTAINS_OLD \
(XLOG_HEAP_CONTAINS_OLD_TUPLE | XLOG_HEAP_CONTAINS_OLD_KEY) (XLOG_HEAP_CONTAINS_OLD_TUPLE | XLOG_HEAP_CONTAINS_OLD_KEY)
/*
* All what we need to find changed tuple
*
* NB: on most machines, sizeof(xl_heaptid) will include some trailing pad
* bytes for alignment. We don't want to store the pad space in the XLOG,
* so use SizeOfHeapTid for space calculations. Similar comments apply for
* the other xl_FOO structs.
*/
typedef struct xl_heaptid
{
RelFileNode node;
ItemPointerData tid; /* changed tuple id */
} xl_heaptid;
#define SizeOfHeapTid (offsetof(xl_heaptid, tid) + SizeOfIptrData)
/* This is what we need to know about delete */ /* This is what we need to know about delete */
typedef struct xl_heap_delete typedef struct xl_heap_delete
{ {
xl_heaptid target; /* deleted tuple id */
TransactionId xmax; /* xmax of the deleted tuple */ TransactionId xmax; /* xmax of the deleted tuple */
OffsetNumber offnum; /* deleted tuple's offset */
uint8 infobits_set; /* infomask bits */ uint8 infobits_set; /* infomask bits */
uint8 flags; uint8 flags;
} xl_heap_delete; } xl_heap_delete;
@ -122,45 +106,33 @@ typedef struct xl_heap_header
#define SizeOfHeapHeader (offsetof(xl_heap_header, t_hoff) + sizeof(uint8)) #define SizeOfHeapHeader (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
/*
* Variant of xl_heap_header that contains the length of the tuple, which is
* useful if the length of the tuple cannot be computed using the overall
* record length. E.g. because there are several tuples inside a single
* record.
*/
typedef struct xl_heap_header_len
{
uint16 t_len;
xl_heap_header header;
} xl_heap_header_len;
#define SizeOfHeapHeaderLen (offsetof(xl_heap_header_len, header) + SizeOfHeapHeader)
/* This is what we need to know about insert */ /* This is what we need to know about insert */
typedef struct xl_heap_insert typedef struct xl_heap_insert
{ {
xl_heaptid target; /* inserted tuple id */ OffsetNumber offnum; /* inserted tuple's offset */
uint8 flags; uint8 flags;
/* xl_heap_header & TUPLE DATA FOLLOWS AT END OF STRUCT */
/* xl_heap_header & TUPLE DATA in backup block 0 */
} xl_heap_insert; } xl_heap_insert;
#define SizeOfHeapInsert (offsetof(xl_heap_insert, flags) + sizeof(uint8)) #define SizeOfHeapInsert (offsetof(xl_heap_insert, flags) + sizeof(uint8))
/* /*
* This is what we need to know about a multi-insert. The record consists of * This is what we need to know about a multi-insert.
* xl_heap_multi_insert header, followed by a xl_multi_insert_tuple and tuple *
* data for each tuple. 'offsets' array is omitted if the whole page is * The main data of the record consists of this xl_heap_multi_insert header.
* reinitialized (XLOG_HEAP_INIT_PAGE) * 'offsets' array is omitted if the whole page is reinitialized
* (XLOG_HEAP_INIT_PAGE).
*
* In block 0's data portion, there is an xl_multi_insert_tuple struct,
* followed by the tuple data for each tuple. There is padding to align
* each xl_multi_insert_tuple struct.
*/ */
typedef struct xl_heap_multi_insert typedef struct xl_heap_multi_insert
{ {
RelFileNode node;
BlockNumber blkno;
uint8 flags; uint8 flags;
uint16 ntuples; uint16 ntuples;
OffsetNumber offsets[1]; OffsetNumber offsets[1];
/* TUPLE DATA (xl_multi_insert_tuples) FOLLOW AT END OF STRUCT */
} xl_heap_multi_insert; } xl_heap_multi_insert;
#define SizeOfHeapMultiInsert offsetof(xl_heap_multi_insert, offsets) #define SizeOfHeapMultiInsert offsetof(xl_heap_multi_insert, offsets)
@ -176,34 +148,39 @@ typedef struct xl_multi_insert_tuple
#define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8)) #define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
/* This is what we need to know about update|hot_update */ /*
* This is what we need to know about update|hot_update
*
* Backup blk 0: new page
*
* If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are set,
* the prefix and/or suffix lengths come first, as one or two uint16s.
*
* After that, xl_heap_header and new tuple data follow. The new tuple
* data doesn't include the prefix and suffix, which are copied from the
* old tuple on replay.
*
* If XLOG_HEAP_CONTAINS_NEW_TUPLE flag is given, the tuple data is
* included even if a full-page image was taken.
*
* Backup blk 1: old page, if different. (no data, just a reference to the blk)
*/
typedef struct xl_heap_update typedef struct xl_heap_update
{ {
xl_heaptid target; /* deleted tuple id */
TransactionId old_xmax; /* xmax of the old tuple */ TransactionId old_xmax; /* xmax of the old tuple */
TransactionId new_xmax; /* xmax of the new tuple */ OffsetNumber old_offnum; /* old tuple's offset */
ItemPointerData newtid; /* new inserted tuple id */
uint8 old_infobits_set; /* infomask bits to set on old tuple */ uint8 old_infobits_set; /* infomask bits to set on old tuple */
uint8 flags; uint8 flags;
TransactionId new_xmax; /* xmax of the new tuple */
OffsetNumber new_offnum; /* new tuple's offset */
/* /*
* If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are
* set, the prefix and/or suffix come next, as one or two uint16s.
*
* After that, xl_heap_header_len and new tuple data follow. The new
* tuple data and length don't include the prefix and suffix, which are
* copied from the old tuple on replay. The new tuple data is omitted if
* a full-page image of the page was taken (unless the
* XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, in which case it's included
* anyway).
*
* If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are * If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
* set, another xl_heap_header_len struct and tuple data for the old tuple * set, a xl_heap_header struct and tuple data for the old tuple follows.
* follows.
*/ */
} xl_heap_update; } xl_heap_update;
#define SizeOfHeapUpdate (offsetof(xl_heap_update, flags) + sizeof(uint8)) #define SizeOfHeapUpdate (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber))
/* /*
* This is what we need to know about vacuum page cleanup/redirect * This is what we need to know about vacuum page cleanup/redirect
@ -218,12 +195,10 @@ typedef struct xl_heap_update
*/ */
typedef struct xl_heap_clean typedef struct xl_heap_clean
{ {
RelFileNode node;
BlockNumber block;
TransactionId latestRemovedXid; TransactionId latestRemovedXid;
uint16 nredirected; uint16 nredirected;
uint16 ndead; uint16 ndead;
/* OFFSET NUMBERS FOLLOW */ /* OFFSET NUMBERS are in the block reference 0 */
} xl_heap_clean; } xl_heap_clean;
#define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16)) #define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
@ -251,8 +226,8 @@ typedef struct xl_heap_cleanup_info
/* This is what we need to know about lock */ /* This is what we need to know about lock */
typedef struct xl_heap_lock typedef struct xl_heap_lock
{ {
xl_heaptid target; /* locked tuple id */
TransactionId locking_xid; /* might be a MultiXactId not xid */ TransactionId locking_xid; /* might be a MultiXactId not xid */
OffsetNumber offnum; /* locked tuple's offset on page */
int8 infobits_set; /* infomask and infomask2 bits to set */ int8 infobits_set; /* infomask and infomask2 bits to set */
} xl_heap_lock; } xl_heap_lock;
@ -261,8 +236,8 @@ typedef struct xl_heap_lock
/* This is what we need to know about locking an updated version of a row */ /* This is what we need to know about locking an updated version of a row */
typedef struct xl_heap_lock_updated typedef struct xl_heap_lock_updated
{ {
xl_heaptid target;
TransactionId xmax; TransactionId xmax;
OffsetNumber offnum;
uint8 infobits_set; uint8 infobits_set;
} xl_heap_lock_updated; } xl_heap_lock_updated;
@ -271,11 +246,11 @@ typedef struct xl_heap_lock_updated
/* This is what we need to know about in-place update */ /* This is what we need to know about in-place update */
typedef struct xl_heap_inplace typedef struct xl_heap_inplace
{ {
xl_heaptid target; /* updated tuple id */ OffsetNumber offnum; /* updated tuple's offset on page */
/* TUPLE DATA FOLLOWS AT END OF STRUCT */ /* TUPLE DATA FOLLOWS AT END OF STRUCT */
} xl_heap_inplace; } xl_heap_inplace;
#define SizeOfHeapInplace (offsetof(xl_heap_inplace, target) + SizeOfHeapTid) #define SizeOfHeapInplace (offsetof(xl_heap_inplace, offnum) + sizeof(OffsetNumber))
/* /*
* This struct represents a 'freeze plan', which is what we need to know about * This struct represents a 'freeze plan', which is what we need to know about
@ -296,23 +271,26 @@ typedef struct xl_heap_freeze_tuple
/* /*
* This is what we need to know about a block being frozen during vacuum * This is what we need to know about a block being frozen during vacuum
*
* Backup block 0's data contains an array of xl_heap_freeze_tuple structs,
* one for each tuple.
*/ */
typedef struct xl_heap_freeze_page typedef struct xl_heap_freeze_page
{ {
RelFileNode node;
BlockNumber block;
TransactionId cutoff_xid; TransactionId cutoff_xid;
uint16 ntuples; uint16 ntuples;
xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER];
} xl_heap_freeze_page; } xl_heap_freeze_page;
#define SizeOfHeapFreezePage offsetof(xl_heap_freeze_page, tuples) #define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, ntuples) + sizeof(uint16))
/* This is what we need to know about setting a visibility map bit */ /*
* This is what we need to know about setting a visibility map bit
*
* Backup blk 0: visibility map buffer
* Backup blk 1: heap buffer
*/
typedef struct xl_heap_visible typedef struct xl_heap_visible
{ {
RelFileNode node;
BlockNumber block;
TransactionId cutoff_xid; TransactionId cutoff_xid;
} xl_heap_visible; } xl_heap_visible;
@ -338,10 +316,11 @@ typedef struct xl_heap_new_cid
/* /*
* Store the relfilenode/ctid pair to facilitate lookups. * Store the relfilenode/ctid pair to facilitate lookups.
*/ */
xl_heaptid target; RelFileNode target_node;
ItemPointerData target_tid;
} xl_heap_new_cid; } xl_heap_new_cid;
#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid) #define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData))
/* logical rewrite xlog record header */ /* logical rewrite xlog record header */
typedef struct xl_heap_rewrite_mapping typedef struct xl_heap_rewrite_mapping
@ -357,13 +336,13 @@ typedef struct xl_heap_rewrite_mapping
extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
TransactionId *latestRemovedXid); TransactionId *latestRemovedXid);
extern void heap_redo(XLogRecPtr lsn, XLogRecord *record); extern void heap_redo(XLogReaderState *record);
extern void heap_desc(StringInfo buf, XLogRecord *record); extern void heap_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap_identify(uint8 info); extern const char *heap_identify(uint8 info);
extern void heap2_redo(XLogRecPtr lsn, XLogRecord *record); extern void heap2_redo(XLogReaderState *record);
extern void heap2_desc(StringInfo buf, XLogRecord *record); extern void heap2_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap2_identify(uint8 info); extern const char *heap2_identify(uint8 info);
extern void heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r); extern void heap_xlog_logical_rewrite(XLogReaderState *r);
extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode, extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
TransactionId latestRemovedXid); TransactionId latestRemovedXid);

View File

@ -498,6 +498,7 @@ do { \
* you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page. * you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page.
*/ */
#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) #define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)))
#define MinHeapTupleSize MAXALIGN(offsetof(HeapTupleHeaderData, t_bits))
/* /*
* MaxHeapTuplesPerPage is an upper bound on the number of tuples that can * MaxHeapTuplesPerPage is an upper bound on the number of tuples that can

View File

@ -133,6 +133,7 @@ typedef IndexAttributeBitMapData *IndexAttributeBitMap;
* IndexTupleData struct. We arrive at the divisor because each tuple * IndexTupleData struct. We arrive at the divisor because each tuple
* must be maxaligned, and it must have an associated item pointer. * must be maxaligned, and it must have an associated item pointer.
*/ */
#define MinIndexTupleSize MAXALIGN(sizeof(IndexTupleData) + 1)
#define MaxIndexTuplesPerPage \ #define MaxIndexTuplesPerPage \
((int) ((BLCKSZ - SizeOfPageHeaderData) / \ ((int) ((BLCKSZ - SizeOfPageHeaderData) / \
(MAXALIGN(sizeof(IndexTupleData) + 1) + sizeof(ItemIdData)))) (MAXALIGN(sizeof(IndexTupleData) + 1) + sizeof(ItemIdData))))

View File

@ -11,7 +11,7 @@
#ifndef MULTIXACT_H #ifndef MULTIXACT_H
#define MULTIXACT_H #define MULTIXACT_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
@ -135,8 +135,8 @@ extern void multixact_twophase_postcommit(TransactionId xid, uint16 info,
extern void multixact_twophase_postabort(TransactionId xid, uint16 info, extern void multixact_twophase_postabort(TransactionId xid, uint16 info,
void *recdata, uint32 len); void *recdata, uint32 len);
extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record); extern void multixact_redo(XLogReaderState *record);
extern void multixact_desc(StringInfo buf, XLogRecord *record); extern void multixact_desc(StringInfo buf, XLogReaderState *record);
extern const char *multixact_identify(uint8 info); extern const char *multixact_identify(uint8 info);
extern char *mxid_to_string(MultiXactId multi, int nmembers, extern char *mxid_to_string(MultiXactId multi, int nmembers,
MultiXactMember *members); MultiXactMember *members);

View File

@ -17,7 +17,7 @@
#include "access/genam.h" #include "access/genam.h"
#include "access/itup.h" #include "access/itup.h"
#include "access/sdir.h" #include "access/sdir.h"
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "catalog/pg_index.h" #include "catalog/pg_index.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
@ -227,15 +227,6 @@ typedef struct BTMetaPageData
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from #define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from
* FSM */ * FSM */
/*
* All that we need to find changed index tuple
*/
typedef struct xl_btreetid
{
RelFileNode node;
ItemPointerData tid; /* changed tuple id */
} xl_btreetid;
/* /*
* All that we need to regenerate the meta-data page * All that we need to regenerate the meta-data page
*/ */
@ -252,16 +243,17 @@ typedef struct xl_btree_metadata
* *
* This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META. * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
* Note that INSERT_META implies it's not a leaf page. * Note that INSERT_META implies it's not a leaf page.
*
* Backup Blk 0: original page (data contains the inserted tuple)
* Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
* Backup Blk 2: xl_btree_metadata, if INSERT_META
*/ */
typedef struct xl_btree_insert typedef struct xl_btree_insert
{ {
xl_btreetid target; /* inserted tuple id */ OffsetNumber offnum;
/* BlockNumber finishes_split field FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
/* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
/* INDEX TUPLE FOLLOWS AT END OF STRUCT */
} xl_btree_insert; } xl_btree_insert;
#define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData) #define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
/* /*
* On insert with split, we save all the items going into the right sibling * On insert with split, we save all the items going into the right sibling
@ -278,45 +270,41 @@ typedef struct xl_btree_insert
* the root page, and thus that a newroot record rather than an insert or * the root page, and thus that a newroot record rather than an insert or
* split record should follow. Note that a split record never carries a * split record should follow. Note that a split record never carries a
* metapage update --- we'll do that in the parent-level update. * metapage update --- we'll do that in the parent-level update.
*
* Backup Blk 0: original page / new left page
*
* The left page's data portion contains the new item, if it's the _L variant.
* (In the _R variants, the new item is one of the right page's tuples.)
* If level > 0, an IndexTuple representing the HIKEY of the left page
* follows. We don't need this on leaf pages, because it's the same as the
* leftmost key in the new right page.
*
* Backup Blk 1: new right page
*
* The right page's data portion contains the right page's tuples in the
* form used by _bt_restore_page.
*
* Backup Blk 2: next block (orig page's rightlink), if any
* Backup Blk 3: child's left sibling, if non-leaf split
*/ */
typedef struct xl_btree_split typedef struct xl_btree_split
{ {
RelFileNode node;
BlockNumber leftsib; /* orig page / new left page */
BlockNumber rightsib; /* new right page */
BlockNumber rnext; /* next block (orig page's rightlink) */
uint32 level; /* tree level of page being split */ uint32 level; /* tree level of page being split */
OffsetNumber firstright; /* first item moved to right page */ OffsetNumber firstright; /* first item moved to right page */
OffsetNumber newitemoff; /* new item's offset (if placed on left page) */
/*
* In the _L variants, next are OffsetNumber newitemoff and the new item.
* (In the _R variants, the new item is one of the right page's tuples.)
* The new item, but not newitemoff, is suppressed if XLogInsert chooses
* to store the left page's whole page image.
*
* If level > 0, an IndexTuple representing the HIKEY of the left page
* follows. We don't need this on leaf pages, because it's the same as
* the leftmost key in the new right page. Also, it's suppressed if
* XLogInsert chooses to store the left page's whole page image.
*
* If level > 0, BlockNumber of the page whose incomplete-split flag this
* insertion clears. (not aligned)
*
* Last are the right page's tuples in the form used by _bt_restore_page.
*/
} xl_btree_split; } xl_btree_split;
#define SizeOfBtreeSplit (offsetof(xl_btree_split, firstright) + sizeof(OffsetNumber)) #define SizeOfBtreeSplit (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
/* /*
* This is what we need to know about delete of individual leaf index tuples. * This is what we need to know about delete of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a * The WAL record can represent deletion of any number of index tuples on a
* single index page when *not* executed by VACUUM. * single index page when *not* executed by VACUUM.
*
* Backup Blk 0: index page
*/ */
typedef struct xl_btree_delete typedef struct xl_btree_delete
{ {
RelFileNode node; /* RelFileNode of the index */
BlockNumber block;
RelFileNode hnode; /* RelFileNode of the heap the index currently RelFileNode hnode; /* RelFileNode of the heap the index currently
* points at */ * points at */
int nitems; int nitems;
@ -361,8 +349,6 @@ typedef struct xl_btree_reuse_page
*/ */
typedef struct xl_btree_vacuum typedef struct xl_btree_vacuum
{ {
RelFileNode node;
BlockNumber block;
BlockNumber lastBlockVacuumed; BlockNumber lastBlockVacuumed;
/* TARGET OFFSET NUMBERS FOLLOW */ /* TARGET OFFSET NUMBERS FOLLOW */
@ -376,10 +362,13 @@ typedef struct xl_btree_vacuum
* remove this tuple's downlink and the *following* tuple's key). Note that * remove this tuple's downlink and the *following* tuple's key). Note that
* the leaf page is empty, so we don't need to store its content --- it is * the leaf page is empty, so we don't need to store its content --- it is
* just reinitialized during recovery using the rest of the fields. * just reinitialized during recovery using the rest of the fields.
*
* Backup Blk 0: leaf block
* Backup Blk 1: top parent
*/ */
typedef struct xl_btree_mark_page_halfdead typedef struct xl_btree_mark_page_halfdead
{ {
xl_btreetid target; /* deleted tuple id in parent page */ OffsetNumber poffset; /* deleted tuple's offset in parent page */
/* information needed to recreate the leaf page: */ /* information needed to recreate the leaf page: */
BlockNumber leafblk; /* leaf block ultimately being deleted */ BlockNumber leafblk; /* leaf block ultimately being deleted */
@ -394,11 +383,15 @@ typedef struct xl_btree_mark_page_halfdead
* This is what we need to know about deletion of a btree page. Note we do * This is what we need to know about deletion of a btree page. Note we do
* not store any content for the deleted page --- it is just rewritten as empty * not store any content for the deleted page --- it is just rewritten as empty
* during recovery, apart from resetting the btpo.xact. * during recovery, apart from resetting the btpo.xact.
*
* Backup Blk 0: target block being deleted
* Backup Blk 1: target block's left sibling, if any
* Backup Blk 2: target block's right sibling
* Backup Blk 3: leaf block (if different from target)
* Backup Blk 4: metapage (if rightsib becomes new fast root)
*/ */
typedef struct xl_btree_unlink_page typedef struct xl_btree_unlink_page
{ {
RelFileNode node;
BlockNumber deadblk; /* target block being deleted */
BlockNumber leftsib; /* target block's left sibling, if any */ BlockNumber leftsib; /* target block's left sibling, if any */
BlockNumber rightsib; /* target block's right sibling */ BlockNumber rightsib; /* target block's right sibling */
@ -406,7 +399,6 @@ typedef struct xl_btree_unlink_page
* Information needed to recreate the leaf page, when target is an * Information needed to recreate the leaf page, when target is an
* internal page. * internal page.
*/ */
BlockNumber leafblk;
BlockNumber leafleftsib; BlockNumber leafleftsib;
BlockNumber leafrightsib; BlockNumber leafrightsib;
BlockNumber topparent; /* next child down in the branch */ BlockNumber topparent; /* next child down in the branch */
@ -423,13 +415,15 @@ typedef struct xl_btree_unlink_page
* *
* Note that although this implies rewriting the metadata page, we don't need * Note that although this implies rewriting the metadata page, we don't need
* an xl_btree_metadata record --- the rootblk and level are sufficient. * an xl_btree_metadata record --- the rootblk and level are sufficient.
*
* Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
* Backup Blk 1: left child (if splitting an old root)
* Backup Blk 2: metapage
*/ */
typedef struct xl_btree_newroot typedef struct xl_btree_newroot
{ {
RelFileNode node; BlockNumber rootblk; /* location of new root (redundant with blk 0) */
BlockNumber rootblk; /* location of new root */
uint32 level; /* its tree level */ uint32 level; /* its tree level */
/* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
} xl_btree_newroot; } xl_btree_newroot;
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32)) #define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32))
@ -726,8 +720,8 @@ extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
/* /*
* prototypes for functions in nbtxlog.c * prototypes for functions in nbtxlog.c
*/ */
extern void btree_redo(XLogRecPtr lsn, XLogRecord *record); extern void btree_redo(XLogReaderState *record);
extern void btree_desc(StringInfo buf, XLogRecord *record); extern void btree_desc(StringInfo buf, XLogReaderState *record);
extern const char *btree_identify(uint8 info); extern const char *btree_identify(uint8 info);
#endif /* NBTREE_H */ #endif /* NBTREE_H */

View File

@ -15,7 +15,7 @@
#define SPGIST_H #define SPGIST_H
#include "access/skey.h" #include "access/skey.h"
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "fmgr.h" #include "fmgr.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
@ -197,8 +197,8 @@ extern Datum spgbulkdelete(PG_FUNCTION_ARGS);
extern Datum spgvacuumcleanup(PG_FUNCTION_ARGS); extern Datum spgvacuumcleanup(PG_FUNCTION_ARGS);
/* spgxlog.c */ /* spgxlog.c */
extern void spg_redo(XLogRecPtr lsn, XLogRecord *record); extern void spg_redo(XLogReaderState *record);
extern void spg_desc(StringInfo buf, XLogRecord *record); extern void spg_desc(StringInfo buf, XLogReaderState *record);
extern const char *spg_identify(uint8 info); extern const char *spg_identify(uint8 info);
extern void spg_xlog_startup(void); extern void spg_xlog_startup(void);
extern void spg_xlog_cleanup(void); extern void spg_xlog_cleanup(void);

View File

@ -18,7 +18,6 @@
#include "access/spgist.h" #include "access/spgist.h"
#include "nodes/tidbitmap.h" #include "nodes/tidbitmap.h"
#include "storage/buf.h" #include "storage/buf.h"
#include "storage/relfilenode.h"
#include "utils/relcache.h" #include "utils/relcache.h"
@ -351,35 +350,8 @@ typedef SpGistDeadTupleData *SpGistDeadTuple;
/* /*
* XLOG stuff * XLOG stuff
*
* ACCEPT_RDATA_* can only use fixed-length rdata arrays, because of lengthof
*/ */
#define ACCEPT_RDATA_DATA(p, s, i) \
do { \
Assert((i) < lengthof(rdata)); \
rdata[i].data = (char *) (p); \
rdata[i].len = (s); \
rdata[i].buffer = InvalidBuffer; \
rdata[i].buffer_std = true; \
rdata[i].next = NULL; \
if ((i) > 0) \
rdata[(i) - 1].next = rdata + (i); \
} while(0)
#define ACCEPT_RDATA_BUFFER(b, i) \
do { \
Assert((i) < lengthof(rdata)); \
rdata[i].data = NULL; \
rdata[i].len = 0; \
rdata[i].buffer = (b); \
rdata[i].buffer_std = true; \
rdata[i].next = NULL; \
if ((i) > 0) \
rdata[(i) - 1].next = rdata + (i); \
} while(0)
/* XLOG record types for SPGiST */ /* XLOG record types for SPGiST */
#define XLOG_SPGIST_CREATE_INDEX 0x00 #define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10 #define XLOG_SPGIST_ADD_LEAF 0x10
@ -408,36 +380,36 @@ typedef struct spgxlogState
(d).isBuild = (s)->isBuild; \ (d).isBuild = (s)->isBuild; \
} while(0) } while(0)
/*
* Backup Blk 0: destination page for leaf tuple
* Backup Blk 1: parent page (if any)
*/
typedef struct spgxlogAddLeaf typedef struct spgxlogAddLeaf
{ {
RelFileNode node;
BlockNumber blknoLeaf; /* destination page for leaf tuple */
bool newPage; /* init dest page? */ bool newPage; /* init dest page? */
bool storesNulls; /* page is in the nulls tree? */ bool storesNulls; /* page is in the nulls tree? */
OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */ OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */
OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */ OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */
BlockNumber blknoParent; /* where the parent downlink is, if any */ OffsetNumber offnumParent; /* where the parent downlink is, if any */
OffsetNumber offnumParent;
uint16 nodeI; uint16 nodeI;
/* new leaf tuple follows (unaligned!) */ /* new leaf tuple follows (unaligned!) */
} spgxlogAddLeaf; } spgxlogAddLeaf;
/*
* Backup Blk 0: source leaf page
* Backup Blk 1: destination leaf page
* Backup Blk 2: parent page
*/
typedef struct spgxlogMoveLeafs typedef struct spgxlogMoveLeafs
{ {
RelFileNode node;
BlockNumber blknoSrc; /* source leaf page */
BlockNumber blknoDst; /* destination leaf page */
uint16 nMoves; /* number of tuples moved from source page */ uint16 nMoves; /* number of tuples moved from source page */
bool newPage; /* init dest page? */ bool newPage; /* init dest page? */
bool replaceDead; /* are we replacing a DEAD source tuple? */ bool replaceDead; /* are we replacing a DEAD source tuple? */
bool storesNulls; /* pages are in the nulls tree? */ bool storesNulls; /* pages are in the nulls tree? */
BlockNumber blknoParent; /* where the parent downlink is */ /* where the parent downlink is */
OffsetNumber offnumParent; OffsetNumber offnumParent;
uint16 nodeI; uint16 nodeI;
@ -452,11 +424,6 @@ typedef struct spgxlogMoveLeafs
* Note: if replaceDead is true then there is only one inserted tuple * Note: if replaceDead is true then there is only one inserted tuple
* number and only one leaf tuple in the data, because we are not copying * number and only one leaf tuple in the data, because we are not copying
* the dead tuple from the source * the dead tuple from the source
*
* Buffer references in the rdata array are:
* Src page
* Dest page
* Parent page
*---------- *----------
*/ */
OffsetNumber offsets[1]; OffsetNumber offsets[1];
@ -464,21 +431,43 @@ typedef struct spgxlogMoveLeafs
#define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets) #define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
/*
* Backup Blk 0: original page
* Backup Blk 1: where new tuple goes, if not same place
* Backup Blk 2: where parent downlink is, if updated and different from
* the old and new
*/
typedef struct spgxlogAddNode typedef struct spgxlogAddNode
{ {
RelFileNode node; /*
* Offset of the original inner tuple, in the original page (on backup
* block 0).
*/
OffsetNumber offnum;
BlockNumber blkno; /* block number of original inner tuple */ /*
OffsetNumber offnum; /* offset of original inner tuple */ * Offset of the new tuple, on the new page (on backup block 1). Invalid,
* if we overwrote the old tuple in the original page.
BlockNumber blknoParent; /* where parent downlink is, if updated */ */
OffsetNumber offnumParent;
uint16 nodeI;
BlockNumber blknoNew; /* where new tuple goes, if not same place */
OffsetNumber offnumNew; OffsetNumber offnumNew;
bool newPage; /* init new page? */ bool newPage; /* init new page? */
/*----
* Where is the parent downlink? parentBlk indicates which page it's on,
* and offnumParent is the offset within the page. The possible values for
* parentBlk are:
*
* 0: parent == original page
* 1: parent == new page
* 2: parent == different page (blk ref 2)
* -1: parent not updated
*----
*/
char parentBlk;
OffsetNumber offnumParent; /* offset within the parent page */
uint16 nodeI;
spgxlogState stateSrc; spgxlogState stateSrc;
/* /*
@ -486,41 +475,51 @@ typedef struct spgxlogAddNode
*/ */
} spgxlogAddNode; } spgxlogAddNode;
/*
* Backup Blk 0: where the prefix tuple goes
* Backup Blk 1: where the postfix tuple goes (if different page)
*/
typedef struct spgxlogSplitTuple typedef struct spgxlogSplitTuple
{ {
RelFileNode node; /* where the prefix tuple goes */
BlockNumber blknoPrefix; /* where the prefix tuple goes */
OffsetNumber offnumPrefix; OffsetNumber offnumPrefix;
BlockNumber blknoPostfix; /* where the postfix tuple goes */ /* where the postfix tuple goes */
OffsetNumber offnumPostfix; OffsetNumber offnumPostfix;
bool newPage; /* need to init that page? */ bool newPage; /* need to init that page? */
bool postfixBlkSame; /* was postfix tuple put on same page as
* prefix? */
/* /*
* new prefix inner tuple follows, then new postfix inner tuple * new prefix inner tuple follows, then new postfix inner tuple (both are
* (both are unaligned!) * unaligned!)
*/ */
} spgxlogSplitTuple; } spgxlogSplitTuple;
/*
* Block references in the WAL record are:
* Backup Blk 0: Src page (only if not root)
* Backup Blk 1: Dest page (if used)
* Backup Blk 2: Inner page
* Backup Blk 3: Parent page (if any, and different from Inner)
*/
typedef struct spgxlogPickSplit typedef struct spgxlogPickSplit
{ {
RelFileNode node; bool isRootSplit;
BlockNumber blknoSrc; /* original leaf page */
BlockNumber blknoDest; /* other leaf page, if any */
uint16 nDelete; /* n to delete from Src */ uint16 nDelete; /* n to delete from Src */
uint16 nInsert; /* n to insert on Src and/or Dest */ uint16 nInsert; /* n to insert on Src and/or Dest */
bool initSrc; /* re-init the Src page? */ bool initSrc; /* re-init the Src page? */
bool initDest; /* re-init the Dest page? */ bool initDest; /* re-init the Dest page? */
BlockNumber blknoInner; /* where to put new inner tuple */ /* where to put new inner tuple */
OffsetNumber offnumInner; OffsetNumber offnumInner;
bool initInner; /* re-init the Inner page? */ bool initInner; /* re-init the Inner page? */
bool storesNulls; /* pages are in the nulls tree? */ bool storesNulls; /* pages are in the nulls tree? */
BlockNumber blknoParent; /* where the parent downlink is, if any */ /* where the parent downlink is, if any */
bool innerIsParent; /* is parent the same as inner page? */
OffsetNumber offnumParent; OffsetNumber offnumParent;
uint16 nodeI; uint16 nodeI;
@ -533,24 +532,15 @@ typedef struct spgxlogPickSplit
* array of page selector bytes for inserted tuples, length nInsert * array of page selector bytes for inserted tuples, length nInsert
* new inner tuple (unaligned!) * new inner tuple (unaligned!)
* list of leaf tuples, length nInsert (unaligned!) * list of leaf tuples, length nInsert (unaligned!)
*
* Buffer references in the rdata array are:
* Src page (only if not root and not being init'd)
* Dest page (if used and not being init'd)
* Inner page (only if not being init'd)
* Parent page (if any; could be same as Inner)
*---------- *----------
*/ */
OffsetNumber offsets[1]; OffsetNumber offsets[1];
} spgxlogPickSplit; } spgxlogPickSplit;
#define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, offsets) #define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, offsets)
typedef struct spgxlogVacuumLeaf typedef struct spgxlogVacuumLeaf
{ {
RelFileNode node;
BlockNumber blkno; /* block number to clean */
uint16 nDead; /* number of tuples to become DEAD */ uint16 nDead; /* number of tuples to become DEAD */
uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */ uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */
uint16 nMove; /* number of tuples to move */ uint16 nMove; /* number of tuples to move */
@ -576,9 +566,6 @@ typedef struct spgxlogVacuumLeaf
typedef struct spgxlogVacuumRoot typedef struct spgxlogVacuumRoot
{ {
/* vacuum a root page when it is also a leaf */ /* vacuum a root page when it is also a leaf */
RelFileNode node;
BlockNumber blkno; /* block number to clean */
uint16 nDelete; /* number of tuples to delete */ uint16 nDelete; /* number of tuples to delete */
spgxlogState stateSrc; spgxlogState stateSrc;
@ -591,9 +578,6 @@ typedef struct spgxlogVacuumRoot
typedef struct spgxlogVacuumRedirect typedef struct spgxlogVacuumRedirect
{ {
RelFileNode node;
BlockNumber blkno; /* block number to clean */
uint16 nToPlaceholder; /* number of redirects to make placeholders */ uint16 nToPlaceholder; /* number of redirects to make placeholders */
OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */ OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */
TransactionId newestRedirectXid; /* newest XID of removed redirects */ TransactionId newestRedirectXid; /* newest XID of removed redirects */

View File

@ -14,7 +14,7 @@
#ifndef XACT_H #ifndef XACT_H
#define XACT_H #define XACT_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
@ -256,8 +256,8 @@ extern void UnregisterSubXactCallback(SubXactCallback callback, void *arg);
extern int xactGetCommittedChildren(TransactionId **ptr); extern int xactGetCommittedChildren(TransactionId **ptr);
extern void xact_redo(XLogRecPtr lsn, XLogRecord *record); extern void xact_redo(XLogReaderState *record);
extern void xact_desc(StringInfo buf, XLogRecord *record); extern void xact_desc(StringInfo buf, XLogReaderState *record);
extern const char *xact_identify(uint8 info); extern const char *xact_identify(uint8 info);
#endif /* XACT_H */ #endif /* XACT_H */

View File

@ -14,7 +14,7 @@
#include "access/rmgr.h" #include "access/rmgr.h"
#include "access/xlogdefs.h" #include "access/xlogdefs.h"
#include "access/xloginsert.h" #include "access/xloginsert.h"
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "datatype/timestamp.h" #include "datatype/timestamp.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
@ -186,7 +186,9 @@ typedef struct CheckpointStatsData
extern CheckpointStatsData CheckpointStats; extern CheckpointStatsData CheckpointStats;
extern XLogRecPtr XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn); struct XLogRecData;
extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn);
extern void XLogFlush(XLogRecPtr RecPtr); extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogBackgroundFlush(void); extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
@ -198,8 +200,8 @@ extern XLogSegNo XLogGetLastRemovedSegno(void);
extern void XLogSetAsyncXactLSN(XLogRecPtr record); extern void XLogSetAsyncXactLSN(XLogRecPtr record);
extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn); extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);
extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record); extern void xlog_redo(XLogReaderState *record);
extern void xlog_desc(StringInfo buf, XLogRecord *record); extern void xlog_desc(StringInfo buf, XLogReaderState *record);
extern const char *xlog_identify(uint8 info); extern const char *xlog_identify(uint8 info);
extern void issue_xlog_fsync(int fd, XLogSegNo segno); extern void issue_xlog_fsync(int fd, XLogSegNo segno);

View File

@ -20,7 +20,7 @@
#define XLOG_INTERNAL_H #define XLOG_INTERNAL_H
#include "access/xlogdefs.h" #include "access/xlogdefs.h"
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "datatype/timestamp.h" #include "datatype/timestamp.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "pgtime.h" #include "pgtime.h"
@ -31,7 +31,7 @@
/* /*
* Each page of XLOG file has a header like this: * Each page of XLOG file has a header like this:
*/ */
#define XLOG_PAGE_MAGIC 0xD080 /* can be used as WAL version indicator */ #define XLOG_PAGE_MAGIC 0xD081 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData typedef struct XLogPageHeaderData
{ {
@ -203,6 +203,17 @@ typedef struct xl_end_of_recovery
TimeLineID PrevTimeLineID; /* previous TLI we forked off from */ TimeLineID PrevTimeLineID; /* previous TLI we forked off from */
} xl_end_of_recovery; } xl_end_of_recovery;
/*
* The functions in xloginsert.c construct a chain of XLogRecData structs
* to represent the final WAL record.
*/
typedef struct XLogRecData
{
struct XLogRecData *next; /* next struct in chain, or NULL */
char *data; /* start of rmgr data to include */
uint32 len; /* length of rmgr data to include */
} XLogRecData;
/* /*
* Method table for resource managers. * Method table for resource managers.
* *
@ -219,8 +230,8 @@ typedef struct xl_end_of_recovery
typedef struct RmgrData typedef struct RmgrData
{ {
const char *rm_name; const char *rm_name;
void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr); void (*rm_redo) (XLogReaderState *record);
void (*rm_desc) (StringInfo buf, XLogRecord *rptr); void (*rm_desc) (StringInfo buf, XLogReaderState *record);
const char *(*rm_identify) (uint8 info); const char *(*rm_identify) (uint8 info);
void (*rm_startup) (void); void (*rm_startup) (void);
void (*rm_cleanup) (void); void (*rm_cleanup) (void);

View File

@ -18,49 +18,43 @@
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
/* /*
* The rmgr data to be written by XLogInsert() is defined by a chain of * The minimum size of the WAL construction working area. If you need to
* one or more XLogRecData structs. (Multiple structs would be used when * register more than XLR_NORMAL_MAX_BLOCK_ID block references or have more
* parts of the source data aren't physically adjacent in memory, or when * than XLR_NORMAL_RDATAS data chunks in a single WAL record, you must call
* multiple associated buffers need to be specified.) * XLogEnsureRecordSpace() first to allocate more working memory.
*
* If buffer is valid then XLOG will check if buffer must be backed up
* (ie, whether this is first change of that page since last checkpoint).
* If so, the whole page contents are attached to the XLOG record, and XLOG
* sets XLR_BKP_BLOCK(N) bit in xl_info. Note that the buffer must be pinned
* and exclusive-locked by the caller, so that it won't change under us.
* NB: when the buffer is backed up, we DO NOT insert the data pointed to by
* this XLogRecData struct into the XLOG record, since we assume it's present
* in the buffer. Therefore, rmgr redo routines MUST pay attention to
* XLR_BKP_BLOCK(N) to know what is actually stored in the XLOG record.
* The N'th XLR_BKP_BLOCK bit corresponds to the N'th distinct buffer
* value (ignoring InvalidBuffer) appearing in the rdata chain.
*
* When buffer is valid, caller must set buffer_std to indicate whether the
* page uses standard pd_lower/pd_upper header fields. If this is true, then
* XLOG is allowed to omit the free space between pd_lower and pd_upper from
* the backed-up page image. Note that even when buffer_std is false, the
* page MUST have an LSN field as its first eight bytes!
*
* Note: data can be NULL to indicate no rmgr data associated with this chain
* entry. This can be sensible (ie, not a wasted entry) if buffer is valid.
* The implication is that the buffer has been changed by the operation being
* logged, and so may need to be backed up, but the change can be redone using
* only information already present elsewhere in the XLOG entry.
*/ */
typedef struct XLogRecData #define XLR_NORMAL_MAX_BLOCK_ID 4
{ #define XLR_NORMAL_RDATAS 20
char *data; /* start of rmgr data to include */
uint32 len; /* length of rmgr data to include */ /* flags for XLogRegisterBuffer */
Buffer buffer; /* buffer associated with data, if any */ #define REGBUF_FORCE_IMAGE 0x01 /* force a full-page image */
bool buffer_std; /* buffer has standard pd_lower/pd_upper */ #define REGBUF_NO_IMAGE 0x02 /* don't take a full-page image */
struct XLogRecData *next; /* next struct in chain, or NULL */ #define REGBUF_WILL_INIT (0x04 | 0x02) /* page will be re-initialized at
} XLogRecData; * replay (implies NO_IMAGE) */
#define REGBUF_STANDARD 0x08 /* page follows "standard" page layout,
* (data between pd_lower and pd_upper
* will be skipped) */
#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image
* is taken */
/* prototypes for public functions in xloginsert.c: */

/*
 * NOTE(review): judging by the names, a record is presumably built by
 * calling XLogBeginInsert(), registering its parts with the XLogRegister*
 * functions, and finishing with XLogInsert() -- confirm the exact protocol
 * against xloginsert.c.
 */
extern void XLogBeginInsert(void);
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);

/*
 * Must be called first when a single record needs more than
 * XLR_NORMAL_MAX_BLOCK_ID block references or more than XLR_NORMAL_RDATAS
 * data chunks, to allocate more working memory.
 */
extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
extern void XLogRegisterData(char *data, int len);

/* 'flags' is a combination of the REGBUF_* bits. */
extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);

/*
 * Takes the block's identity and page explicitly rather than a Buffer.
 * NOTE(review): 'flags' presumably takes the same REGBUF_* bits as
 * XLogRegisterBuffer -- confirm.
 */
extern void XLogRegisterBlock(uint8 block_id, RelFileNode *rnode,
ForkNumber forknum, BlockNumber blknum, char *page,
uint8 flags);
extern void XLogRegisterBufData(uint8 block_id, char *data, int len);
extern void XLogResetInsertion(void);
extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
BlockNumber blk, char *page, bool page_std); BlockNumber blk, char *page, bool page_std);
extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std); extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
extern void InitXLogInsert(void);
#endif /* XLOGINSERT_H */ #endif /* XLOGINSERT_H */

View File

@ -14,12 +14,18 @@
* *
* The basic idea is to allocate an XLogReaderState via * The basic idea is to allocate an XLogReaderState via
* XLogReaderAllocate(), and call XLogReadRecord() until it returns NULL. * XLogReaderAllocate(), and call XLogReadRecord() until it returns NULL.
*
* After reading a record with XLogReadRecord(), it's decomposed into
* the per-block and main data parts, and the parts can be accessed
* with the XLogRec* macros and functions. You can also decode a
* record that's already constructed in memory, without reading from
* disk, by calling the DecodeXLogRecord() function.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef XLOGREADER_H #ifndef XLOGREADER_H
#define XLOGREADER_H #define XLOGREADER_H
#include "access/xlog_internal.h" #include "access/xlogrecord.h"
typedef struct XLogReaderState XLogReaderState; typedef struct XLogReaderState XLogReaderState;
@ -31,6 +37,32 @@ typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
char *readBuf, char *readBuf,
TimeLineID *pageTLI); TimeLineID *pageTLI);
/*
 * Decoded form of one block reference of a WAL record.  XLogReaderState
 * keeps an array of these, one slot per possible block ID.
 */
typedef struct
{
/* Is this block ref in use? */
bool in_use;
/* Identify the block this refers to */
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blkno;
/* copy of the fork_flags field from the XLogRecordBlockHeader */
uint8 flags;
/* Information on full-page image, if any */
bool has_image;
/* NOTE(review): presumably points at the stored image bytes -- confirm */
char *bkp_image;
/* number of bytes before the "hole", and number of bytes in it */
uint16 hole_offset;
uint16 hole_length;
/* Buffer holding the rmgr-specific data associated with this block */
bool has_data;
char *data;
uint16 data_len;
/*
 * NOTE(review): presumably the allocated size of 'data', by analogy with
 * main_data_bufsz in XLogReaderState -- confirm.
 */
uint16 data_bufsz;
} DecodedBkpBlock;
struct XLogReaderState struct XLogReaderState
{ {
/* ---------------------------------------- /* ----------------------------------------
@ -79,6 +111,25 @@ struct XLogReaderState
XLogRecPtr ReadRecPtr; /* start of last record read */ XLogRecPtr ReadRecPtr; /* start of last record read */
XLogRecPtr EndRecPtr; /* end+1 of last record read */ XLogRecPtr EndRecPtr; /* end+1 of last record read */
/* ----------------------------------------
* Decoded representation of current record
*
* Use XLogRecGet* functions to investigate the record; these fields
* should not be accessed directly.
* ----------------------------------------
*/
XLogRecord *decoded_record; /* currently decoded record */
char *main_data; /* record's main data portion */
uint32 main_data_len; /* main data portion's length */
uint32 main_data_bufsz; /* allocated size of the buffer */
/* information about blocks referenced by the record. */
DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID + 1];
int max_block_id; /* highest block_id in use (-1 if none) */
/* ---------------------------------------- /* ----------------------------------------
* private/internal state * private/internal state
* ---------------------------------------- * ----------------------------------------
@ -123,4 +174,28 @@ extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr); extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
#endif /* FRONTEND */ #endif /* FRONTEND */
/* Functions for decoding an XLogRecord */

/*
 * NOTE(review): presumably returns false and sets *errmsg when the record
 * cannot be decoded -- confirm against xlogreader.c.
 */
extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
char **errmsg);

/*
 * Accessors for the record currently decoded in an XLogReaderState.
 * 'decoder' must be a pointer to the reader state; each macro evaluates its
 * arguments exactly once.  Use these rather than touching the decoded_record,
 * main_data and blocks fields directly.
 */

/* Fields of the fixed-size XLogRecord header */
#define XLogRecGetTotalLen(decoder) ((decoder)->decoded_record->xl_tot_len)
#define XLogRecGetPrev(decoder) ((decoder)->decoded_record->xl_prev)
#define XLogRecGetInfo(decoder) ((decoder)->decoded_record->xl_info)
#define XLogRecGetRmid(decoder) ((decoder)->decoded_record->xl_rmid)
#define XLogRecGetXid(decoder) ((decoder)->decoded_record->xl_xid)

/* The record's main data portion and its length */
#define XLogRecGetData(decoder) ((decoder)->main_data)
#define XLogRecGetDataLen(decoder) ((decoder)->main_data_len)

/* max_block_id is -1 when the record has no block references */
#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->max_block_id >= 0)

/*
 * Per-block queries; 'block_id' indexes the blocks[] array without any
 * range check, so it must not exceed XLR_MAX_BLOCK_ID.
 */
#define XLogRecHasBlockRef(decoder, block_id) \
((decoder)->blocks[block_id].in_use)
#define XLogRecHasBlockImage(decoder, block_id) \
((decoder)->blocks[block_id].has_image)
/*
 * Accessors for the block references of a decoded record.  Each takes the
 * XLogReaderState holding the record and the ID of the block reference.
 *
 * RestoreBlockImage writes into the caller-supplied 'dst'; XLogRecGetBlockData
 * reports a length through *len; XLogRecGetBlockTag reports the block's
 * identity through rnode/forknum/blknum.
 *
 * NOTE(review): the exact meaning of the bool results (presumably false when
 * the requested block reference or image is absent) is defined in
 * xlogreader.c -- confirm there.
 */
extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *dst);
extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
				   RelFileNode *rnode, ForkNumber *forknum,
				   BlockNumber *blknum);
#endif /* XLOGREADER_H */ #endif /* XLOGREADER_H */

View File

@ -20,81 +20,161 @@
/* /*
* The overall layout of an XLOG record is: * The overall layout of an XLOG record is:
* Fixed-size header (XLogRecord struct) * Fixed-size header (XLogRecord struct)
* rmgr-specific data * XLogRecordBlockHeader struct
* BkpBlock * XLogRecordBlockHeader struct
* backup block data
* BkpBlock
* backup block data
* ... * ...
* XLogRecordDataHeader[Short|Long] struct
* block data
* block data
* ...
* main data
* *
* where there can be zero to four backup blocks (as signaled by xl_info flag * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
* bits). XLogRecord structs always start on MAXALIGN boundaries in the WAL * rmgr-specific data not associated with a block. XLogRecord structs
* files, and we round up SizeOfXLogRecord so that the rmgr data is also * always start on MAXALIGN boundaries in the WAL files, but the rest of
* guaranteed to begin on a MAXALIGN boundary. However, no padding is added * the fields are not aligned.
* to align BkpBlock structs or backup block data.
* *
* NOTE: xl_len counts only the rmgr data, not the XLogRecord header, * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
* and also not any backup blocks. xl_tot_len counts everything. Neither * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
* length field is rounded up to an alignment boundary. * used to distinguish between block references, and the main data structs.
*/ */
typedef struct XLogRecord typedef struct XLogRecord
{ {
uint32 xl_tot_len; /* total len of entire record */ uint32 xl_tot_len; /* total len of entire record */
TransactionId xl_xid; /* xact id */ TransactionId xl_xid; /* xact id */
uint32 xl_len; /* total len of rmgr data */ XLogRecPtr xl_prev; /* ptr to previous record in log */
uint8 xl_info; /* flag bits, see below */ uint8 xl_info; /* flag bits, see below */
RmgrId xl_rmid; /* resource manager for this record */ RmgrId xl_rmid; /* resource manager for this record */
/* 2 bytes of padding here, initialize to zero */ /* 2 bytes of padding here, initialize to zero */
XLogRecPtr xl_prev; /* ptr to previous record in log */
pg_crc32 xl_crc; /* CRC for this record */ pg_crc32 xl_crc; /* CRC for this record */
/* If MAXALIGN==8, there are 4 wasted bytes here */ /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
} XLogRecord; } XLogRecord;
#define SizeOfXLogRecord MAXALIGN(sizeof(XLogRecord)) #define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32))
#define XLogRecGetData(record) ((char*) (record) + SizeOfXLogRecord)
/* /*
* XLOG uses only low 4 bits of xl_info. High 4 bits may be used by rmgr. * The high 4 bits in xl_info may be used freely by rmgr. The
* XLR_SPECIAL_REL_UPDATE bit can be passed by XLogInsert caller. The rest
* are set internally by XLogInsert.
*/ */
#define XLR_INFO_MASK 0x0F #define XLR_INFO_MASK 0x0F
#define XLR_RMGR_INFO_MASK 0xF0
/* /*
* If we backed up any disk blocks with the XLOG record, we use flag bits in * If a WAL record modifies any relation files, in ways not covered by the
* xl_info to signal it. We support backup of up to 4 disk blocks per XLOG * usual block references, this flag is set. This is not used for anything
* record. * by PostgreSQL itself, but it allows external tools that read WAL and keep
* track of modified blocks to recognize such special record types.
*/ */
#define XLR_BKP_BLOCK_MASK 0x0F /* all info bits used for bkp blocks */ #define XLR_SPECIAL_REL_UPDATE 0x01
#define XLR_MAX_BKP_BLOCKS 4
#define XLR_BKP_BLOCK(iblk) (0x08 >> (iblk)) /* iblk in 0..3 */
/* /*
* Header info for a backup block appended to an XLOG record. * Header info for block data appended to an XLOG record.
*
* Note that we don't attempt to align the XLogRecordBlockHeader struct!
* So, the struct must be copied to aligned local storage before use.
* 'data_length' is the length of the rmgr-specific payload data associated
* with this block. It does not include the possible full-page image, nor
* the XLogRecordBlockHeader struct itself.
*/
typedef struct XLogRecordBlockHeader
{
uint8 id; /* block reference ID */
/*
 * The low 4 bits (BKPBLOCK_FORK_MASK) hold the fork number, the upper bits
 * hold the BKPBLOCK_* flags.
 */
uint8 fork_flags; /* fork within the relation, and flags */
uint16 data_length; /* number of payload bytes (not including page
* image) */
/* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */
/* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */
/* BlockNumber follows */
} XLogRecordBlockHeader;
/*
 * Size of the fixed part: it ends right after data_length, so any trailing
 * struct padding is not counted.
 */
#define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16))
/*
* Additional header information when a full-page image is included
* (i.e. when BKPBLOCK_HAS_IMAGE is set).
* *
* As a trivial form of data compression, the XLOG code is aware that * As a trivial form of data compression, the XLOG code is aware that
* PG data pages usually contain an unused "hole" in the middle, which * PG data pages usually contain an unused "hole" in the middle, which
* contains only zero bytes. If hole_length > 0 then we have removed * contains only zero bytes. If hole_length > 0 then we have removed
* such a "hole" from the stored data (and it's not counted in the * such a "hole" from the stored data (and it's not counted in the
* XLOG record's CRC, either). Hence, the amount of block data actually * XLOG record's CRC, either). Hence, the amount of block data actually
* present following the BkpBlock struct is BLCKSZ - hole_length bytes. * present is BLCKSZ - hole_length bytes.
*
* Note that we don't attempt to align either the BkpBlock struct or the
* block's data. So, the struct must be copied to aligned local storage
* before use.
*/ */
typedef struct BkpBlock typedef struct XLogRecordBlockImageHeader
{ {
RelFileNode node; /* relation containing block */
ForkNumber fork; /* fork within the relation */
BlockNumber block; /* block number */
uint16 hole_offset; /* number of bytes before "hole" */ uint16 hole_offset; /* number of bytes before "hole" */
uint16 hole_length; /* number of bytes in "hole" */ uint16 hole_length; /* number of bytes in "hole" */
} XLogRecordBlockImageHeader;
#define SizeOfXLogRecordBlockImageHeader sizeof(XLogRecordBlockImageHeader)
/*
 * Maximum size of the header for a block reference. This is used to size a
 * temporary buffer for constructing the header.
 *
 * The sum is the fixed XLogRecordBlockHeader plus every part that may follow
 * it: the full-page image header, a RelFileNode and the BlockNumber.
 */
#define MaxSizeOfXLogRecordBlockHeader \
(SizeOfXLogRecordBlockHeader + \
SizeOfXLogRecordBlockImageHeader + \
sizeof(RelFileNode) + \
sizeof(BlockNumber))
/*
 * Layout of the fork_flags byte of a block header: the fork number lives in
 * the low nibble, and each bit of the high nibble is an independent flag.
 */
#define BKPBLOCK_FORK_MASK	0x0F
#define BKPBLOCK_FLAG_MASK	0xF0
#define BKPBLOCK_HAS_IMAGE	(1 << 4)	/* block data is an XLogRecordBlockImage */
#define BKPBLOCK_HAS_DATA	(1 << 5)	/* NOTE(review): presumably "rmgr-specific
									 * data is attached to this block" --
									 * confirm */
#define BKPBLOCK_WILL_INIT	(1 << 6)	/* redo will re-init the page */
#define BKPBLOCK_SAME_REL	(1 << 7)	/* RelFileNode omitted, same as previous */
/*
* XLogRecordDataHeaderShort/Long are used for the "main data" portion of
* the record. If the length of the data is less than 256 bytes, the short
* form is used, with a single byte to hold the length. Otherwise the long
* form is used.
*
* (These structs are currently not used in the code, they are here just for
* documentation purposes).
*/
typedef struct XLogRecordDataHeaderShort
{
uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */
uint8 data_length; /* number of payload bytes */
} XLogRecordDataHeaderShort;
/* one id byte plus one length byte */
#define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2)
typedef struct XLogRecordDataHeaderLong
{
uint8 id; /* XLR_BLOCK_ID_DATA_LONG */
/* followed by uint32 data_length, unaligned */
} XLogRecordDataHeaderLong;
/*
 * The id byte plus the uint32 length that follows it.  Computed by hand
 * because the length is not a member of the struct.
 */
#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
/*
 * Block IDs used to distinguish different kinds of record fragments. Block
 * references are numbered from 0 to XLR_MAX_BLOCK_ID. A rmgr is free to use
 * any ID number in that range (although you should stick to small numbers,
 * because the WAL machinery is optimized for that case). A couple of ID
 * numbers are reserved to denote the "main" data portion of the record.
 *
 * The maximum is currently set at 32, quite arbitrarily. Most records only
 * need a handful of block references, but there are a few exceptions that
 * need more.
 *
 * The reserved IDs lie above XLR_MAX_BLOCK_ID, so they can never collide
 * with a block reference ID.
 */
#define XLR_MAX_BLOCK_ID			32

#define XLR_BLOCK_ID_DATA_SHORT		255
#define XLR_BLOCK_ID_DATA_LONG		254

/*
 * SizeOfXLogRecordDataHeaderLong is defined once, next to the
 * XLogRecordDataHeaderLong struct; it is deliberately not repeated here.
 */
/* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
} BkpBlock;
#endif /* XLOGRECORD_H */ #endif /* XLOGRECORD_H */

View File

@ -11,7 +11,7 @@
#ifndef XLOG_UTILS_H #ifndef XLOG_UTILS_H
#define XLOG_UTILS_H #define XLOG_UTILS_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
@ -33,26 +33,17 @@ typedef enum
* replayed) */ * replayed) */
} XLogRedoAction; } XLogRedoAction;
extern XLogRedoAction XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, extern XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record,
int block_index, RelFileNode rnode, BlockNumber blkno, uint8 buffer_id, Buffer *buf);
Buffer *buf); extern Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id);
extern XLogRedoAction XLogReadBufferForRedoExtended(XLogRecPtr lsn, extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record,
XLogRecord *record, int block_index, uint8 buffer_id,
RelFileNode rnode, ForkNumber forkno,
BlockNumber blkno,
ReadBufferMode mode, bool get_cleanup_lock, ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf); Buffer *buf);
extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, ReadBufferMode mode); BlockNumber blkno, ReadBufferMode mode);
extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
int block_index,
bool get_cleanup_lock, bool keep_buffer);
extern Buffer RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb,
char *blk, bool get_cleanup_lock, bool keep_buffer);
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode); extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel); extern void FreeFakeRelcacheEntry(Relation fakerel);

View File

@ -14,7 +14,7 @@
#ifndef STORAGE_XLOG_H #ifndef STORAGE_XLOG_H
#define STORAGE_XLOG_H #define STORAGE_XLOG_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/block.h" #include "storage/block.h"
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
@ -44,8 +44,8 @@ typedef struct xl_smgr_truncate
extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum); extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum);
extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record); extern void smgr_redo(XLogReaderState *record);
extern void smgr_desc(StringInfo buf, XLogRecord *record); extern void smgr_desc(StringInfo buf, XLogReaderState *record);
extern const char *smgr_identify(uint8 info); extern const char *smgr_identify(uint8 info);
#endif /* STORAGE_XLOG_H */ #endif /* STORAGE_XLOG_H */

View File

@ -14,7 +14,7 @@
#ifndef DBCOMMANDS_H #ifndef DBCOMMANDS_H
#define DBCOMMANDS_H #define DBCOMMANDS_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
@ -63,8 +63,8 @@ extern Oid AlterDatabaseOwner(const char *dbname, Oid newOwnerId);
extern Oid get_database_oid(const char *dbname, bool missingok); extern Oid get_database_oid(const char *dbname, bool missingok);
extern char *get_database_name(Oid dbid); extern char *get_database_name(Oid dbid);
extern void dbase_redo(XLogRecPtr lsn, XLogRecord *rptr); extern void dbase_redo(XLogReaderState *rptr);
extern void dbase_desc(StringInfo buf, XLogRecord *rptr); extern void dbase_desc(StringInfo buf, XLogReaderState *rptr);
extern const char *dbase_identify(uint8 info); extern const char *dbase_identify(uint8 info);
extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype); extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype);

View File

@ -13,7 +13,7 @@
#ifndef SEQUENCE_H #ifndef SEQUENCE_H
#define SEQUENCE_H #define SEQUENCE_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "fmgr.h" #include "fmgr.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
@ -77,8 +77,8 @@ extern Oid AlterSequence(AlterSeqStmt *stmt);
extern void ResetSequence(Oid seq_relid); extern void ResetSequence(Oid seq_relid);
extern void ResetSequenceCaches(void); extern void ResetSequenceCaches(void);
extern void seq_redo(XLogRecPtr lsn, XLogRecord *rptr); extern void seq_redo(XLogReaderState *rptr);
extern void seq_desc(StringInfo buf, XLogRecord *rptr); extern void seq_desc(StringInfo buf, XLogReaderState *rptr);
extern const char *seq_identify(uint8 info); extern const char *seq_identify(uint8 info);
#endif /* SEQUENCE_H */ #endif /* SEQUENCE_H */

View File

@ -14,7 +14,7 @@
#ifndef TABLESPACE_H #ifndef TABLESPACE_H
#define TABLESPACE_H #define TABLESPACE_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
@ -56,8 +56,8 @@ extern char *get_tablespace_name(Oid spc_oid);
extern bool directory_is_empty(const char *path); extern bool directory_is_empty(const char *path);
extern void tblspc_redo(XLogRecPtr lsn, XLogRecord *rptr); extern void tblspc_redo(XLogReaderState *rptr);
extern void tblspc_desc(StringInfo buf, XLogRecord *rptr); extern void tblspc_desc(StringInfo buf, XLogReaderState *rptr);
extern const char *tblspc_identify(uint8 info); extern const char *tblspc_identify(uint8 info);
#endif /* TABLESPACE_H */ #endif /* TABLESPACE_H */

View File

@ -15,6 +15,6 @@
#include "replication/logical.h" #include "replication/logical.h"
void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx,
XLogRecord *record); XLogReaderState *record);
#endif #endif

View File

@ -14,7 +14,7 @@
#ifndef STANDBY_H #ifndef STANDBY_H
#define STANDBY_H #define STANDBY_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/lock.h" #include "storage/lock.h"
#include "storage/procsignal.h" #include "storage/procsignal.h"
@ -82,8 +82,8 @@ typedef struct xl_running_xacts
/* Recovery handlers for the Standby Rmgr (RM_STANDBY_ID) */ /* Recovery handlers for the Standby Rmgr (RM_STANDBY_ID) */
extern void standby_redo(XLogRecPtr lsn, XLogRecord *record); extern void standby_redo(XLogReaderState *record);
extern void standby_desc(StringInfo buf, XLogRecord *record); extern void standby_desc(StringInfo buf, XLogReaderState *record);
extern const char *standby_identify(uint8 info); extern const char *standby_identify(uint8 info);
/* /*

View File

@ -14,7 +14,7 @@
#ifndef RELMAPPER_H #ifndef RELMAPPER_H
#define RELMAPPER_H #define RELMAPPER_H
#include "access/xlogrecord.h" #include "access/xlogreader.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
/* ---------------- /* ----------------
@ -59,8 +59,8 @@ extern void RelationMapInitialize(void);
extern void RelationMapInitializePhase2(void); extern void RelationMapInitializePhase2(void);
extern void RelationMapInitializePhase3(void); extern void RelationMapInitializePhase3(void);
extern void relmap_redo(XLogRecPtr lsn, XLogRecord *record); extern void relmap_redo(XLogReaderState *record);
extern void relmap_desc(StringInfo buf, XLogRecord *record); extern void relmap_desc(StringInfo buf, XLogReaderState *record);
extern const char *relmap_identify(uint8 info); extern const char *relmap_identify(uint8 info);
#endif /* RELMAPPER_H */ #endif /* RELMAPPER_H */