2005-06-14 13:45:14 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* gistxlog.c
|
|
|
|
* WAL replay logic for GiST.
|
|
|
|
*
|
|
|
|
*
|
2010-01-02 17:58:17 +01:00
|
|
|
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
2005-06-14 13:45:14 +02:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-01-02 17:58:17 +01:00
|
|
|
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.35 2010/01/02 16:57:34 momjian Exp $
|
2005-06-14 13:45:14 +02:00
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include "access/gist_private.h"
|
2008-05-12 02:00:54 +02:00
|
|
|
#include "access/xlogutils.h"
|
2005-06-14 13:45:14 +02:00
|
|
|
#include "miscadmin.h"
|
2008-05-12 02:00:54 +02:00
|
|
|
#include "storage/bufmgr.h"
|
2005-06-14 13:45:14 +02:00
|
|
|
#include "utils/memutils.h"
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "utils/rel.h"
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
typedef struct
|
|
|
|
{
|
2006-03-31 01:03:10 +02:00
|
|
|
gistxlogPageUpdate *data;
|
2005-06-14 13:45:14 +02:00
|
|
|
int len;
|
2005-09-22 22:44:36 +02:00
|
|
|
IndexTuple *itup;
|
|
|
|
OffsetNumber *todelete;
|
2006-03-31 01:03:10 +02:00
|
|
|
} PageUpdateRecord;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
gistxlogPage *header;
|
|
|
|
IndexTuple *itup;
|
2005-06-14 13:45:14 +02:00
|
|
|
} NewPage;
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
gistxlogPageSplit *data;
|
|
|
|
NewPage *page;
|
2005-06-14 13:45:14 +02:00
|
|
|
} PageSplitRecord;
|
|
|
|
|
|
|
|
/* track for incomplete inserts, idea was taken from nbtxlog.c */
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
typedef struct gistIncompleteInsert
|
|
|
|
{
|
|
|
|
RelFileNode node;
|
|
|
|
BlockNumber origblkno; /* for splits */
|
|
|
|
ItemPointerData key;
|
|
|
|
int lenblk;
|
|
|
|
BlockNumber *blkno;
|
2005-06-20 12:29:37 +02:00
|
|
|
XLogRecPtr lsn;
|
2005-09-22 22:44:36 +02:00
|
|
|
BlockNumber *path;
|
|
|
|
int pathlen;
|
2005-06-14 13:45:14 +02:00
|
|
|
} gistIncompleteInsert;
|
|
|
|
|
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
static MemoryContext opCtx; /* working memory for operations */
|
2006-10-04 02:30:14 +02:00
|
|
|
static MemoryContext insertCtx; /* holds incomplete_inserts list */
|
2005-06-14 13:45:14 +02:00
|
|
|
static List *incomplete_inserts;
|
|
|
|
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
/* True when two item pointers carry the same offset and block number */
#define ItemPointerEQ(a, b) \
	(ItemPointerGetOffsetNumber(a) == ItemPointerGetOffsetNumber(b) && \
	 ItemPointerGetBlockNumber(a) == ItemPointerGetBlockNumber(b))
|
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
|
|
|
|
static void
|
2005-06-20 12:29:37 +02:00
|
|
|
pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
|
2005-09-22 22:44:36 +02:00
|
|
|
BlockNumber *blkno, int lenblk,
|
|
|
|
PageSplitRecord *xlinfo /* to extract blkno info */ )
|
|
|
|
{
|
2006-05-19 13:10:25 +02:00
|
|
|
MemoryContext oldCxt;
|
|
|
|
gistIncompleteInsert *ninsert;
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
if (!ItemPointerIsValid(&key))
|
|
|
|
|
|
|
|
/*
|
2006-05-19 13:10:25 +02:00
|
|
|
* if key is null then we should not store insertion as incomplete,
|
|
|
|
* because it's a vacuum operation..
|
|
|
|
*/
|
|
|
|
return;
|
|
|
|
|
|
|
|
oldCxt = MemoryContextSwitchTo(insertCtx);
|
|
|
|
ninsert = (gistIncompleteInsert *) palloc(sizeof(gistIncompleteInsert));
|
2005-06-14 13:45:14 +02:00
|
|
|
|
|
|
|
ninsert->node = node;
|
2005-09-22 22:44:36 +02:00
|
|
|
ninsert->key = key;
|
|
|
|
ninsert->lsn = lsn;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
if (lenblk && blkno)
|
|
|
|
{
|
2005-06-14 13:45:14 +02:00
|
|
|
ninsert->lenblk = lenblk;
|
2005-09-22 22:44:36 +02:00
|
|
|
ninsert->blkno = (BlockNumber *) palloc(sizeof(BlockNumber) * ninsert->lenblk);
|
|
|
|
memcpy(ninsert->blkno, blkno, sizeof(BlockNumber) * ninsert->lenblk);
|
2005-06-30 19:52:14 +02:00
|
|
|
ninsert->origblkno = *blkno;
|
2005-09-22 22:44:36 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int i;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
Assert(xlinfo);
|
2005-06-14 13:45:14 +02:00
|
|
|
ninsert->lenblk = xlinfo->data->npage;
|
2005-09-22 22:44:36 +02:00
|
|
|
ninsert->blkno = (BlockNumber *) palloc(sizeof(BlockNumber) * ninsert->lenblk);
|
|
|
|
for (i = 0; i < ninsert->lenblk; i++)
|
2005-06-14 13:45:14 +02:00
|
|
|
ninsert->blkno[i] = xlinfo->page[i].header->blkno;
|
2005-06-30 19:52:14 +02:00
|
|
|
ninsert->origblkno = xlinfo->data->origblkno;
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
2005-09-22 22:44:36 +02:00
|
|
|
Assert(ninsert->lenblk > 0);
|
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
/*
|
|
|
|
* Stick the new incomplete insert onto the front of the list, not the
|
2006-10-04 02:30:14 +02:00
|
|
|
* back. This is so that gist_xlog_cleanup will process incompletions in
|
|
|
|
* last-in-first-out order.
|
2006-03-31 01:03:10 +02:00
|
|
|
*/
|
|
|
|
incomplete_inserts = lcons(ninsert, incomplete_inserts);
|
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
MemoryContextSwitchTo(oldCxt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2005-09-22 22:44:36 +02:00
|
|
|
forgetIncompleteInsert(RelFileNode node, ItemPointerData key)
|
|
|
|
{
|
2005-06-14 13:45:14 +02:00
|
|
|
ListCell *l;
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
if (!ItemPointerIsValid(&key))
|
2006-05-19 13:10:25 +02:00
|
|
|
return;
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
if (incomplete_inserts == NIL)
|
2006-05-19 13:10:25 +02:00
|
|
|
return;
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
foreach(l, incomplete_inserts)
|
|
|
|
{
|
|
|
|
gistIncompleteInsert *insert = (gistIncompleteInsert *) lfirst(l);
|
|
|
|
|
|
|
|
if (RelFileNodeEquals(node, insert->node) && ItemPointerEQ(&(insert->key), &(key)))
|
|
|
|
{
|
2005-06-14 13:45:14 +02:00
|
|
|
/* found */
|
|
|
|
incomplete_inserts = list_delete_ptr(incomplete_inserts, insert);
|
2006-03-31 01:03:10 +02:00
|
|
|
pfree(insert->blkno);
|
2005-09-22 22:44:36 +02:00
|
|
|
pfree(insert);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
}
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-03-31 01:03:10 +02:00
|
|
|
decodePageUpdateRecord(PageUpdateRecord *decoded, XLogRecord *record)
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
|
|
|
char *begin = XLogRecGetData(record),
|
|
|
|
*ptr;
|
|
|
|
int i = 0,
|
|
|
|
addpath = 0;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
decoded->data = (gistxlogPageUpdate *) begin;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
if (decoded->data->ntodelete)
|
|
|
|
{
|
2006-03-31 01:03:10 +02:00
|
|
|
decoded->todelete = (OffsetNumber *) (begin + sizeof(gistxlogPageUpdate) + addpath);
|
2005-09-22 22:44:36 +02:00
|
|
|
addpath = MAXALIGN(sizeof(OffsetNumber) * decoded->data->ntodelete);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
decoded->todelete = NULL;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
decoded->len = 0;
|
2006-03-31 01:03:10 +02:00
|
|
|
ptr = begin + sizeof(gistxlogPageUpdate) + addpath;
|
2005-09-22 22:44:36 +02:00
|
|
|
while (ptr - begin < record->xl_len)
|
|
|
|
{
|
2005-06-14 13:45:14 +02:00
|
|
|
decoded->len++;
|
2005-09-22 22:44:36 +02:00
|
|
|
ptr += IndexTupleSize((IndexTuple) ptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
decoded->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * decoded->len);
|
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
ptr = begin + sizeof(gistxlogPageUpdate) + addpath;
|
2005-09-22 22:44:36 +02:00
|
|
|
while (ptr - begin < record->xl_len)
|
|
|
|
{
|
|
|
|
decoded->itup[i] = (IndexTuple) ptr;
|
|
|
|
ptr += IndexTupleSize(decoded->itup[i]);
|
2005-06-14 13:45:14 +02:00
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-06-20 12:29:37 +02:00
|
|
|
/*
|
|
|
|
* redo any page update (except page split)
|
|
|
|
*/
|
2005-06-14 13:45:14 +02:00
|
|
|
static void
|
2006-03-31 01:03:10 +02:00
|
|
|
gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot)
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
2006-04-03 18:45:50 +02:00
|
|
|
gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record);
|
2006-03-31 01:03:10 +02:00
|
|
|
PageUpdateRecord xlrec;
|
2005-09-22 22:44:36 +02:00
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2006-04-03 18:45:50 +02:00
|
|
|
/* we must fix incomplete_inserts list even if XLR_BKP_BLOCK_1 is set */
|
2006-05-19 13:10:25 +02:00
|
|
|
forgetIncompleteInsert(xldata->node, xldata->key);
|
2006-04-03 18:45:50 +02:00
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
if (!isnewroot && xldata->blkno != GIST_ROOT_BLKNO)
|
|
|
|
/* operation with root always finalizes insertion */
|
|
|
|
pushIncompleteInsert(xldata->node, lsn, xldata->key,
|
|
|
|
&(xldata->blkno), 1,
|
|
|
|
NULL);
|
2006-04-03 18:45:50 +02:00
|
|
|
|
|
|
|
/* nothing else to do if page was backed up (and no info to do it with) */
|
2006-03-31 01:03:10 +02:00
|
|
|
if (record->xl_info & XLR_BKP_BLOCK_1)
|
|
|
|
return;
|
|
|
|
|
|
|
|
decodePageUpdateRecord(&xlrec, record);
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec.data->node, xlrec.data->blkno, false);
|
2005-06-14 13:45:14 +02:00
|
|
|
if (!BufferIsValid(buffer))
|
2006-03-31 01:03:10 +02:00
|
|
|
return;
|
2005-06-14 13:45:14 +02:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
2006-04-01 01:32:07 +02:00
|
|
|
UnlockReleaseBuffer(buffer);
|
2006-03-31 01:03:10 +02:00
|
|
|
return;
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
if (isnewroot)
|
|
|
|
GISTInitBuffer(buffer, 0);
|
|
|
|
else if (xlrec.data->ntodelete)
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
2006-05-17 18:34:59 +02:00
|
|
|
int i;
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
for (i = 0; i < xlrec.data->ntodelete; i++)
|
|
|
|
PageIndexTupleDelete(page, xlrec.todelete[i]);
|
|
|
|
if (GistPageIsLeaf(page))
|
|
|
|
GistMarkTuplesDeleted(page);
|
2005-09-22 22:44:36 +02:00
|
|
|
}
|
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
/* add tuples */
|
|
|
|
if (xlrec.len > 0)
|
2008-06-12 11:12:31 +02:00
|
|
|
gistfillbuffer(page, xlrec.itup, xlrec.len, InvalidOffsetNumber);
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
/*
|
|
|
|
* special case: leafpage, nothing to insert, nothing to delete, then
|
|
|
|
* vacuum marks page
|
|
|
|
*/
|
|
|
|
if (GistPageIsLeaf(page) && xlrec.len == 0 && xlrec.data->ntodelete == 0)
|
|
|
|
GistClearTuplesDeleted(page);
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
if (!GistPageIsLeaf(page) && PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && xldata->blkno == GIST_ROOT_BLKNO)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* all links on non-leaf root page was deleted by vacuum full, so root
|
|
|
|
* page becomes a leaf
|
|
|
|
*/
|
2006-05-17 18:34:59 +02:00
|
|
|
GistPageSetLeaf(page);
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
|
2005-06-14 13:45:14 +02:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
static void
|
|
|
|
gistRedoPageDeleteRecord(XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
|
|
|
gistxlogPageDelete *xldata = (gistxlogPageDelete *) XLogRecGetData(record);
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
/* nothing else to do if page was backed up (and no info to do it with) */
|
|
|
|
if (record->xl_info & XLR_BKP_BLOCK_1)
|
|
|
|
return;
|
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xldata->node, xldata->blkno, false);
|
2006-05-17 18:34:59 +02:00
|
|
|
if (!BufferIsValid(buffer))
|
|
|
|
return;
|
|
|
|
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
GistPageSetDeleted(page);
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
|
|
|
}
|
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
static void
|
2005-09-22 22:44:36 +02:00
|
|
|
decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record)
|
|
|
|
{
|
|
|
|
char *begin = XLogRecGetData(record),
|
|
|
|
*ptr;
|
|
|
|
int j,
|
|
|
|
i = 0;
|
|
|
|
|
|
|
|
decoded->data = (gistxlogPageSplit *) begin;
|
|
|
|
decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage);
|
|
|
|
|
|
|
|
ptr = begin + sizeof(gistxlogPageSplit);
|
|
|
|
for (i = 0; i < decoded->data->npage; i++)
|
|
|
|
{
|
|
|
|
Assert(ptr - begin < record->xl_len);
|
|
|
|
decoded->page[i].header = (gistxlogPage *) ptr;
|
2005-06-14 13:45:14 +02:00
|
|
|
ptr += sizeof(gistxlogPage);
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
decoded->page[i].itup = (IndexTuple *)
|
|
|
|
palloc(sizeof(IndexTuple) * decoded->page[i].header->num);
|
|
|
|
j = 0;
|
|
|
|
while (j < decoded->page[i].header->num)
|
|
|
|
{
|
|
|
|
Assert(ptr - begin < record->xl_len);
|
|
|
|
decoded->page[i].itup[j] = (IndexTuple) ptr;
|
|
|
|
ptr += IndexTupleSize((IndexTuple) ptr);
|
2005-06-20 17:22:38 +02:00
|
|
|
j++;
|
|
|
|
}
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
}
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
static void
|
2005-09-22 22:44:36 +02:00
|
|
|
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
|
|
|
PageSplitRecord xlrec;
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
int i;
|
2006-03-31 01:03:10 +02:00
|
|
|
int flags;
|
2005-09-22 22:44:36 +02:00
|
|
|
|
|
|
|
decodePageSplitRecord(&xlrec, record);
|
2006-03-31 01:03:10 +02:00
|
|
|
flags = xlrec.data->origleaf ? F_LEAF : 0;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-06-20 17:22:38 +02:00
|
|
|
/* loop around all pages */
|
2005-09-22 22:44:36 +02:00
|
|
|
for (i = 0; i < xlrec.data->npage; i++)
|
|
|
|
{
|
|
|
|
NewPage *newpage = xlrec.page + i;
|
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true);
|
2006-03-31 01:03:10 +02:00
|
|
|
Assert(BufferIsValid(buffer));
|
2005-06-20 17:22:38 +02:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2005-06-20 17:22:38 +02:00
|
|
|
/* ok, clear buffer */
|
2005-09-22 22:44:36 +02:00
|
|
|
GISTInitBuffer(buffer, flags);
|
|
|
|
|
2005-06-20 17:22:38 +02:00
|
|
|
/* and fill it */
|
2008-06-12 11:12:31 +02:00
|
|
|
gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber);
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2005-06-20 17:22:38 +02:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
forgetIncompleteInsert(xlrec.data->node, xlrec.data->key);
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
pushIncompleteInsert(xlrec.data->node, lsn, xlrec.data->key,
|
|
|
|
NULL, 0,
|
|
|
|
&xlrec);
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2005-09-22 22:44:36 +02:00
|
|
|
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
|
|
|
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
|
2006-03-29 23:17:39 +02:00
|
|
|
Assert(BufferIsValid(buffer));
|
2005-06-14 13:45:14 +02:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
|
|
|
|
GISTInitBuffer(buffer, F_LEAF);
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-03-29 23:17:39 +02:00
|
|
|
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
2005-06-20 12:29:37 +02:00
|
|
|
static void
|
2005-09-22 22:44:36 +02:00
|
|
|
gistRedoCompleteInsert(XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
|
|
|
char *begin = XLogRecGetData(record),
|
|
|
|
*ptr;
|
|
|
|
gistxlogInsertComplete *xlrec;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
xlrec = (gistxlogInsertComplete *) begin;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
ptr = begin + sizeof(gistxlogInsertComplete);
|
|
|
|
while (ptr - begin < record->xl_len)
|
|
|
|
{
|
|
|
|
Assert(record->xl_len - (ptr - begin) >= sizeof(ItemPointerData));
|
|
|
|
forgetIncompleteInsert(xlrec->node, *((ItemPointerData *) ptr));
|
2005-06-20 12:29:37 +02:00
|
|
|
ptr += sizeof(ItemPointerData);
|
2005-09-22 22:44:36 +02:00
|
|
|
}
|
2005-06-20 12:29:37 +02:00
|
|
|
}
|
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
void
|
|
|
|
gist_redo(XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
2005-09-22 22:44:36 +02:00
|
|
|
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
2005-06-14 13:45:14 +02:00
|
|
|
MemoryContext oldCxt;
|
2005-09-22 22:44:36 +02:00
|
|
|
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
/*
|
|
|
|
* GIST indexes do not require any conflict processing. NB: If we ever
|
|
|
|
* implement a similar optimization we have in b-tree, and remove killed
|
|
|
|
* tuples outside VACUUM, we'll need to handle that here.
|
|
|
|
*/
|
|
|
|
|
2009-01-20 19:59:37 +01:00
|
|
|
RestoreBkpBlocks(lsn, record, false);
|
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
oldCxt = MemoryContextSwitchTo(opCtx);
|
2005-09-22 22:44:36 +02:00
|
|
|
switch (info)
|
|
|
|
{
|
2006-03-31 01:03:10 +02:00
|
|
|
case XLOG_GIST_PAGE_UPDATE:
|
|
|
|
gistRedoPageUpdateRecord(lsn, record, false);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
2006-05-17 18:34:59 +02:00
|
|
|
case XLOG_GIST_PAGE_DELETE:
|
|
|
|
gistRedoPageDeleteRecord(lsn, record);
|
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
case XLOG_GIST_NEW_ROOT:
|
2006-03-31 01:03:10 +02:00
|
|
|
gistRedoPageUpdateRecord(lsn, record, true);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
case XLOG_GIST_PAGE_SPLIT:
|
|
|
|
gistRedoPageSplitRecord(lsn, record);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
case XLOG_GIST_CREATE_INDEX:
|
2005-06-14 13:45:14 +02:00
|
|
|
gistRedoCreateIndex(lsn, record);
|
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
case XLOG_GIST_INSERT_COMPLETE:
|
2005-06-20 12:29:37 +02:00
|
|
|
gistRedoCompleteInsert(lsn, record);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
elog(PANIC, "gist_redo: unknown op code %u", info);
|
|
|
|
}
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldCxt);
|
|
|
|
MemoryContextReset(opCtx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-03-24 05:32:13 +01:00
|
|
|
out_target(StringInfo buf, RelFileNode node, ItemPointerData key)
|
2005-06-14 13:45:14 +02:00
|
|
|
{
|
2006-05-17 18:34:59 +02:00
|
|
|
appendStringInfo(buf, "rel %u/%u/%u",
|
2006-10-04 02:30:14 +02:00
|
|
|
node.spcNode, node.dbNode, node.relNode);
|
|
|
|
if (ItemPointerIsValid(&key))
|
2006-05-17 18:34:59 +02:00
|
|
|
appendStringInfo(buf, "; tid %u/%u",
|
2006-10-04 02:30:14 +02:00
|
|
|
ItemPointerGetBlockNumber(&key),
|
|
|
|
ItemPointerGetOffsetNumber(&key));
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-03-31 01:03:10 +02:00
|
|
|
out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
2005-06-14 13:45:14 +02:00
|
|
|
out_target(buf, xlrec->node, xlrec->key);
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "; block number %u", xlrec->blkno);
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
static void
|
|
|
|
out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec)
|
|
|
|
{
|
|
|
|
appendStringInfo(buf, "page_delete: rel %u/%u/%u; blkno %u",
|
2006-10-04 02:30:14 +02:00
|
|
|
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
|
|
|
|
xlrec->blkno);
|
2006-05-17 18:34:59 +02:00
|
|
|
}
|
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
static void
|
2006-03-24 05:32:13 +01:00
|
|
|
out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "page_split: ");
|
2005-06-14 13:45:14 +02:00
|
|
|
out_target(buf, xlrec->node, xlrec->key);
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "; block number %u splits to %d pages",
|
2006-10-04 02:30:14 +02:00
|
|
|
xlrec->origblkno, xlrec->npage);
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2006-03-24 05:32:13 +01:00
|
|
|
gist_desc(StringInfo buf, uint8 xl_info, char *rec)
|
2005-06-14 13:45:14 +02:00
|
|
|
{
|
2005-09-22 22:44:36 +02:00
|
|
|
uint8 info = xl_info & ~XLR_INFO_MASK;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
switch (info)
|
|
|
|
{
|
2006-03-31 01:03:10 +02:00
|
|
|
case XLOG_GIST_PAGE_UPDATE:
|
|
|
|
appendStringInfo(buf, "page_update: ");
|
|
|
|
out_gistxlogPageUpdate(buf, (gistxlogPageUpdate *) rec);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
2006-05-17 18:34:59 +02:00
|
|
|
case XLOG_GIST_PAGE_DELETE:
|
|
|
|
out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec);
|
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
case XLOG_GIST_NEW_ROOT:
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "new_root: ");
|
2006-03-31 01:03:10 +02:00
|
|
|
out_target(buf, ((gistxlogPageUpdate *) rec)->node, ((gistxlogPageUpdate *) rec)->key);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
case XLOG_GIST_PAGE_SPLIT:
|
|
|
|
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
case XLOG_GIST_CREATE_INDEX:
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "create_index: rel %u/%u/%u",
|
2006-10-04 02:30:14 +02:00
|
|
|
((RelFileNode *) rec)->spcNode,
|
|
|
|
((RelFileNode *) rec)->dbNode,
|
|
|
|
((RelFileNode *) rec)->relNode);
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
2005-09-22 22:44:36 +02:00
|
|
|
case XLOG_GIST_INSERT_COMPLETE:
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "complete_insert: rel %u/%u/%u",
|
2006-10-04 02:30:14 +02:00
|
|
|
((gistxlogInsertComplete *) rec)->node.spcNode,
|
|
|
|
((gistxlogInsertComplete *) rec)->node.dbNode,
|
|
|
|
((gistxlogInsertComplete *) rec)->node.relNode);
|
2005-09-22 20:49:45 +02:00
|
|
|
break;
|
2005-06-14 13:45:14 +02:00
|
|
|
default:
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "unknown gist op code %u", info);
|
|
|
|
break;
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
IndexTuple
|
|
|
|
gist_form_invalid_tuple(BlockNumber blkno)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* we don't alloc space for null's bitmap, this is invalid tuple, be
|
|
|
|
* carefull in read and write code
|
|
|
|
*/
|
|
|
|
Size size = IndexInfoFindDataOffset(0);
|
|
|
|
IndexTuple tuple = (IndexTuple) palloc0(size);
|
2005-06-20 12:29:37 +02:00
|
|
|
|
|
|
|
tuple->t_info |= size;
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2005-06-20 12:29:37 +02:00
|
|
|
ItemPointerSetBlockNumber(&(tuple->t_tid), blkno);
|
2005-09-22 22:44:36 +02:00
|
|
|
GistTupleSetInvalid(tuple);
|
2005-06-20 12:29:37 +02:00
|
|
|
|
|
|
|
return tuple;
|
|
|
|
}
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-06-30 19:52:14 +02:00
|
|
|
|
2005-06-27 14:45:23 +02:00
|
|
|
static void
|
2006-03-31 01:03:10 +02:00
|
|
|
gistxlogFindPath(Relation index, gistIncompleteInsert *insert)
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
2005-06-27 14:45:23 +02:00
|
|
|
GISTInsertStack *top;
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2005-06-27 14:45:23 +02:00
|
|
|
insert->pathlen = 0;
|
|
|
|
insert->path = NULL;
|
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
if ((top = gistFindPath(index, insert->origblkno)) != NULL)
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
|
|
|
int i;
|
2006-03-31 01:03:10 +02:00
|
|
|
GISTInsertStack *ptr;
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
for (ptr = top; ptr; ptr = ptr->parent)
|
2005-06-30 19:52:14 +02:00
|
|
|
insert->pathlen++;
|
2005-06-27 14:45:23 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
insert->path = (BlockNumber *) palloc(sizeof(BlockNumber) * insert->pathlen);
|
2005-06-27 14:45:23 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
i = 0;
|
2006-03-31 01:03:10 +02:00
|
|
|
for (ptr = top; ptr; ptr = ptr->parent)
|
|
|
|
insert->path[i++] = ptr->blkno;
|
2005-09-22 22:44:36 +02:00
|
|
|
}
|
|
|
|
else
|
2006-05-19 13:10:25 +02:00
|
|
|
elog(ERROR, "lost parent for block %u", insert->origblkno);
|
|
|
|
}
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
static SplitedPageLayout *
|
|
|
|
gistMakePageLayout(Buffer *buffers, int nbuffers)
|
|
|
|
{
|
|
|
|
SplitedPageLayout *res = NULL,
|
|
|
|
*resptr;
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
while (nbuffers-- > 0)
|
|
|
|
{
|
|
|
|
Page page = BufferGetPage(buffers[nbuffers]);
|
|
|
|
IndexTuple *vec;
|
|
|
|
int veclen;
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
resptr = (SplitedPageLayout *) palloc0(sizeof(SplitedPageLayout));
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
resptr->block.blkno = BufferGetBlockNumber(buffers[nbuffers]);
|
|
|
|
resptr->block.num = PageGetMaxOffsetNumber(page);
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
vec = gistextractpage(page, &veclen);
|
|
|
|
resptr->list = gistfillitupvec(vec, veclen, &(resptr->lenlist));
|
2006-05-19 13:10:25 +02:00
|
|
|
|
|
|
|
resptr->next = res;
|
|
|
|
res = resptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
return res;
|
2005-06-27 14:45:23 +02:00
|
|
|
}
|
|
|
|
|
2005-07-01 15:18:17 +02:00
|
|
|
/*
|
2006-03-31 01:03:10 +02:00
|
|
|
* Continue insert after crash. In normal situations, there aren't any
|
|
|
|
* incomplete inserts, but if a crash occurs partway through an insertion
|
|
|
|
* sequence, we'll need to finish making the index valid at the end of WAL
|
|
|
|
* replay.
|
|
|
|
*
|
|
|
|
* Note that we assume the index is now in a valid state, except for the
|
|
|
|
* unfinished insertion. In particular it's safe to invoke gistFindPath();
|
|
|
|
* there shouldn't be any garbage pages for it to run into.
|
2006-10-04 02:30:14 +02:00
|
|
|
*
|
2006-05-19 13:10:25 +02:00
|
|
|
* To complete insert we can't use basic insertion algorithm because
|
|
|
|
* during insertion we can't call user-defined support functions of opclass.
|
|
|
|
* So, we insert 'invalid' tuples without real key and do it by separate algorithm.
|
|
|
|
* 'invalid' tuple should be updated by vacuum full.
|
2005-09-22 22:44:36 +02:00
|
|
|
*/
|
2005-06-14 13:45:14 +02:00
|
|
|
static void
|
2005-09-22 22:44:36 +02:00
|
|
|
gistContinueInsert(gistIncompleteInsert *insert)
|
|
|
|
{
|
|
|
|
IndexTuple *itup;
|
|
|
|
int i,
|
|
|
|
lenitup;
|
|
|
|
Relation index;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
index = CreateFakeRelcacheEntry(insert->node);
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
/*
|
|
|
|
* needed vector itup never will be more than initial lenblkno+2, because
|
|
|
|
* during this processing Indextuple can be only smaller
|
|
|
|
*/
|
|
|
|
lenitup = insert->lenblk;
|
|
|
|
itup = (IndexTuple *) palloc(sizeof(IndexTuple) * (lenitup + 2 /* guarantee root split */ ));
|
|
|
|
|
|
|
|
for (i = 0; i < insert->lenblk; i++)
|
|
|
|
itup[i] = gist_form_invalid_tuple(insert->blkno[i]);
|
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* any insertion of itup[] should make LOG message about
|
2006-05-19 13:10:25 +02:00
|
|
|
*/
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
if (insert->origblkno == GIST_ROOT_BLKNO)
|
|
|
|
{
|
|
|
|
/*
|
2006-03-29 23:17:39 +02:00
|
|
|
* it was split root, so we should only make new root. it can't be
|
2006-05-19 13:10:25 +02:00
|
|
|
* simple insert into root, we should replace all content of root.
|
2005-09-22 22:44:36 +02:00
|
|
|
*/
|
2008-06-12 11:12:31 +02:00
|
|
|
Buffer buffer = XLogReadBuffer(insert->node, GIST_ROOT_BLKNO, true);
|
2005-06-30 19:52:14 +02:00
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
gistnewroot(index, buffer, itup, lenitup, NULL);
|
2006-04-01 01:32:07 +02:00
|
|
|
UnlockReleaseBuffer(buffer);
|
2005-09-22 22:44:36 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Buffer *buffers;
|
|
|
|
Page *pages;
|
|
|
|
int numbuffer;
|
2006-10-04 02:30:14 +02:00
|
|
|
OffsetNumber *todelete;
|
2005-06-30 19:52:14 +02:00
|
|
|
|
|
|
|
/* construct path */
|
2006-03-31 01:03:10 +02:00
|
|
|
gistxlogFindPath(index, insert);
|
2005-09-22 22:44:36 +02:00
|
|
|
|
|
|
|
Assert(insert->pathlen > 0);
|
2005-06-30 19:52:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
buffers = (Buffer *) palloc(sizeof(Buffer) * (insert->lenblk + 2 /* guarantee root split */ ));
|
|
|
|
pages = (Page *) palloc(sizeof(Page) * (insert->lenblk + 2 /* guarantee root split */ ));
|
2006-05-19 13:10:25 +02:00
|
|
|
todelete = (OffsetNumber *) palloc(sizeof(OffsetNumber) * (insert->lenblk + 2 /* guarantee root split */ ));
|
2005-06-30 19:52:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
for (i = 0; i < insert->pathlen; i++)
|
|
|
|
{
|
|
|
|
int j,
|
|
|
|
k,
|
2006-05-19 13:10:25 +02:00
|
|
|
pituplen = 0;
|
2009-12-24 18:52:04 +01:00
|
|
|
uint8 xlinfo;
|
2006-10-04 02:30:14 +02:00
|
|
|
XLogRecData *rdata;
|
|
|
|
XLogRecPtr recptr;
|
|
|
|
Buffer tempbuffer = InvalidBuffer;
|
|
|
|
int ntodelete = 0;
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
numbuffer = 1;
|
2006-05-19 13:10:25 +02:00
|
|
|
buffers[0] = ReadBuffer(index, insert->path[i]);
|
|
|
|
LockBuffer(buffers[0], GIST_EXCLUSIVE);
|
2006-10-04 02:30:14 +02:00
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
/*
|
|
|
|
* we check buffer, because we restored page earlier
|
|
|
|
*/
|
|
|
|
gistcheckpage(index, buffers[0]);
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
pages[0] = BufferGetPage(buffers[0]);
|
2006-10-04 02:30:14 +02:00
|
|
|
Assert(!GistPageIsLeaf(pages[0]));
|
2005-06-30 19:52:14 +02:00
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
pituplen = PageGetMaxOffsetNumber(pages[0]);
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
/* find old IndexTuples to remove */
|
|
|
|
for (j = 0; j < pituplen && ntodelete < lenitup; j++)
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
2005-06-20 12:29:37 +02:00
|
|
|
BlockNumber blkno;
|
2006-05-19 13:10:25 +02:00
|
|
|
ItemId iid = PageGetItemId(pages[0], j + FirstOffsetNumber);
|
|
|
|
IndexTuple idxtup = (IndexTuple) PageGetItem(pages[0], iid);
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
blkno = ItemPointerGetBlockNumber(&(idxtup->t_tid));
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
for (k = 0; k < lenitup; k++)
|
|
|
|
if (ItemPointerGetBlockNumber(&(itup[k]->t_tid)) == blkno)
|
|
|
|
{
|
2006-05-19 13:10:25 +02:00
|
|
|
todelete[ntodelete] = j + FirstOffsetNumber - ntodelete;
|
|
|
|
ntodelete++;
|
2005-06-14 13:45:14 +02:00
|
|
|
break;
|
|
|
|
}
|
2005-06-20 12:29:37 +02:00
|
|
|
}
|
2005-06-14 13:45:14 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
if (ntodelete == 0)
|
Wording cleanup for error messages. Also change can't -> cannot.
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 20:10:30 +01:00
|
|
|
elog(PANIC, "gistContinueInsert: cannot find pointer to page(s)");
|
2006-05-19 13:10:25 +02:00
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* we check space with subtraction only first tuple to delete,
|
|
|
|
* hope, that will be enough space....
|
2006-05-19 13:10:25 +02:00
|
|
|
*/
|
|
|
|
|
2006-07-02 04:23:23 +02:00
|
|
|
if (gistnospace(pages[0], itup, lenitup, *todelete, 0))
|
2005-09-22 22:44:36 +02:00
|
|
|
{
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
/* no space left on page, so we must split */
|
|
|
|
buffers[numbuffer] = ReadBuffer(index, P_NEW);
|
|
|
|
LockBuffer(buffers[numbuffer], GIST_EXCLUSIVE);
|
2005-09-22 22:44:36 +02:00
|
|
|
GISTInitBuffer(buffers[numbuffer], 0);
|
|
|
|
pages[numbuffer] = BufferGetPage(buffers[numbuffer]);
|
2008-06-12 11:12:31 +02:00
|
|
|
gistfillbuffer(pages[numbuffer], itup, lenitup, FirstOffsetNumber);
|
2005-06-20 12:29:37 +02:00
|
|
|
numbuffer++;
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
if (BufferGetBlockNumber(buffers[0]) == GIST_ROOT_BLKNO)
|
|
|
|
{
|
2006-10-04 02:30:14 +02:00
|
|
|
Buffer tmp;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
/*
|
2006-05-19 13:10:25 +02:00
|
|
|
* we split root, just copy content from root to new page
|
2005-09-22 22:44:36 +02:00
|
|
|
*/
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-06-30 19:52:14 +02:00
|
|
|
/* sanity check */
|
2005-09-22 22:44:36 +02:00
|
|
|
if (i + 1 != insert->pathlen)
|
|
|
|
elog(PANIC, "unexpected pathlen in index \"%s\"",
|
|
|
|
RelationGetRelationName(index));
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2006-05-19 13:10:25 +02:00
|
|
|
/* fill new page, root will be changed later */
|
|
|
|
tempbuffer = ReadBuffer(index, P_NEW);
|
|
|
|
LockBuffer(tempbuffer, GIST_EXCLUSIVE);
|
2006-10-04 02:30:14 +02:00
|
|
|
memcpy(BufferGetPage(tempbuffer), pages[0], BufferGetPageSize(tempbuffer));
|
2006-05-19 13:10:25 +02:00
|
|
|
|
|
|
|
/* swap buffers[0] (was root) and temp buffer */
|
|
|
|
tmp = buffers[0];
|
|
|
|
buffers[0] = tempbuffer;
|
2006-10-04 02:30:14 +02:00
|
|
|
tempbuffer = tmp; /* now in tempbuffer GIST_ROOT_BLKNO,
|
|
|
|
* it is still unchanged */
|
2006-05-19 13:10:25 +02:00
|
|
|
|
|
|
|
pages[0] = BufferGetPage(buffers[0]);
|
2005-06-20 12:29:37 +02:00
|
|
|
}
|
2006-05-19 13:10:25 +02:00
|
|
|
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
for (j = 0; j < ntodelete; j++)
|
2006-05-19 13:10:25 +02:00
|
|
|
PageIndexTupleDelete(pages[0], todelete[j]);
|
|
|
|
|
2009-12-24 18:52:04 +01:00
|
|
|
xlinfo = XLOG_GIST_PAGE_SPLIT;
|
2006-05-19 13:10:25 +02:00
|
|
|
rdata = formSplitRdata(index->rd_node, insert->path[i],
|
2006-10-04 02:30:14 +02:00
|
|
|
false, &(insert->key),
|
|
|
|
gistMakePageLayout(buffers, numbuffer));
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2006-05-19 13:10:25 +02:00
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
for (j = 0; j < ntodelete; j++)
|
2006-05-19 13:10:25 +02:00
|
|
|
PageIndexTupleDelete(pages[0], todelete[j]);
|
2008-06-12 11:12:31 +02:00
|
|
|
gistfillbuffer(pages[0], itup, lenitup, InvalidOffsetNumber);
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2009-12-24 18:52:04 +01:00
|
|
|
xlinfo = XLOG_GIST_PAGE_UPDATE;
|
2006-10-04 02:30:14 +02:00
|
|
|
rdata = formUpdateRdata(index->rd_node, buffers[0],
|
|
|
|
todelete, ntodelete,
|
|
|
|
itup, lenitup, &(insert->key));
|
2005-09-22 22:44:36 +02:00
|
|
|
}
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
/*
|
|
|
|
* use insert->key as mark for completion of insert (form*Rdata()
|
|
|
|
* above) for following possible replays
|
2006-05-19 13:10:25 +02:00
|
|
|
*/
|
|
|
|
|
2006-05-19 19:15:41 +02:00
|
|
|
/* write pages, we should mark it dirty before XLogInsert() */
|
2006-10-04 02:30:14 +02:00
|
|
|
for (j = 0; j < numbuffer; j++)
|
|
|
|
{
|
2006-05-19 19:15:41 +02:00
|
|
|
GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber;
|
|
|
|
MarkBufferDirty(buffers[j]);
|
|
|
|
}
|
2009-12-24 18:52:04 +01:00
|
|
|
recptr = XLogInsert(RM_GIST_ID, xlinfo, rdata);
|
2005-09-22 22:44:36 +02:00
|
|
|
for (j = 0; j < numbuffer; j++)
|
|
|
|
{
|
2006-05-19 13:10:25 +02:00
|
|
|
PageSetLSN(pages[j], recptr);
|
2005-06-20 12:29:37 +02:00
|
|
|
PageSetTLI(pages[j], ThisTimeLineID);
|
2006-05-19 13:10:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
END_CRIT_SECTION();
|
|
|
|
|
|
|
|
lenitup = numbuffer;
|
2006-10-04 02:30:14 +02:00
|
|
|
for (j = 0; j < numbuffer; j++)
|
|
|
|
{
|
2006-05-19 13:10:25 +02:00
|
|
|
itup[j] = gist_form_invalid_tuple(BufferGetBlockNumber(buffers[j]));
|
2006-04-01 01:32:07 +02:00
|
|
|
UnlockReleaseBuffer(buffers[j]);
|
2005-06-20 12:29:37 +02:00
|
|
|
}
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
if (tempbuffer != InvalidBuffer)
|
|
|
|
{
|
2006-05-19 13:10:25 +02:00
|
|
|
/*
|
|
|
|
* it was a root split, so fill it by new values
|
|
|
|
*/
|
|
|
|
gistnewroot(index, tempbuffer, itup, lenitup, &(insert->key));
|
|
|
|
UnlockReleaseBuffer(tempbuffer);
|
|
|
|
}
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
FreeFakeRelcacheEntry(index);
|
|
|
|
|
2005-09-22 20:49:45 +02:00
|
|
|
ereport(LOG,
|
2006-10-04 02:30:14 +02:00
|
|
|
(errmsg("index %u/%u/%u needs VACUUM FULL or REINDEX to finish crash recovery",
|
2005-09-22 22:44:36 +02:00
|
|
|
insert->node.spcNode, insert->node.dbNode, insert->node.relNode),
|
2006-10-04 02:30:14 +02:00
|
|
|
errdetail("Incomplete insertion detected during crash replay.")));
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2005-09-22 22:44:36 +02:00
|
|
|
gist_xlog_startup(void)
|
|
|
|
{
|
|
|
|
incomplete_inserts = NIL;
|
2005-06-14 13:45:14 +02:00
|
|
|
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
|
2005-09-22 22:44:36 +02:00
|
|
|
"GiST recovery temporary context",
|
|
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
2005-06-14 13:45:14 +02:00
|
|
|
opCtx = createTempGistContext();
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2005-09-22 22:44:36 +02:00
|
|
|
gist_xlog_cleanup(void)
|
|
|
|
{
|
2005-06-14 13:45:14 +02:00
|
|
|
ListCell *l;
|
2006-03-31 01:03:10 +02:00
|
|
|
MemoryContext oldCxt;
|
2005-06-30 19:52:14 +02:00
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
oldCxt = MemoryContextSwitchTo(opCtx);
|
2006-05-19 13:10:25 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
foreach(l, incomplete_inserts)
|
|
|
|
{
|
|
|
|
gistIncompleteInsert *insert = (gistIncompleteInsert *) lfirst(l);
|
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
gistContinueInsert(insert);
|
2005-06-30 19:52:14 +02:00
|
|
|
MemoryContextReset(opCtx);
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
2005-06-30 19:52:14 +02:00
|
|
|
MemoryContextSwitchTo(oldCxt);
|
|
|
|
|
2005-06-14 13:45:14 +02:00
|
|
|
MemoryContextDelete(opCtx);
|
2005-09-22 22:44:36 +02:00
|
|
|
MemoryContextDelete(insertCtx);
|
2005-06-14 13:45:14 +02:00
|
|
|
}
|
|
|
|
|
2006-08-07 18:57:57 +02:00
|
|
|
bool
|
|
|
|
gist_safe_restartpoint(void)
|
|
|
|
{
|
|
|
|
if (incomplete_inserts)
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2005-06-20 12:29:37 +02:00
|
|
|
|
|
|
|
XLogRecData *
|
2006-03-31 01:03:10 +02:00
|
|
|
formSplitRdata(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
|
2005-09-22 22:44:36 +02:00
|
|
|
ItemPointer key, SplitedPageLayout *dist)
|
|
|
|
{
|
|
|
|
XLogRecData *rdata;
|
|
|
|
gistxlogPageSplit *xlrec = (gistxlogPageSplit *) palloc(sizeof(gistxlogPageSplit));
|
|
|
|
SplitedPageLayout *ptr;
|
|
|
|
int npage = 0,
|
|
|
|
cur = 1;
|
|
|
|
|
|
|
|
ptr = dist;
|
|
|
|
while (ptr)
|
|
|
|
{
|
2005-06-20 12:29:37 +02:00
|
|
|
npage++;
|
2005-09-22 22:44:36 +02:00
|
|
|
ptr = ptr->next;
|
2005-06-20 12:29:37 +02:00
|
|
|
}
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
rdata = (XLogRecData *) palloc(sizeof(XLogRecData) * (npage * 2 + 2));
|
2005-06-20 12:29:37 +02:00
|
|
|
|
|
|
|
xlrec->node = node;
|
|
|
|
xlrec->origblkno = blkno;
|
2006-03-31 01:03:10 +02:00
|
|
|
xlrec->origleaf = page_is_leaf;
|
2005-09-22 22:44:36 +02:00
|
|
|
xlrec->npage = (uint16) npage;
|
|
|
|
if (key)
|
2005-06-20 12:29:37 +02:00
|
|
|
xlrec->key = *key;
|
|
|
|
else
|
2005-09-22 22:44:36 +02:00
|
|
|
ItemPointerSetInvalid(&(xlrec->key));
|
|
|
|
|
2005-06-20 12:29:37 +02:00
|
|
|
rdata[0].buffer = InvalidBuffer;
|
2005-09-22 22:44:36 +02:00
|
|
|
rdata[0].data = (char *) xlrec;
|
|
|
|
rdata[0].len = sizeof(gistxlogPageSplit);
|
|
|
|
rdata[0].next = NULL;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
ptr = dist;
|
|
|
|
while (ptr)
|
|
|
|
{
|
2005-06-20 12:29:37 +02:00
|
|
|
rdata[cur].buffer = InvalidBuffer;
|
2005-09-22 22:44:36 +02:00
|
|
|
rdata[cur].data = (char *) &(ptr->block);
|
|
|
|
rdata[cur].len = sizeof(gistxlogPage);
|
|
|
|
rdata[cur - 1].next = &(rdata[cur]);
|
2005-06-20 12:29:37 +02:00
|
|
|
cur++;
|
|
|
|
|
|
|
|
rdata[cur].buffer = InvalidBuffer;
|
2005-09-22 22:44:36 +02:00
|
|
|
rdata[cur].data = (char *) (ptr->list);
|
|
|
|
rdata[cur].len = ptr->lenlist;
|
|
|
|
rdata[cur - 1].next = &(rdata[cur]);
|
|
|
|
rdata[cur].next = NULL;
|
2005-06-20 12:29:37 +02:00
|
|
|
cur++;
|
2005-09-22 22:44:36 +02:00
|
|
|
ptr = ptr->next;
|
2005-06-20 12:29:37 +02:00
|
|
|
}
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
return rdata;
|
2005-06-20 12:29:37 +02:00
|
|
|
}
|
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
/*
|
|
|
|
* Construct the rdata array for an XLOG record describing a page update
|
|
|
|
* (deletion and/or insertion of tuples on a single index page).
|
|
|
|
*
|
|
|
|
* Note that both the todelete array and the tuples are marked as belonging
|
|
|
|
* to the target buffer; they need not be stored in XLOG if XLogInsert decides
|
|
|
|
* to log the whole buffer contents instead. Also, we take care that there's
|
|
|
|
* at least one rdata item referencing the buffer, even when ntodelete and
|
|
|
|
* ituplen are both zero; this ensures that XLogInsert knows about the buffer.
|
|
|
|
*/
|
2005-06-20 12:29:37 +02:00
|
|
|
XLogRecData *
|
2006-03-31 01:03:10 +02:00
|
|
|
formUpdateRdata(RelFileNode node, Buffer buffer,
|
2006-05-17 18:34:59 +02:00
|
|
|
OffsetNumber *todelete, int ntodelete,
|
2005-09-22 22:44:36 +02:00
|
|
|
IndexTuple *itup, int ituplen, ItemPointer key)
|
|
|
|
{
|
|
|
|
XLogRecData *rdata;
|
2006-03-31 01:03:10 +02:00
|
|
|
gistxlogPageUpdate *xlrec;
|
|
|
|
int cur,
|
|
|
|
i;
|
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
rdata = (XLogRecData *) palloc(sizeof(XLogRecData) * (3 + ituplen));
|
2006-03-31 01:03:10 +02:00
|
|
|
xlrec = (gistxlogPageUpdate *) palloc(sizeof(gistxlogPageUpdate));
|
2005-06-20 12:29:37 +02:00
|
|
|
|
|
|
|
xlrec->node = node;
|
2006-03-31 01:03:10 +02:00
|
|
|
xlrec->blkno = BufferGetBlockNumber(buffer);
|
|
|
|
xlrec->ntodelete = ntodelete;
|
2006-05-17 18:34:59 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
if (key)
|
2005-06-20 12:29:37 +02:00
|
|
|
xlrec->key = *key;
|
|
|
|
else
|
2005-09-22 22:44:36 +02:00
|
|
|
ItemPointerSetInvalid(&(xlrec->key));
|
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
rdata[0].buffer = buffer;
|
|
|
|
rdata[0].buffer_std = true;
|
|
|
|
rdata[0].data = NULL;
|
|
|
|
rdata[0].len = 0;
|
2006-03-31 01:03:10 +02:00
|
|
|
rdata[0].next = &(rdata[1]);
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2006-05-17 18:34:59 +02:00
|
|
|
rdata[1].data = (char *) xlrec;
|
|
|
|
rdata[1].len = sizeof(gistxlogPageUpdate);
|
|
|
|
rdata[1].buffer = InvalidBuffer;
|
|
|
|
rdata[1].next = &(rdata[2]);
|
|
|
|
|
|
|
|
rdata[2].data = (char *) todelete;
|
|
|
|
rdata[2].len = MAXALIGN(sizeof(OffsetNumber) * ntodelete);
|
|
|
|
rdata[2].buffer = buffer;
|
|
|
|
rdata[2].buffer_std = true;
|
|
|
|
rdata[2].next = NULL;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2006-03-31 01:03:10 +02:00
|
|
|
/* new tuples */
|
2006-05-17 18:34:59 +02:00
|
|
|
cur = 3;
|
2006-03-31 01:03:10 +02:00
|
|
|
for (i = 0; i < ituplen; i++)
|
|
|
|
{
|
|
|
|
rdata[cur - 1].next = &(rdata[cur]);
|
|
|
|
rdata[cur].data = (char *) (itup[i]);
|
|
|
|
rdata[cur].len = IndexTupleSize(itup[i]);
|
|
|
|
rdata[cur].buffer = buffer;
|
|
|
|
rdata[cur].buffer_std = true;
|
|
|
|
rdata[cur].next = NULL;
|
|
|
|
cur++;
|
2005-06-20 12:29:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return rdata;
|
|
|
|
}
|
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
XLogRecPtr
|
|
|
|
gistxlogInsertCompletion(RelFileNode node, ItemPointerData *keys, int len)
|
|
|
|
{
|
|
|
|
gistxlogInsertComplete xlrec;
|
|
|
|
XLogRecData rdata[2];
|
|
|
|
XLogRecPtr recptr;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
2005-09-22 22:44:36 +02:00
|
|
|
Assert(len > 0);
|
2005-06-20 12:29:37 +02:00
|
|
|
xlrec.node = node;
|
|
|
|
|
|
|
|
rdata[0].buffer = InvalidBuffer;
|
2005-09-22 22:44:36 +02:00
|
|
|
rdata[0].data = (char *) &xlrec;
|
|
|
|
rdata[0].len = sizeof(gistxlogInsertComplete);
|
|
|
|
rdata[0].next = &(rdata[1]);
|
2005-06-20 12:29:37 +02:00
|
|
|
|
|
|
|
rdata[1].buffer = InvalidBuffer;
|
2005-09-22 22:44:36 +02:00
|
|
|
rdata[1].data = (char *) keys;
|
|
|
|
rdata[1].len = sizeof(ItemPointerData) * len;
|
|
|
|
rdata[1].next = NULL;
|
2005-06-20 12:29:37 +02:00
|
|
|
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
|
|
|
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_INSERT_COMPLETE, rdata);
|
2005-09-22 22:44:36 +02:00
|
|
|
|
2005-06-20 12:29:37 +02:00
|
|
|
END_CRIT_SECTION();
|
|
|
|
|
|
|
|
return recptr;
|
|
|
|
}
|