From 898a7bd13bb9d5ae36d2defcf3bbed3bd1d5ffd6 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Thu, 30 Jun 2005 17:52:14 +0000 Subject: [PATCH] Bug fixes for GiST crash recovery. - add forgotten check of lsn for insert completion - remove level of pages: hard to check in recovery - some cleanups --- src/backend/access/gist/gist.c | 25 +++--- src/backend/access/gist/gistutil.c | 4 +- src/backend/access/gist/gistvacuum.c | 3 +- src/backend/access/gist/gistxlog.c | 118 ++++++++++++++++++--------- src/include/access/gist.h | 10 +-- src/include/access/gist_private.h | 4 +- 6 files changed, 97 insertions(+), 67 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 2e75225253..5ce3fceba6 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.124 2005/06/29 14:06:14 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.125 2005/06/30 17:52:13 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -587,7 +587,7 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate) * Should have the same interface as XLogReadBuffer */ static Buffer -gistReadAndLockBuffer( bool unused, Relation r, BlockNumber blkno ) { +gistReadAndLockBuffer( Relation r, BlockNumber blkno ) { Buffer buffer = ReadBuffer( r, blkno ); LockBuffer( buffer, GIST_SHARE ); return buffer; @@ -601,7 +601,7 @@ gistReadAndLockBuffer( bool unused, Relation r, BlockNumber blkno ) { * returns from the begining of closest parent; */ GISTInsertStack* -gistFindPath( Relation r, BlockNumber child, Buffer (*myReadBuffer)(bool, Relation, BlockNumber) ) { +gistFindPath( Relation r, BlockNumber child, Buffer (*myReadBuffer)(Relation, BlockNumber) ) { Page page; Buffer buffer; OffsetNumber i, maxoff; @@ -614,9 +614,15 @@ gistFindPath( 
Relation r, BlockNumber child, Buffer (*myReadBuffer)(bool, Relat top->blkno = GIST_ROOT_BLKNO; while( top && top->blkno != child ) { - buffer = myReadBuffer(false, r, top->blkno); /* buffer locked */ + buffer = myReadBuffer(r, top->blkno); /* buffer locked */ page = (Page)BufferGetPage( buffer ); - Assert( !GistPageIsLeaf(page) ); + + if ( GistPageIsLeaf(page) ) { + /* we can safety go away, follows only leaf pages */ + LockBuffer( buffer, GIST_UNLOCK ); + ReleaseBuffer( buffer ); + return NULL; + } top->lsn = PageGetLSN(page); @@ -662,7 +668,7 @@ gistFindPath( Relation r, BlockNumber child, Buffer (*myReadBuffer)(bool, Relat LockBuffer( buffer, GIST_UNLOCK ); ReleaseBuffer( buffer ); return top; - } else if ( GistPageGetOpaque(page)->level> 0 ) { + } else { /* Install next inner page to the end of stack */ ptr = (GISTInsertStack*)palloc0( sizeof(GISTInsertStack) ); ptr->blkno = blkno; @@ -855,11 +861,9 @@ gistSplit(Relation r, OffsetNumber *realoffset; IndexTuple *cleaneditup = itup; int lencleaneditup = *len; - int level; p = (Page) BufferGetPage(buffer); opaque = GistPageGetOpaque(p); - level = opaque->level; /* * The root of the tree is the first block in the relation. 
If we're @@ -872,7 +876,6 @@ gistSplit(Relation r, GISTInitBuffer(leftbuf, opaque->flags&F_LEAF); lbknum = BufferGetBlockNumber(leftbuf); left = (Page) BufferGetPage(leftbuf); - GistPageGetOpaque(left)->level = level; } else { @@ -886,7 +889,6 @@ gistSplit(Relation r, GISTInitBuffer(rightbuf, opaque->flags&F_LEAF); rbknum = BufferGetBlockNumber(rightbuf); right = (Page) BufferGetPage(rightbuf); - GistPageGetOpaque(right)->level = level; /* generate the item array */ realoffset = palloc((*len + 1) * sizeof(OffsetNumber)); @@ -1068,13 +1070,10 @@ void gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer key) { Page page; - int level; Assert( BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO ); page = BufferGetPage(buffer); - level = GistPageGetOpaque(page)->level; GISTInitBuffer(buffer, 0); - GistPageGetOpaque(page)->level = level+1; gistfillbuffer(r, page, itup, len, FirstOffsetNumber); if ( !r->rd_istemp ) { diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 031914a37c..5b6d13a7a3 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.4 2005/06/28 15:51:00 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.5 2005/06/30 17:52:14 teodor Exp $ *------------------------------------------------------------------------- */ #include "postgres.h" @@ -809,8 +809,6 @@ GISTInitBuffer(Buffer b, uint32 f) opaque = GistPageGetOpaque(page); opaque->flags = f; - opaque->nsplited = 0; - opaque->level = 0; opaque->rightlink = InvalidBlockNumber; memset( &(opaque->nsn), 0, sizeof(GistNSN) ); } diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index cf6d89d27b..381cf98559 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c 
@@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.5 2005/06/29 14:06:14 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.6 2005/06/30 17:52:14 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -60,7 +60,6 @@ gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) { page = (Page) BufferGetPage(buffer); maxoff = PageGetMaxOffsetNumber(page); - if ( GistPageIsLeaf(page) ) { if ( GistTuplesDeleted(page) ) { needunion = needwrite = true; diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index 30fd5b71ee..15acb18c80 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.5 2005/06/28 15:51:00 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.6 2005/06/30 17:52:14 teodor Exp $ *------------------------------------------------------------------------- */ #include "postgres.h" @@ -44,6 +44,7 @@ typedef struct { typedef struct gistIncompleteInsert { RelFileNode node; + BlockNumber origblkno; /* for splits */ ItemPointerData key; int lenblk; BlockNumber *blkno; @@ -79,6 +80,7 @@ pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key, ninsert->lenblk = lenblk; ninsert->blkno = (BlockNumber*)palloc( sizeof(BlockNumber)*ninsert->lenblk ); memcpy(ninsert->blkno, blkno, sizeof(BlockNumber)*ninsert->lenblk); + ninsert->origblkno = *blkno; } else { int i; @@ -87,6 +89,7 @@ pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key, ninsert->blkno = (BlockNumber*)palloc( sizeof(BlockNumber)*ninsert->lenblk ); for(i=0;i<ninsert->lenblk;i++) ninsert->blkno[i] = xlinfo->page[i].header->blkno; + 
ninsert->origblkno = xlinfo->data->origblkno; } Assert( ninsert->lenblk>0 ); @@ -209,6 +212,7 @@ gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) { PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); + GistPageGetOpaque(page)->rightlink = InvalidBlockNumber; LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); @@ -466,81 +470,98 @@ gist_form_invalid_tuple(BlockNumber blkno) { return tuple; } +static Buffer +gistXLogReadAndLockBuffer( Relation r, BlockNumber blkno ) { + Buffer buffer = XLogReadBuffer( false, r, blkno ); + if (!BufferIsValid(buffer)) + elog(PANIC, "gistXLogReadAndLockBuffer: block %u unfound", blkno); + if ( PageIsNew( (PageHeader)(BufferGetPage(buffer)) ) ) + elog(PANIC, "gistXLogReadAndLockBuffer: uninitialized page %u", blkno); + + return buffer; +} + + static void gixtxlogFindPath( Relation index, gistIncompleteInsert *insert ) { - int i; GISTInsertStack *top; insert->pathlen = 0; insert->path = NULL; - for(i=0;insert->lenblk;i++) { - if ( (top=gistFindPath(index, insert->blkno[i], XLogReadBuffer)) != NULL ) { - GISTInsertStack *ptr=top; - while(ptr) { - insert->pathlen++; - ptr = ptr->parent; - } - - insert->path=(BlockNumber*)palloc( sizeof(BlockNumber) * insert->pathlen ); - - i=0; - ptr = top; - while(ptr) { - insert->path[i] = ptr->blkno; - i++; - ptr = ptr->parent; - } - break; + if ( (top=gistFindPath(index, insert->origblkno, gistXLogReadAndLockBuffer)) != NULL ) { + int i; + GISTInsertStack *ptr=top; + while(ptr) { + insert->pathlen++; + ptr = ptr->parent; } - } + + insert->path=(BlockNumber*)palloc( sizeof(BlockNumber) * insert->pathlen ); + + i=0; + ptr = top; + while(ptr) { + insert->path[i] = ptr->blkno; + i++; + ptr = ptr->parent; + } + } else + elog(LOG, "gixtxlogFindPath: lost parent for block %u", insert->origblkno); } static void gistContinueInsert(gistIncompleteInsert *insert) { IndexTuple *itup; int i, lenitup; - MemoryContext oldCxt; Relation index; - oldCxt = 
MemoryContextSwitchTo(opCtx); - index = XLogOpenRelation(insert->node); - if (!RelationIsValid(index)) + if (!RelationIsValid(index)) return; - elog(LOG,"Detected incomplete insert into GiST index %u/%u/%u; It's desirable to vacuum or reindex index", - insert->node.spcNode, insert->node.dbNode, insert->node.relNode); - /* needed vector itup never will be more than initial lenblkno+2, because during this processing Indextuple can be only smaller */ lenitup = insert->lenblk; itup = (IndexTuple*)palloc(sizeof(IndexTuple)*(lenitup+2 /*guarantee root split*/)); - for(i=0;i<insert->lenblk;i++) + for(i=0;i<insert->lenblk;i++) itup[i] = gist_form_invalid_tuple( insert->blkno[i] ); - /* construct path */ - gixtxlogFindPath( index, insert ); - - if ( insert->pathlen==0 ) { - /*it was split root, so we should only make new root*/ + if ( insert->origblkno==GIST_ROOT_BLKNO ) { + /*it was split root, so we should only make new root. + it can't be simple insert into root, look at call + pushIncompleteInsert in gistRedoPageSplitRecord */ Buffer buffer = XLogReadBuffer(true, index, GIST_ROOT_BLKNO); Page page; if (!BufferIsValid(buffer)) elog(PANIC, "gistContinueInsert: root block unfound"); + page = BufferGetPage(buffer); + if (XLByteLE(insert->lsn, PageGetLSN(page))) { + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + return; + } + GISTInitBuffer(buffer, 0); page = BufferGetPage(buffer); gistfillbuffer(index, page, itup, lenitup, FirstOffsetNumber); + PageSetLSN(page, insert->lsn); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } else { Buffer *buffers; Page *pages; int numbuffer; - + + /* construct path */ + gixtxlogFindPath( index, insert ); + + Assert( insert->pathlen > 0 ); + buffers= (Buffer*) palloc( sizeof(Buffer) * (insert->lenblk+2/*guarantee root split*/) ); pages = (Page*) palloc( sizeof(Page ) * (insert->lenblk+2/*guarantee root split*/) ); @@ -555,6 +576,12 @@ gistContinueInsert(gistIncompleteInsert *insert) { if ( 
PageIsNew((PageHeader)(pages[numbuffer-1])) ) elog(PANIC, "gistContinueInsert: uninitialized page"); + if (XLByteLE(insert->lsn, PageGetLSN(pages[numbuffer-1]))) { + LockBuffer(buffers[numbuffer-1], BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffers[numbuffer-1]); + return; + } + pituplen = PageGetMaxOffsetNumber(pages[numbuffer-1]); /* remove old IndexTuples */ @@ -587,9 +614,10 @@ gistContinueInsert(gistIncompleteInsert *insert) { if ( BufferGetBlockNumber( buffers[0] ) == GIST_ROOT_BLKNO ) { IndexTuple *parentitup; + /* we split root, just copy tuples from old root to new page */ parentitup = gistextractbuffer(buffers[numbuffer-1], &pituplen); - /* we split root, just copy tuples from old root to new page */ + /* sanity check */ if ( i+1 != insert->pathlen ) elog(PANIC,"gistContinueInsert: can't restore index '%s'", RelationGetRelationName( index )); @@ -624,14 +652,15 @@ gistContinueInsert(gistIncompleteInsert *insert) { itup[j]=gist_form_invalid_tuple( BufferGetBlockNumber( buffers[j] ) ); PageSetLSN(pages[j], insert->lsn); PageSetTLI(pages[j], ThisTimeLineID); + GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber; LockBuffer(buffers[j], BUFFER_LOCK_UNLOCK); WriteBuffer( buffers[j] ); } } } - MemoryContextSwitchTo(oldCxt); - MemoryContextReset(opCtx); + elog(LOG,"Detected incomplete insert into GiST index %u/%u/%u; It's desirable to vacuum or reindex index", + insert->node.spcNode, insert->node.dbNode, insert->node.relNode); } void @@ -648,11 +677,22 @@ gist_xlog_startup(void) { void gist_xlog_cleanup(void) { ListCell *l; + List *reverse=NIL; + MemoryContext oldCxt = MemoryContextSwitchTo(insertCtx); - foreach(l, incomplete_inserts) { + /* we should call gistContinueInsert in reverse order */ + + foreach(l, incomplete_inserts) + reverse = lappend(reverse, lfirst(l)); + + MemoryContextSwitchTo(opCtx); + foreach(l, reverse) { gistIncompleteInsert *insert = (gistIncompleteInsert*) lfirst(l); gistContinueInsert(insert); + MemoryContextReset(opCtx); } + 
MemoryContextSwitchTo(oldCxt); + MemoryContextDelete(opCtx); MemoryContextDelete(insertCtx); } diff --git a/src/include/access/gist.h b/src/include/access/gist.h index ee060e83c2..44fe84ee38 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -9,7 +9,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.48 2005/06/27 12:45:22 teodor Exp $ + * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.49 2005/06/30 17:52:14 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -45,13 +45,7 @@ typedef XLogRecPtr GistNSN; typedef struct GISTPageOpaqueData { - uint8 flags; - - /* number page to which current one is splitted in last split */ - uint8 nsplited; - - /* level of page, 0 - leaf */ - uint16 level; + uint32 flags; /* 29 bits are unused for now */ BlockNumber rightlink; /* the only meaning - change this value if diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 6ea4dccb68..a14df2e377 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.6 2005/06/27 12:45:22 teodor Exp $ + * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.7 2005/06/30 17:52:14 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -234,7 +234,7 @@ extern IndexTuple * gistSplit(Relation r, Buffer buffer, IndexTuple *itup, int *len, SplitedPageLayout **dist, GISTSTATE *giststate); extern GISTInsertStack* gistFindPath( Relation r, BlockNumber child, - Buffer (*myReadBuffer)(bool, Relation, BlockNumber) ); + Buffer (*myReadBuffer)(Relation, BlockNumber) ); /* 
gistxlog.c */ extern void gist_redo(XLogRecPtr lsn, XLogRecord *record); extern void gist_desc(char *buf, uint8 xl_info, char *rec);