Fix a serious bug introduced into GIN in 8.4: now that MergeItemPointers()
is supposed to remove duplicate heap TIDs, we have to be sure to reduce the
tuple size and posting-item count accordingly in addItemPointersToTuple().
Failing to do so resulted in the effective injection of garbage TIDs into the
index contents, ie, whatever happened to be in the memory palloc'd for the
new tuple.  I'm not sure that this fully explains the index corruption
reported by Tatsuo Ishii, but the test case I'm using no longer fails.
This commit is contained in:
Tom Lane 2009-06-06 02:39:40 +00:00
parent 1978d7f13f
commit 356eea24ce
4 changed files with 62 additions and 26 deletions

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.14 2009/03/24 20:17:10 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.15 2009/06/06 02:39:40 tgl Exp $
*-------------------------------------------------------------------------
*/
@ -32,10 +32,14 @@ compareItemPointers(ItemPointer a, ItemPointer b)
}
/*
* Merge two ordered array of itempointer
* Merge two ordered arrays of itempointers, eliminating any duplicates.
* Returns the number of items in the result.
* Caller is responsible that there is enough space at *dst.
*/
void
MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb)
uint32
MergeItemPointers(ItemPointerData *dst,
ItemPointerData *a, uint32 na,
ItemPointerData *b, uint32 nb)
{
ItemPointerData *dptr = dst;
ItemPointerData *aptr = a,
@ -62,6 +66,8 @@ MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPoint
while (bptr - b < nb)
*dptr++ = *bptr++;
return dptr - dst;
}
/*

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.19 2009/01/01 17:23:34 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.20 2009/06/06 02:39:40 tgl Exp $
*-------------------------------------------------------------------------
*/
@ -20,28 +20,33 @@
#include "utils/rel.h"
/*
* forms tuple for entry tree. On leaf page, Index tuple has
* non-traditional layout. Tuple may contain posting list or
* root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno)
* Form a tuple for entry tree.
*
* On leaf pages, Index tuple has non-traditional layout. Tuple may contain
* posting list or root blocknumber of posting tree.
* Macros: GinIsPostingTree(itup) / GinSetPostingTree(itup, blkno)
* 1) Posting list
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
* - itup->t_info & INDEX_SIZE_MASK contains total size of tuple as usual
* - ItemPointerGetBlockNumber(&itup->t_tid) contains original
* size of tuple (without posting list).
* Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
* Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
* of elements in posting list (number of heap itempointer)
* Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n)
* - After usual part of tuple there is a posting list
* of elements in posting list (number of heap itempointers)
* Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n)
* - After standard part of tuple there is a posting list, ie, array
* of heap itempointers
* Macros: GinGetPosting(itup)
* 2) Posting tree
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
* - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
* root of posting tree
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number
* GIN_TREE_POSTING, which distinguishes this from posting-list case
*
* Storage of attributes of tuple are different for single and multicolumn index.
* For single-column index tuple stores only value to be indexed and for
* multicolumn variant it stores two attributes: column number of value and value.
* Attributes of an index tuple are different for single and multicolumn index.
* For single-column case, index tuple stores only value to be indexed.
* For multicolumn case, it stores two attributes: column number of value
* and value.
*/
IndexTuple
GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd)
@ -89,6 +94,28 @@ GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData
return itup;
}
/*
 * GinShortenTuple
 *
 * Shrink the posting list of a tuple previously built by GinFormTuple
 * so that it holds only nipd heap item pointers.  The total tuple size kept
 * in t_info and the stored posting-item count are both rewritten to match.
 *
 * nipd must not exceed the tuple's current posting-item count; the
 * tuple can only get smaller here, never larger.
 */
void
GinShortenTuple(IndexTuple itup, uint32 nipd)
{
	uint32		shrunkSize;

	/* Caller may only drop posting items, never add them. */
	Assert(GinGetNPosting(itup) >= nipd);

	/*
	 * Size = short-aligned original (posting-less) tuple size plus room
	 * for nipd item pointers, with the sum rounded up to MAXALIGN.
	 */
	shrunkSize = MAXALIGN(SHORTALIGN(GinGetOrigSizePosting(itup)) + sizeof(ItemPointerData) * nipd);

	/* The recomputed size must fit within what the tuple already claims. */
	Assert((itup->t_info & INDEX_SIZE_MASK) >= shrunkSize);

	/* Swap the new size into the size bits, leaving the other bits intact. */
	itup->t_info = (itup->t_info & ~INDEX_SIZE_MASK) | shrunkSize;

	GinSetNPosting(itup, nipd);
}
/*
* The entry tree is "static", i.e. tuples are never deleted from it,
* so we don't use a right bound; we use the rightmost key instead.

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.20 2009/03/24 22:06:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.21 2009/06/06 02:39:40 tgl Exp $
*-------------------------------------------------------------------------
*/
@ -102,17 +102,19 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
{
Datum key = gin_index_getattr(ginstate, old);
OffsetNumber attnum = gintuple_get_attrnum(ginstate, old);
IndexTuple res = GinFormTuple(ginstate, attnum, key, NULL, nitem + GinGetNPosting(old));
IndexTuple res = GinFormTuple(ginstate, attnum, key,
NULL, nitem + GinGetNPosting(old));
if (res)
{
/* good, small enough */
MergeItemPointers(GinGetPosting(res),
GinGetPosting(old), GinGetNPosting(old),
items, nitem
);
uint32 newnitem;
GinSetNPosting(res, nitem + GinGetNPosting(old));
newnitem = MergeItemPointers(GinGetPosting(res),
GinGetPosting(old), GinGetNPosting(old),
items, nitem);
/* merge might have eliminated some duplicate items */
GinShortenTuple(res, newnitem);
}
else
{

View File

@ -4,7 +4,7 @@
*
* Copyright (c) 2006-2009, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.32 2009/06/05 18:50:47 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.33 2009/06/06 02:39:40 tgl Exp $
*--------------------------------------------------------------------------
*/
#ifndef GIN_H
@ -435,6 +435,7 @@ extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBl
/* ginentrypage.c */
extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key,
ItemPointerData *ipd, uint32 nipd);
extern void GinShortenTuple(IndexTuple itup, uint32 nipd);
extern void prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum,
Datum value, GinState *ginstate);
extern void entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
@ -442,7 +443,7 @@ extern IndexTuple ginPageGetLinkItup(Buffer buf);
/* gindatapage.c */
extern int compareItemPointers(ItemPointer a, ItemPointer b);
extern void MergeItemPointers(ItemPointerData *dst,
extern uint32 MergeItemPointers(ItemPointerData *dst,
ItemPointerData *a, uint32 na,
ItemPointerData *b, uint32 nb);