Fix a serious bug introduced into GIN in 8.4: now that MergeItemPointers()

is supposed to remove duplicate heap TIDs, we have to be sure to reduce the
tuple size and posting-item count accordingly in addItemPointersToTuple().
Failing to do so resulted in the effective injection of garbage TIDs into the
index contents, ie, whatever happened to be in the memory palloc'd for the
new tuple.  I'm not sure that this fully explains the index corruption
reported by Tatsuo Ishii, but the test case I'm using no longer fails.
This commit is contained in:
Tom Lane 2009-06-06 02:39:40 +00:00
parent 1978d7f13f
commit 356eea24ce
4 changed files with 62 additions and 26 deletions

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.14 2009/03/24 20:17:10 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.15 2009/06/06 02:39:40 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -32,10 +32,14 @@ compareItemPointers(ItemPointer a, ItemPointer b)
} }
/* /*
* Merge two ordered array of itempointer * Merge two ordered arrays of itempointers, eliminating any duplicates.
* Returns the number of items in the result.
* Caller is responsible that there is enough space at *dst.
*/ */
void uint32
MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb) MergeItemPointers(ItemPointerData *dst,
ItemPointerData *a, uint32 na,
ItemPointerData *b, uint32 nb)
{ {
ItemPointerData *dptr = dst; ItemPointerData *dptr = dst;
ItemPointerData *aptr = a, ItemPointerData *aptr = a,
@ -62,6 +66,8 @@ MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPoint
while (bptr - b < nb) while (bptr - b < nb)
*dptr++ = *bptr++; *dptr++ = *bptr++;
return dptr - dst;
} }
/* /*

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.19 2009/01/01 17:23:34 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.20 2009/06/06 02:39:40 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -20,28 +20,33 @@
#include "utils/rel.h" #include "utils/rel.h"
/* /*
* forms tuple for entry tree. On leaf page, Index tuple has * Form a tuple for entry tree.
* non-traditional layout. Tuple may contain posting list or *
* root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno) * On leaf pages, Index tuple has non-traditional layout. Tuple may contain
* posting list or root blocknumber of posting tree.
* Macros: GinIsPostingTree(itup) / GinSetPostingTree(itup, blkno)
* 1) Posting list * 1) Posting list
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual * - itup->t_info & INDEX_SIZE_MASK contains total size of tuple as usual
* - ItemPointerGetBlockNumber(&itup->t_tid) contains original * - ItemPointerGetBlockNumber(&itup->t_tid) contains original
* size of tuple (without posting list). * size of tuple (without posting list).
* Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n) * Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains number * - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
* of elements in posting list (number of heap itempointer) * of elements in posting list (number of heap itempointers)
* Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n) * Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n)
* - After usual part of tuple there is a posting list * - After standard part of tuple there is a posting list, ie, array
* of heap itempointers
* Macros: GinGetPosting(itup) * Macros: GinGetPosting(itup)
* 2) Posting tree * 2) Posting tree
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
* - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of * - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
* root of posting tree * root of posting tree
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number
* GIN_TREE_POSTING, which distinguishes this from posting-list case
* *
* Storage of attributes of tuple are different for single and multicolumn index. * Attributes of an index tuple are different for single and multicolumn index.
* For single-column index tuple stores only value to be indexed and for * For single-column case, index tuple stores only value to be indexed.
* multicolumn variant it stores two attributes: column number of value and value. * For multicolumn case, it stores two attributes: column number of value
* and value.
*/ */
IndexTuple IndexTuple
GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd) GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd)
@ -89,6 +94,28 @@ GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData
return itup; return itup;
} }
/*
* Sometimes we reduce the number of posting list items in a tuple after
* having built it with GinFormTuple. This function adjusts the size
* fields to match.
*/
void
GinShortenTuple(IndexTuple itup, uint32 nipd)
{
uint32 newsize;
Assert(nipd <= GinGetNPosting(itup));
newsize = MAXALIGN(SHORTALIGN(GinGetOrigSizePosting(itup)) + sizeof(ItemPointerData) * nipd);
Assert(newsize <= (itup->t_info & INDEX_SIZE_MASK));
itup->t_info &= ~INDEX_SIZE_MASK;
itup->t_info |= newsize;
GinSetNPosting(itup, nipd);
}
/* /*
* Entry tree is a "static", ie tuple never deletes from it, * Entry tree is a "static", ie tuple never deletes from it,
* so we don't use right bound, we use rightest key instead. * so we don't use right bound, we use rightest key instead.

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.20 2009/03/24 22:06:03 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.21 2009/06/06 02:39:40 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -102,17 +102,19 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
{ {
Datum key = gin_index_getattr(ginstate, old); Datum key = gin_index_getattr(ginstate, old);
OffsetNumber attnum = gintuple_get_attrnum(ginstate, old); OffsetNumber attnum = gintuple_get_attrnum(ginstate, old);
IndexTuple res = GinFormTuple(ginstate, attnum, key, NULL, nitem + GinGetNPosting(old)); IndexTuple res = GinFormTuple(ginstate, attnum, key,
NULL, nitem + GinGetNPosting(old));
if (res) if (res)
{ {
/* good, small enough */ /* good, small enough */
MergeItemPointers(GinGetPosting(res), uint32 newnitem;
GinGetPosting(old), GinGetNPosting(old),
items, nitem
);
GinSetNPosting(res, nitem + GinGetNPosting(old)); newnitem = MergeItemPointers(GinGetPosting(res),
GinGetPosting(old), GinGetNPosting(old),
items, nitem);
/* merge might have eliminated some duplicate items */
GinShortenTuple(res, newnitem);
} }
else else
{ {

View File

@ -4,7 +4,7 @@
* *
* Copyright (c) 2006-2009, PostgreSQL Global Development Group * Copyright (c) 2006-2009, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.32 2009/06/05 18:50:47 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.33 2009/06/06 02:39:40 tgl Exp $
*-------------------------------------------------------------------------- *--------------------------------------------------------------------------
*/ */
#ifndef GIN_H #ifndef GIN_H
@ -435,6 +435,7 @@ extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBl
/* ginentrypage.c */ /* ginentrypage.c */
extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key,
ItemPointerData *ipd, uint32 nipd); ItemPointerData *ipd, uint32 nipd);
extern void GinShortenTuple(IndexTuple itup, uint32 nipd);
extern void prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, extern void prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum,
Datum value, GinState *ginstate); Datum value, GinState *ginstate);
extern void entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern void entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
@ -442,7 +443,7 @@ extern IndexTuple ginPageGetLinkItup(Buffer buf);
/* gindatapage.c */ /* gindatapage.c */
extern int compareItemPointers(ItemPointer a, ItemPointer b); extern int compareItemPointers(ItemPointer a, ItemPointer b);
extern void MergeItemPointers(ItemPointerData *dst, extern uint32 MergeItemPointers(ItemPointerData *dst,
ItemPointerData *a, uint32 na, ItemPointerData *a, uint32 na,
ItemPointerData *b, uint32 nb); ItemPointerData *b, uint32 nb);