diff --git a/contrib/intarray/expected/_int.out b/contrib/intarray/expected/_int.out index 7dd28ea145..2f33c7e18d 100644 --- a/contrib/intarray/expected/_int.out +++ b/contrib/intarray/expected/_int.out @@ -857,4 +857,95 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21'; 6343 (1 row) +DROP INDEX text_idx; +-- Repeat the same queries with an extended data set. The data set is the +-- same that we used before, except that each element in the array is +-- repeated three times, offset by 1000 and 2000. For example, {1, 5} +-- becomes {1, 1001, 2001, 5, 1005, 2005}. +-- +-- That has proven to be unreasonably effective at exercising codepaths in +-- core GiST code related to splitting parent pages, which is not covered by +-- other tests. This is a bit out-of-place as the point is to test core GiST +-- code rather than this extension, but there is no suitable GiST opclass in +-- core that would reach the same codepaths. +CREATE TABLE more__int AS SELECT + -- Leave alone NULLs, empty arrays and the one row that we use to test + -- equality + CASE WHEN a IS NULL OR a = '{}' OR a = '{73,23,20}' THEN a ELSE + (select array_agg(u) || array_agg(u + 1000) || array_agg(u + 2000) from (select unnest(a) u) x) + END AS a, a as b + FROM test__int; +CREATE INDEX ON more__int using gist (a gist__int_ops(numranges = 252)); +SELECT count(*) from more__int WHERE a && '{23,50}'; + count +------- + 403 +(1 row) + +SELECT count(*) from more__int WHERE a @@ '23|50'; + count +------- + 403 +(1 row) + +SELECT count(*) from more__int WHERE a @> '{23,50}'; + count +------- + 12 +(1 row) + +SELECT count(*) from more__int WHERE a @@ '23&50'; + count +------- + 12 +(1 row) + +SELECT count(*) from more__int WHERE a @> '{20,23}'; + count +------- + 12 +(1 row) + +SELECT count(*) from more__int WHERE a <@ '{73,23,20}'; + count +------- + 10 +(1 row) + +SELECT count(*) from more__int WHERE a = '{73,23,20}'; + count +------- + 1 +(1 row) + +SELECT count(*) from more__int WHERE a @@ '50&68'; + count +------- + 9 +(1 row) + +SELECT count(*) from more__int WHERE a @> '{20,23}' or a @> '{50,68}'; + count +------- + 21 +(1 row) + +SELECT count(*) from more__int WHERE a @@ '(20&23)|(50&68)'; + count +------- + 21 +(1 row) + +SELECT count(*) from more__int WHERE a @@ '20 | !21'; + count +------- + 6566 +(1 row) + +SELECT count(*) from more__int WHERE a @@ '!20 & !21'; + count +------- + 6343 +(1 row) + RESET enable_seqscan; diff --git a/contrib/intarray/sql/_int.sql b/contrib/intarray/sql/_int.sql index f8002c93fc..bd3e01208d 100644 --- a/contrib/intarray/sql/_int.sql +++ b/contrib/intarray/sql/_int.sql @@ -180,4 +180,39 @@ SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)'; SELECT count(*) from test__int WHERE a @@ '20 | !21'; SELECT count(*) from test__int WHERE a @@ '!20 & !21'; +DROP INDEX text_idx; + +-- Repeat the same queries with an extended data set. The data set is the +-- same that we used before, except that each element in the array is +-- repeated three times, offset by 1000 and 2000. For example, {1, 5} +-- becomes {1, 1001, 2001, 5, 1005, 2005}. +-- +-- That has proven to be unreasonably effective at exercising codepaths in +-- core GiST code related to splitting parent pages, which is not covered by +-- other tests. This is a bit out-of-place as the point is to test core GiST +-- code rather than this extension, but there is no suitable GiST opclass in +-- core that would reach the same codepaths. +CREATE TABLE more__int AS SELECT + -- Leave alone NULLs, empty arrays and the one row that we use to test + -- equality + CASE WHEN a IS NULL OR a = '{}' OR a = '{73,23,20}' THEN a ELSE + (select array_agg(u) || array_agg(u + 1000) || array_agg(u + 2000) from (select unnest(a) u) x) + END AS a, a as b + FROM test__int; +CREATE INDEX ON more__int using gist (a gist__int_ops(numranges = 252)); + +SELECT count(*) from more__int WHERE a && '{23,50}'; +SELECT count(*) from more__int WHERE a @@ '23|50'; +SELECT count(*) from more__int WHERE a @> '{23,50}'; +SELECT count(*) from more__int WHERE a @@ '23&50'; +SELECT count(*) from more__int WHERE a @> '{20,23}'; +SELECT count(*) from more__int WHERE a <@ '{73,23,20}'; +SELECT count(*) from more__int WHERE a = '{73,23,20}'; +SELECT count(*) from more__int WHERE a @@ '50&68'; +SELECT count(*) from more__int WHERE a @> '{20,23}' or a @> '{50,68}'; +SELECT count(*) from more__int WHERE a @@ '(20&23)|(50&68)'; +SELECT count(*) from more__int WHERE a @@ '20 | !21'; +SELECT count(*) from more__int WHERE a @@ '!20 & !21'; + + RESET enable_seqscan; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 0683f42c25..3c519f743f 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -1017,87 +1017,106 @@ gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum) * remain so at exit, but it might not be the same page anymore. */ static void -gistFindCorrectParent(Relation r, GISTInsertStack *child) +gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build) { GISTInsertStack *parent = child->parent; + ItemId iid; + IndexTuple idxtuple; + OffsetNumber maxoff; + GISTInsertStack *ptr; gistcheckpage(r, parent->buffer); parent->page = (Page) BufferGetPage(parent->buffer); + maxoff = PageGetMaxOffsetNumber(parent->page); - /* here we don't need to distinguish between split and page update */ - if (child->downlinkoffnum == InvalidOffsetNumber || - parent->lsn != PageGetLSN(parent->page)) + /* Check if the downlink is still where it was before */ + if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff) { - /* parent is changed, look child in right links until found */ - OffsetNumber i, - maxoff; - ItemId iid; - IndexTuple idxtuple; - GISTInsertStack *ptr; - - while (true) - { - maxoff = PageGetMaxOffsetNumber(parent->page); - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) - { - iid = PageGetItemId(parent->page, i); - idxtuple = (IndexTuple) PageGetItem(parent->page, iid); - if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno) - { - /* yes!!, found */ - child->downlinkoffnum = i; - return; - } - } - - parent->blkno = GistPageGetOpaque(parent->page)->rightlink; - UnlockReleaseBuffer(parent->buffer); - if (parent->blkno == InvalidBlockNumber) - { - /* - * End of chain and still didn't find parent. It's a very-very - * rare situation when root splitted. - */ - break; - } - parent->buffer = ReadBuffer(r, parent->blkno); - LockBuffer(parent->buffer, GIST_EXCLUSIVE); - gistcheckpage(r, parent->buffer); - parent->page = (Page) BufferGetPage(parent->buffer); - } - - /* - * awful!!, we need search tree to find parent ... , but before we - * should release all old parent - */ - - ptr = child->parent->parent; /* child->parent already released - * above */ - while (ptr) - { - ReleaseBuffer(ptr->buffer); - ptr = ptr->parent; - } - - /* ok, find new path */ - ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum); - - /* read all buffers as expected by caller */ - /* note we don't lock them or gistcheckpage them here! */ - while (ptr) - { - ptr->buffer = ReadBuffer(r, ptr->blkno); - ptr->page = (Page) BufferGetPage(ptr->buffer); - ptr = ptr->parent; - } - - /* install new chain of parents to stack */ - child->parent = parent; - - /* make recursive call to normal processing */ - LockBuffer(child->parent->buffer, GIST_EXCLUSIVE); - gistFindCorrectParent(r, child); + iid = PageGetItemId(parent->page, child->downlinkoffnum); + idxtuple = (IndexTuple) PageGetItem(parent->page, iid); + if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno) + return; /* still there */ } + + /* + * The page has changed since we looked. During normal operation, every + * update of a page changes its LSN, so the LSN we memorized should have + * changed too. During index build, however, we don't WAL-log the changes + * until we have built the index, so the LSN doesn't change. There is no + * concurrent activity during index build, but we might have changed the + * parent ourselves. + */ + Assert(parent->lsn != PageGetLSN(parent->page) || is_build); + + /* + * Scan the page to re-find the downlink. If the page was split, it might + * have moved to a different page, so follow the right links until we find + * it. + */ + while (true) + { + OffsetNumber i; + + maxoff = PageGetMaxOffsetNumber(parent->page); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + iid = PageGetItemId(parent->page, i); + idxtuple = (IndexTuple) PageGetItem(parent->page, iid); + if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno) + { + /* yes!!, found */ + child->downlinkoffnum = i; + return; + } + } + + parent->blkno = GistPageGetOpaque(parent->page)->rightlink; + parent->downlinkoffnum = InvalidOffsetNumber; + UnlockReleaseBuffer(parent->buffer); + if (parent->blkno == InvalidBlockNumber) + { + /* + * End of chain and still didn't find parent. It's a very-very + * rare situation when root splitted. + */ + break; + } + parent->buffer = ReadBuffer(r, parent->blkno); + LockBuffer(parent->buffer, GIST_EXCLUSIVE); + gistcheckpage(r, parent->buffer); + parent->page = (Page) BufferGetPage(parent->buffer); + } + + /* + * awful!!, we need search tree to find parent ... , but before we should + * release all old parent + */ + + ptr = child->parent->parent; /* child->parent already released above */ + while (ptr) + { + ReleaseBuffer(ptr->buffer); + ptr = ptr->parent; + } + + /* ok, find new path */ + ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum); + + /* read all buffers as expected by caller */ + /* note we don't lock them or gistcheckpage them here! */ + while (ptr) + { + ptr->buffer = ReadBuffer(r, ptr->blkno); + ptr->page = (Page) BufferGetPage(ptr->buffer); + ptr = ptr->parent; + } + + /* install new chain of parents to stack */ + child->parent = parent; + + /* make recursive call to normal processing */ + LockBuffer(child->parent->buffer, GIST_EXCLUSIVE); + gistFindCorrectParent(r, child, is_build); } /* @@ -1105,7 +1124,7 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child) */ static IndexTuple gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate, - GISTInsertStack *stack) + GISTInsertStack *stack, bool is_build) { Page page = BufferGetPage(buf); OffsetNumber maxoff; @@ -1146,7 +1165,7 @@ gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate, ItemId iid; LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE); - gistFindCorrectParent(rel, stack); + gistFindCorrectParent(rel, stack, is_build); iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum); downlink = (IndexTuple) PageGetItem(stack->parent->page, iid); downlink = CopyIndexTuple(downlink); @@ -1192,7 +1211,7 @@ gistfixsplit(GISTInsertState *state, GISTSTATE *giststate) page = BufferGetPage(buf); /* Form the new downlink tuples to insert to parent */ - downlink = gistformdownlink(state->r, buf, giststate, stack); + downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build); si->buf = buf; si->downlink = downlink; @@ -1346,7 +1365,7 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack, right = (GISTPageSplitInfo *) list_nth(splitinfo, pos); left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1); - gistFindCorrectParent(state->r, stack); + gistFindCorrectParent(state->r, stack, state->is_build); if (gistinserttuples(state, stack->parent, giststate, &right->downlink, 1, InvalidOffsetNumber, @@ -1371,21 +1390,22 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack, */ tuples[0] = left->downlink; tuples[1] = right->downlink; - gistFindCorrectParent(state->r, stack); - if (gistinserttuples(state, stack->parent, giststate, - tuples, 2, - stack->downlinkoffnum, - left->buf, right->buf, - true, /* Unlock parent */ - unlockbuf /* Unlock stack->buffer if caller wants - * that */ - )) - { - /* - * If the parent page was split, the downlink might have moved. - */ - stack->downlinkoffnum = InvalidOffsetNumber; - } + gistFindCorrectParent(state->r, stack, state->is_build); + (void) gistinserttuples(state, stack->parent, giststate, + tuples, 2, + stack->downlinkoffnum, + left->buf, right->buf, + true, /* Unlock parent */ + unlockbuf /* Unlock stack->buffer if caller + * wants that */ + ); + + /* + * The downlink might have moved when we updated it. Even if the page + * wasn't split, because gistinserttuples() implements updating the old + * tuple by removing and re-inserting it! + */ + stack->downlinkoffnum = InvalidOffsetNumber; Assert(left->buf == stack->buffer);