diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 5643dd4d88..7ed4e01bd3 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -1406,8 +1406,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 	OffsetNumber minoff;
 	OffsetNumber maxoff;
 	int			itemIndex;
-	IndexTuple	itup;
 	bool		continuescan;
+	int			indnatts;
 
 	/*
 	 * We must have the buffer pinned and locked, but the usual macro can't be
@@ -1427,6 +1427,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 		_bt_parallel_release(scan, BufferGetBlockNumber(so->currPos.buf));
 	}
 
+	continuescan = true;		/* default assumption */
+	indnatts = IndexRelationGetNumberOfAttributes(scan->indexRelation);
 	minoff = P_FIRSTDATAKEY(opaque);
 	maxoff = PageGetMaxOffsetNumber(page);
 
@@ -1468,23 +1470,58 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 
 		while (offnum <= maxoff)
 		{
-			itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
-			if (itup != NULL)
+			ItemId		iid = PageGetItemId(page, offnum);
+			IndexTuple	itup;
+
+			/*
+			 * If the scan specifies not to return killed tuples, then we
+			 * treat a killed tuple as not passing the qual
+			 */
+			if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
+			{
+				offnum = OffsetNumberNext(offnum);
+				continue;
+			}
+
+			itup = (IndexTuple) PageGetItem(page, iid);
+
+			if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
 			{
 				/* tuple passes all scan key conditions, so remember it */
 				_bt_saveitem(so, itemIndex, offnum, itup);
 				itemIndex++;
 			}
+			/* When !continuescan, there can't be any more matches, so stop */
 			if (!continuescan)
-			{
-				/* there can't be any more matches, so stop */
-				so->currPos.moreRight = false;
 				break;
-			}
 
 			offnum = OffsetNumberNext(offnum);
 		}
 
+		/*
+		 * We don't need to visit page to the right when the high key
+		 * indicates that no more matches will be found there.
+		 *
+		 * Checking the high key like this works out more often than you might
+		 * think.  Leaf page splits pick a split point between the two most
+		 * dissimilar tuples (this is weighed against the need to evenly share
+		 * free space).  Leaf pages with high key attribute values that can
+		 * only appear on non-pivot tuples on the right sibling page are
+		 * common.
+		 */
+		if (continuescan && !P_RIGHTMOST(opaque))
+		{
+			ItemId		iid = PageGetItemId(page, P_HIKEY);
+			IndexTuple	itup = (IndexTuple) PageGetItem(page, iid);
+			int			truncatt;
+
+			truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
+			_bt_checkkeys(scan, itup, truncatt, dir, &continuescan);
+		}
+
+		if (!continuescan)
+			so->currPos.moreRight = false;
+
 		Assert(itemIndex <= MaxIndexTuplesPerPage);
 		so->currPos.firstItem = 0;
 		so->currPos.lastItem = itemIndex - 1;
@@ -1499,8 +1536,40 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 
 		while (offnum >= minoff)
 		{
-			itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
-			if (itup != NULL)
+			ItemId		iid = PageGetItemId(page, offnum);
+			IndexTuple	itup;
+			bool		tuple_alive;
+			bool		passes_quals;
+
+			/*
+			 * If the scan specifies not to return killed tuples, then we
+			 * treat a killed tuple as not passing the qual.  Most of the
+			 * time, it's a win to not bother examining the tuple's index
+			 * keys, but just skip to the next tuple (previous, actually,
+			 * since we're scanning backwards).  However, if this is the first
+			 * tuple on the page, we do check the index keys, to prevent
+			 * uselessly advancing to the page to the left.  This is similar
+			 * to the high key optimization used by forward scans.
+			 */
+			if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
+			{
+				Assert(offnum >= P_FIRSTDATAKEY(opaque));
+				if (offnum > P_FIRSTDATAKEY(opaque))
+				{
+					offnum = OffsetNumberPrev(offnum);
+					continue;
+				}
+
+				tuple_alive = false;
+			}
+			else
+				tuple_alive = true;
+
+			itup = (IndexTuple) PageGetItem(page, iid);
+
+			passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
+										 &continuescan);
+			if (passes_quals && tuple_alive)
 			{
 				/* tuple passes all scan key conditions, so remember it */
 				itemIndex--;
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 6b59e16c4d..92b8b5f134 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -48,7 +48,7 @@ static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
 static bool _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption);
 static void _bt_mark_scankey_required(ScanKey skey);
 static bool _bt_check_rowcompare(ScanKey skey,
-					 IndexTuple tuple, TupleDesc tupdesc,
+					 IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
 					 ScanDirection dir, bool *continuescan);
 static int	_bt_keep_natts(Relation rel, IndexTuple lastleft,
 				IndexTuple firstright, BTScanInsert itup_key);
@@ -1333,73 +1333,35 @@ _bt_mark_scankey_required(ScanKey skey)
 /*
  * Test whether an indextuple satisfies all the scankey conditions.
  *
- * If so, return the address of the index tuple on the index page.
- * If not, return NULL.
+ * Return true if so, false if not.  If the tuple fails to pass the qual,
+ * we also determine whether there's any need to continue the scan beyond
+ * this tuple, and set *continuescan accordingly.  See comments for
+ * _bt_preprocess_keys(), above, about how this is done.
  *
- * If the tuple fails to pass the qual, we also determine whether there's
- * any need to continue the scan beyond this tuple, and set *continuescan
- * accordingly.  See comments for _bt_preprocess_keys(), above, about how
- * this is done.
+ * Forward scan callers can pass a high key tuple in the hopes of having
+ * us set *continuescan to false, and avoiding an unnecessary visit to
+ * the page to the right.
  *
  * scan: index scan descriptor (containing a search-type scankey)
- * page: buffer page containing index tuple
- * offnum: offset number of index tuple (must be a valid item!)
+ * tuple: index tuple to test
+ * tupnatts: number of attributes in tuple (high key may be truncated)
  * dir: direction we are scanning in
  * continuescan: output parameter (will be set correctly in all cases)
- *
- * Caller must hold pin and lock on the index page.
  */
-IndexTuple
-_bt_checkkeys(IndexScanDesc scan,
-			  Page page, OffsetNumber offnum,
+bool
+_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
 			  ScanDirection dir, bool *continuescan)
 {
-	ItemId		iid = PageGetItemId(page, offnum);
-	bool		tuple_alive;
-	IndexTuple	tuple;
 	TupleDesc	tupdesc;
 	BTScanOpaque so;
 	int			keysz;
 	int			ikey;
 	ScanKey		key;
 
+	Assert(BTreeTupleGetNAtts(tuple, scan->indexRelation) == tupnatts);
+
 	*continuescan = true;		/* default assumption */
 
-	/*
-	 * If the scan specifies not to return killed tuples, then we treat a
-	 * killed tuple as not passing the qual.  Most of the time, it's a win to
-	 * not bother examining the tuple's index keys, but just return
-	 * immediately with continuescan = true to proceed to the next tuple.
-	 * However, if this is the last tuple on the page, we should check the
-	 * index keys to prevent uselessly advancing to the next page.
-	 */
-	if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
-	{
-		/* return immediately if there are more tuples on the page */
-		if (ScanDirectionIsForward(dir))
-		{
-			if (offnum < PageGetMaxOffsetNumber(page))
-				return NULL;
-		}
-		else
-		{
-			BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-
-			if (offnum > P_FIRSTDATAKEY(opaque))
-				return NULL;
-		}
-
-		/*
-		 * OK, we want to check the keys so we can set continuescan correctly,
-		 * but we'll return NULL even if the tuple passes the key tests.
-		 */
-		tuple_alive = false;
-	}
-	else
-		tuple_alive = true;
-
-	tuple = (IndexTuple) PageGetItem(page, iid);
-
 	tupdesc = RelationGetDescr(scan->indexRelation);
 	so = (BTScanOpaque) scan->opaque;
 	keysz = so->numberOfKeys;
@@ -1410,13 +1372,25 @@ _bt_checkkeys(IndexScanDesc scan,
 		bool		isNull;
 		Datum		test;
 
-		Assert(key->sk_attno <= BTreeTupleGetNAtts(tuple, scan->indexRelation));
+		if (key->sk_attno > tupnatts)
+		{
+			/*
+			 * This attribute is truncated (must be high key).  The value for
+			 * this attribute in the first non-pivot tuple on the page to the
+			 * right could be any possible value.  Assume that truncated
+			 * attribute passes the qual.
+			 */
+			Assert(ScanDirectionIsForward(dir));
+			continue;
+		}
+
 		/* row-comparison keys need special processing */
 		if (key->sk_flags & SK_ROW_HEADER)
 		{
-			if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan))
+			if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir,
+									 continuescan))
 				continue;
-			return NULL;
+			return false;
 		}
 
 		datum = index_getattr(tuple,
@@ -1454,7 +1428,7 @@ _bt_checkkeys(IndexScanDesc scan,
 			/*
 			 * In any case, this indextuple doesn't match the qual.
 			 */
-			return NULL;
+			return false;
 		}
 
 		if (isNull)
@@ -1495,7 +1469,7 @@ _bt_checkkeys(IndexScanDesc scan,
 			/*
 			 * In any case, this indextuple doesn't match the qual.
 			 */
-			return NULL;
+			return false;
 		}
 
 		test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
@@ -1523,16 +1497,12 @@ _bt_checkkeys(IndexScanDesc scan,
 			/*
 			 * In any case, this indextuple doesn't match the qual.
 			 */
-			return NULL;
+			return false;
 		}
 	}
 
-	/* Check for failure due to it being a killed tuple. */
-	if (!tuple_alive)
-		return NULL;
-
 	/* If we get here, the tuple passes all index quals. */
-	return tuple;
+	return true;
 }
 
 /*
@@ -1545,8 +1515,8 @@ _bt_checkkeys(IndexScanDesc scan,
  * This is a subroutine for _bt_checkkeys, which see for more info.
  */
 static bool
-_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
-					 ScanDirection dir, bool *continuescan)
+_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
+					 TupleDesc tupdesc, ScanDirection dir, bool *continuescan)
 {
 	ScanKey		subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
 	int32		cmpresult = 0;
@@ -1563,6 +1533,19 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
 
 		Assert(subkey->sk_flags & SK_ROW_MEMBER);
 
+		if (subkey->sk_attno > tupnatts)
+		{
+			/*
+			 * This attribute is truncated (must be high key).  The value for
+			 * this attribute in the first non-pivot tuple on the page to the
+			 * right could be any possible value.  Assume that truncated
+			 * attribute passes the qual.
+			 */
+			Assert(ScanDirectionIsForward(dir));
+			cmpresult = 0;
+			continue;
+		}
+
 		datum = index_getattr(tuple,
 							  subkey->sk_attno,
 							  tupdesc,
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index e5876982a2..473c6f2918 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -772,9 +772,8 @@ extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir);
 extern void _bt_mark_array_keys(IndexScanDesc scan);
 extern void _bt_restore_array_keys(IndexScanDesc scan);
 extern void _bt_preprocess_keys(IndexScanDesc scan);
-extern IndexTuple _bt_checkkeys(IndexScanDesc scan,
-				  Page page, OffsetNumber offnum,
-				  ScanDirection dir, bool *continuescan);
+extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
+			  int tupnatts, ScanDirection dir, bool *continuescan);
 extern void _bt_killitems(IndexScanDesc scan);
 extern BTCycleId _bt_vacuum_cycleid(Relation rel);
 extern BTCycleId _bt_start_vacuum(Relation rel);