Improvements and fixes for e0b1ee17dc

e0b1ee17dc introduced optimization for matching B-tree scan keys required for
the directional scan.  However, it incorrectly assumed that all keys required
for opposite direction scan are satisfied by _bt_first().  It has been
illustrated that with multiple scan keys over the same column, a lesser one
(according to the scan direction) could win leaving the other one unsatisfied.

Instead of relying on _bt_first() this commit introduces code that memorizes
whether there was at least one match on the page.  If that's true we know that
keys required for opposite-direction scan are satisfied as soon as
corresponding values are not NULLs.

Also, this commit simplifies the description for the optimization of keys
required for the current direction scan.  Now the flag used for this is named
continuescanPrechecked and means exactly that *continuescan flag is known
to be true for the last item on the page.

Reported-by: Peter Geoghegan
Discussion: https://postgr.es/m/CAH2-Wzn0LeLcb1PdBnK0xisz8NpHkxRrMr3NWJ%2BKOK-WZ%2BQtTQ%40mail.gmail.com
Reviewed-by: Pavel Borisov
This commit is contained in:
Alexander Korotkov 2023-12-27 14:22:02 +02:00
parent 06b10f80ba
commit 7e6fb5da41
3 changed files with 51 additions and 33 deletions

View File

@ -1530,7 +1530,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
int itemIndex;
bool continuescan;
int indnatts;
bool requiredMatchedByPrecheck;
bool continuescanPrechecked;
bool haveFirstMatch = false;
/*
* We must have the buffer pinned and locked, but the usual macro can't be
@ -1585,12 +1586,11 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
Assert(BTScanPosIsPinned(so->currPos));
/*
* Prechecking the page with scan keys required for direction scan. We
* check these keys with the last item on the page (according to our scan
* direction). If these keys are matched, we can skip checking them with
* every item on the page. Scan keys for our scan direction would
* necessarily match the previous items. Scan keys required for opposite
* direction scan are already matched by the _bt_first() call.
* Prechecking the value of the continuescan flag for the last item on the
* page (for backwards scan it will be the first item on a page). If we
* observe it to be true, then it should be true for all other items. This
* allows us to do significant optimizations in the _bt_checkkeys()
* function for all the items on the page.
*
* With the forward scan, we do this check for the last item on the page
* instead of the high key. It's relatively likely that the most
@ -1610,17 +1610,17 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
itup = (IndexTuple) PageGetItem(page, iid);
/*
* Do the precheck. Note that we pass the pointer to
* 'requiredMatchedByPrecheck' to 'continuescan' argument. That will
* Do the precheck. Note that we pass the pointer to the
* 'continuescanPrechecked' to the 'continuescan' argument. That will
* set flag to true if all required keys are satisfied and false
* otherwise.
*/
(void) _bt_checkkeys(scan, itup, indnatts, dir,
&requiredMatchedByPrecheck, false);
&continuescanPrechecked, false, false);
}
else
{
requiredMatchedByPrecheck = false;
continuescanPrechecked = false;
}
if (ScanDirectionIsForward(dir))
@ -1650,19 +1650,22 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
Assert(!BTreeTupleIsPivot(itup));
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
&continuescan, requiredMatchedByPrecheck);
&continuescan,
continuescanPrechecked,
haveFirstMatch);
/*
* If the result of prechecking required keys was true, then in
* assert-enabled builds we also recheck that the _bt_checkkeys()
* result is the same.
*/
Assert(!requiredMatchedByPrecheck ||
Assert((!continuescanPrechecked && haveFirstMatch) ||
passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
&continuescan, false));
&continuescan, false, false));
if (passes_quals)
{
/* tuple passes all scan key conditions */
haveFirstMatch = true;
if (!BTreeTupleIsPosting(itup))
{
/* Remember it */
@ -1717,7 +1720,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
int truncatt;
truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan, false);
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan, false, false);
}
if (!continuescan)
@ -1770,19 +1773,22 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
Assert(!BTreeTupleIsPivot(itup));
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
&continuescan, requiredMatchedByPrecheck);
&continuescan,
continuescanPrechecked,
haveFirstMatch);
/*
* If the result of prechecking required keys was true, then in
* assert-enabled builds we also recheck that the _bt_checkkeys()
* result is the same.
*/
Assert(!requiredMatchedByPrecheck ||
Assert((!continuescanPrechecked && !haveFirstMatch) ||
passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
&continuescan, false));
&continuescan, false, false));
if (passes_quals && tuple_alive)
{
/* tuple passes all scan key conditions */
haveFirstMatch = true;
if (!BTreeTupleIsPosting(itup))
{
/* Remember it */

View File

@ -1364,13 +1364,15 @@ _bt_mark_scankey_required(ScanKey skey)
* tupnatts: number of attributes in tupnatts (high key may be truncated)
* dir: direction we are scanning in
* continuescan: output parameter (will be set correctly in all cases)
* requiredMatchedByPrecheck: indicates that scan keys required for
* direction scan are already matched
* continuescanPrechecked: indicates that *continuescan flag is known to
* be true for the last item on the page
* haveFirstMatch: indicates that we already have at least one match
* in the current page
*/
bool
_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
ScanDirection dir, bool *continuescan,
bool requiredMatchedByPrecheck)
bool continuescanPrechecked, bool haveFirstMatch)
{
TupleDesc tupdesc;
BTScanOpaque so;
@ -1406,13 +1408,23 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
requiredOppositeDir = true;
/*
* Is the key required for scanning for either forward or backward
* direction? If so and caller told us that these types of keys are
* known to be matched, skip the check. Except for the row keys,
* where NULLs could be found in the middle of matching values.
* If the caller told us the *continuescan flag is known to be true
* for the last item on the page, then we know the keys required for
* the current direction scan should be matched. Otherwise, the
* *continuescan flag would be set for the current item and
* subsequently the last item on the page accordingly.
*
* If the key is required for the opposite direction scan, we can skip
* the check if the caller tells us there was already at least one
* matching item on the page. Also, we require the *continuescan flag
* to be true for the last item on the page to know there are no
* NULLs.
*
* Both cases above work except for the row keys, where NULLs could be
* found in the middle of matching values.
*/
if ((requiredSameDir || requiredOppositeDir) &&
!(key->sk_flags & SK_ROW_HEADER) && requiredMatchedByPrecheck)
if ((requiredSameDir || (requiredOppositeDir && haveFirstMatch)) &&
!(key->sk_flags & SK_ROW_HEADER) && continuescanPrechecked)
continue;
if (key->sk_attno > tupnatts)
@ -1513,12 +1525,12 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
}
/*
* Apply the key checking function. When the key is required for
* opposite direction scan, it must be already satisfied by
* _bt_first() except for the NULLs checking, which have already done
* above.
* Apply the key-checking function. When the key is required for the
* opposite direction scan, it must be already satisfied as soon as
* there is already match on the page. Except for the NULLs checking,
* which have already done above.
*/
if (!requiredOppositeDir)
if (!(requiredOppositeDir && haveFirstMatch))
{
test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
datum, key->sk_argument);

View File

@ -1251,7 +1251,7 @@ extern void _bt_restore_array_keys(IndexScanDesc scan);
extern void _bt_preprocess_keys(IndexScanDesc scan);
extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
int tupnatts, ScanDirection dir, bool *continuescan,
bool requiredMatchedByPrecheck);
bool requiredMatchedByPrecheck, bool haveFirstMatch);
extern void _bt_killitems(IndexScanDesc scan);
extern BTCycleId _bt_vacuum_cycleid(Relation rel);
extern BTCycleId _bt_start_vacuum(Relation rel);