From 7c07305e6f025a97732adb01ca6fcb499655a886 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Thu, 28 Sep 2023 16:29:22 -0700 Subject: [PATCH] Fix btmarkpos/btrestrpos array key wraparound bug. nbtree's mark/restore processing failed to correctly handle an edge case involving array key advancement and related search-type scan key state. Scans with ScalarArrayScalarArrayOpExpr quals requiring mark/restore processing (for a merge join) could incorrectly conclude that an affected array/scan key must not have advanced during the time between marking and restoring the scan's position. As a result of all this, array key handling within btrestrpos could skip a required call to _bt_preprocess_keys(). This confusion allowed later primitive index scans to overlook tuples matching the true current array keys. The scan's search-type scan keys would still have spurious values corresponding to the final array element(s) -- not values matching the first/now-current array element(s). To fix, remember that "array key wraparound" has taken place during the ongoing btrescan in a flag variable stored in the scan's state, and use that information at the point where btrestrpos decides if another call to _bt_preprocess_keys is required. Oversight in commit 70bc5833, which taught nbtree to handle array keys during mark/restore processing, but missed this subtlety. That commit was itself a bug fix for an issue in commit 9e8da0f7, which taught nbtree to handle ScalarArrayOpExpr quals natively. Author: Peter Geoghegan Discussion: https://postgr.es/m/CAH2-WzkgP3DDRJxw6DgjCxo-cu-DKrvjEv_ArkP2ctBJatDCYg@mail.gmail.com Backpatch: 11- (all supported branches). --- src/backend/access/nbtree/nbtree.c | 1 + src/backend/access/nbtree/nbtutils.c | 17 ++++++++++++++++- src/include/access/nbtree.h | 4 +++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 18b370376d..8bc55ec027 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -364,6 +364,7 @@ btbeginscan(Relation rel, int nkeys, int norderbys) so->keyData = NULL; so->arrayKeyData = NULL; /* assume no array keys for now */ + so->arraysStarted = false; so->numArrayKeys = 0; so->arrayKeys = NULL; so->arrayContext = NULL; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 2b0287e6dc..78657abf09 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -546,6 +546,8 @@ _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir) curArrayKey->cur_elem = 0; skey->sk_argument = curArrayKey->elem_values[curArrayKey->cur_elem]; } + + so->arraysStarted = true; } /* @@ -605,6 +607,14 @@ _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir) if (scan->parallel_scan != NULL) _bt_parallel_advance_array_keys(scan); + /* + * When no new array keys were found, the scan is "past the end" of the + * array keys. _bt_start_array_keys can still "restart" the array keys if + * a rescan is required. + */ + if (!found) + so->arraysStarted = false; + return found; } @@ -658,8 +668,13 @@ _bt_restore_array_keys(IndexScanDesc scan) * If we changed any keys, we must redo _bt_preprocess_keys. That might * sound like overkill, but in cases with multiple keys per index column * it seems necessary to do the full set of pushups. + * + * Also do this whenever the scan's set of array keys "wrapped around" at + * the end of the last primitive index scan. There won't have been a call + * to _bt_preprocess_keys from some other place following wrap around, so + * we do it for ourselves. */ - if (changed) + if (changed || !so->arraysStarted) { _bt_preprocess_keys(scan); /* The mark should have been set on a consistent set of keys... */ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index e6b24408af..03cc7836e4 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -435,8 +435,10 @@ typedef struct BTArrayKeyInfo typedef struct BTScanOpaqueData { - /* these fields are set by _bt_preprocess_keys(): */ + /* all fields (except arraysStarted) are set by _bt_preprocess_keys(): */ bool qual_ok; /* false if qual can never be satisfied */ + bool arraysStarted; /* Started array keys, but have yet to "reach + * past the end" of all arrays? */ int numberOfKeys; /* number of preprocessed scan keys */ ScanKey keyData; /* array of preprocessed scan keys */