From 9e8da0f75731aaa7605cf4656c21ea09e84d2eb1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 16 Oct 2011 15:39:24 -0400 Subject: [PATCH] Teach btree to handle ScalarArrayOpExpr quals natively. This allows "indexedcol op ANY(ARRAY[...])" conditions to be used in plain indexscans, and particularly in index-only scans. --- doc/src/sgml/catalogs.sgml | 7 + src/backend/access/nbtree/nbtree.c | 151 ++++++--- src/backend/access/nbtree/nbtutils.c | 457 +++++++++++++++++++++++++- src/backend/executor/nodeIndexscan.c | 75 ++++- src/backend/optimizer/path/costsize.c | 11 +- src/backend/optimizer/path/indxpath.c | 49 +-- src/backend/optimizer/util/plancat.c | 1 + src/backend/utils/adt/selfuncs.c | 7 - src/include/access/nbtree.h | 19 ++ src/include/access/skey.h | 23 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_am.h | 50 +-- src/include/nodes/relation.h | 3 +- 13 files changed, 726 insertions(+), 129 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index e830c5f3d4..cfecaa6931 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -491,6 +491,13 @@ for the first index column? + + amsearcharray + bool + + Does the access method support ScalarArrayOpExpr searches? + + amsearchnulls bool diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 3401bc5bdb..60b7f599a7 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -276,39 +276,63 @@ btgettuple(PG_FUNCTION_ARGS) scan->xs_recheck = false; /* - * If we've already initialized this scan, we can just advance it in the - * appropriate direction. If we haven't done so yet, we call a routine to - * get the first item in the scan. + * If we have any array keys, initialize them during first call for a + * scan. We can't do this in btrescan because we don't know the scan + * direction at that time. */ - if (BTScanPosIsValid(so->currPos)) + if (so->numArrayKeys && !BTScanPosIsValid(so->currPos)) + { + /* punt if we have any unsatisfiable array keys */ + if (so->numArrayKeys < 0) + PG_RETURN_BOOL(false); + + _bt_start_array_keys(scan, dir); + } + + /* This loop handles advancing to the next array elements, if any */ + do { /* - * Check to see if we should kill the previously-fetched tuple. + * If we've already initialized this scan, we can just advance it in + * the appropriate direction. If we haven't done so yet, we call + * _bt_first() to get the first item in the scan. */ - if (scan->kill_prior_tuple) + if (!BTScanPosIsValid(so->currPos)) + res = _bt_first(scan, dir); + else { /* - * Yes, remember it for later. (We'll deal with all such tuples - * at once right before leaving the index page.) The test for - * numKilled overrun is not just paranoia: if the caller reverses - * direction in the indexscan then the same item might get entered - * multiple times. It's not worth trying to optimize that, so we - * don't detect it, but instead just forget any excess entries. + * Check to see if we should kill the previously-fetched tuple. */ - if (so->killedItems == NULL) - so->killedItems = (int *) - palloc(MaxIndexTuplesPerPage * sizeof(int)); - if (so->numKilled < MaxIndexTuplesPerPage) - so->killedItems[so->numKilled++] = so->currPos.itemIndex; + if (scan->kill_prior_tuple) + { + /* + * Yes, remember it for later. (We'll deal with all such + * tuples at once right before leaving the index page.) The + * test for numKilled overrun is not just paranoia: if the + * caller reverses direction in the indexscan then the same + * item might get entered multiple times. It's not worth + * trying to optimize that, so we don't detect it, but instead + * just forget any excess entries. + */ + if (so->killedItems == NULL) + so->killedItems = (int *) + palloc(MaxIndexTuplesPerPage * sizeof(int)); + if (so->numKilled < MaxIndexTuplesPerPage) + so->killedItems[so->numKilled++] = so->currPos.itemIndex; + } + + /* + * Now continue the scan. + */ + res = _bt_next(scan, dir); } - /* - * Now continue the scan. - */ - res = _bt_next(scan, dir); - } - else - res = _bt_first(scan, dir); + /* If we have a tuple, return it ... */ + if (res) + break; + /* ... otherwise see if we have more array keys to deal with */ + } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir)); PG_RETURN_BOOL(res); } @@ -325,35 +349,50 @@ btgetbitmap(PG_FUNCTION_ARGS) int64 ntids = 0; ItemPointer heapTid; - /* Fetch the first page & tuple. */ - if (!_bt_first(scan, ForwardScanDirection)) + /* + * If we have any array keys, initialize them. + */ + if (so->numArrayKeys) { - /* empty scan */ - PG_RETURN_INT64(0); - } - /* Save tuple ID, and continue scanning */ - heapTid = &scan->xs_ctup.t_self; - tbm_add_tuples(tbm, heapTid, 1, false); - ntids++; + /* punt if we have any unsatisfiable array keys */ + if (so->numArrayKeys < 0) + PG_RETURN_INT64(ntids); - for (;;) + _bt_start_array_keys(scan, ForwardScanDirection); + } + + /* This loop handles advancing to the next array elements, if any */ + do { - /* - * Advance to next tuple within page. This is the same as the easy - * case in _bt_next(). - */ - if (++so->currPos.itemIndex > so->currPos.lastItem) + /* Fetch the first page & tuple */ + if (_bt_first(scan, ForwardScanDirection)) { - /* let _bt_next do the heavy lifting */ - if (!_bt_next(scan, ForwardScanDirection)) - break; - } + /* Save tuple ID, and continue scanning */ + heapTid = &scan->xs_ctup.t_self; + tbm_add_tuples(tbm, heapTid, 1, false); + ntids++; - /* Save tuple ID, and continue scanning */ - heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid; - tbm_add_tuples(tbm, heapTid, 1, false); - ntids++; - } + for (;;) + { + /* + * Advance to next tuple within page. This is the same as the + * easy case in _bt_next(). + */ + if (++so->currPos.itemIndex > so->currPos.lastItem) + { + /* let _bt_next do the heavy lifting */ + if (!_bt_next(scan, ForwardScanDirection)) + break; + } + + /* Save tuple ID, and continue scanning */ + heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid; + tbm_add_tuples(tbm, heapTid, 1, false); + ntids++; + } + } + /* Now see if we have more array keys to deal with */ + } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection)); PG_RETURN_INT64(ntids); } @@ -383,6 +422,12 @@ btbeginscan(PG_FUNCTION_ARGS) so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); else so->keyData = NULL; + + so->arrayKeyData = NULL; /* assume no array keys for now */ + so->numArrayKeys = 0; + so->arrayKeys = NULL; + so->arrayContext = NULL; + so->killedItems = NULL; /* until needed */ so->numKilled = 0; @@ -460,6 +505,9 @@ btrescan(PG_FUNCTION_ARGS) scan->numberOfKeys * sizeof(ScanKeyData)); so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */ + /* If any keys are SK_SEARCHARRAY type, set up array-key info */ + _bt_preprocess_array_keys(scan); + PG_RETURN_VOID(); } @@ -490,10 +538,13 @@ btendscan(PG_FUNCTION_ARGS) so->markItemIndex = -1; /* Release storage */ - if (so->killedItems != NULL) - pfree(so->killedItems); if (so->keyData != NULL) pfree(so->keyData); + /* so->arrayKeyData and so->arrayKeys are in arrayContext */ + if (so->arrayContext != NULL) + MemoryContextDelete(so->arrayContext); + if (so->killedItems != NULL) + pfree(so->killedItems); if (so->currTuples != NULL) pfree(so->currTuples); /* so->markTuples should not be pfree'd, see btrescan */ diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index b6fb3867bf..134abe6d59 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -21,10 +21,26 @@ #include "access/reloptions.h" #include "access/relscan.h" #include "miscadmin.h" +#include "utils/array.h" #include "utils/lsyscache.h" +#include "utils/memutils.h" #include "utils/rel.h" +typedef struct BTSortArrayContext +{ + FmgrInfo flinfo; + Oid collation; + bool reverse; +} BTSortArrayContext; + +static Datum _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey, + StrategyNumber strat, + Datum *elems, int nelems); +static int _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey, + bool reverse, + Datum *elems, int nelems); +static int _bt_compare_array_elements(const void *a, const void *b, void *arg); static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, ScanKey leftarg, ScanKey rightarg, bool *result); @@ -158,13 +174,433 @@ _bt_freestack(BTStack stack) } +/* + * _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys + * + * If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and + * set up BTArrayKeyInfo info for each one that is an equality-type key. + * Prepare modified scan keys in so->arrayKeyData, which will hold the current + * array elements during each primitive indexscan operation. For inequality + * array keys, it's sufficient to find the extreme element value and replace + * the whole array with that scalar value. + * + * Note: the reason we need so->arrayKeyData, rather than just scribbling + * on scan->keyData, is that callers are permitted to call btrescan without + * supplying a new set of scankey data. + */ +void +_bt_preprocess_array_keys(IndexScanDesc scan) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + int numberOfKeys = scan->numberOfKeys; + int16 *indoption = scan->indexRelation->rd_indoption; + int numArrayKeys; + ScanKey cur; + int i; + MemoryContext oldContext; + + /* Quick check to see if there are any array keys */ + numArrayKeys = 0; + for (i = 0; i < numberOfKeys; i++) + { + cur = &scan->keyData[i]; + if (cur->sk_flags & SK_SEARCHARRAY) + { + numArrayKeys++; + Assert(!(cur->sk_flags & (SK_ROW_HEADER | SK_SEARCHNULL | SK_SEARCHNOTNULL))); + /* If any arrays are null as a whole, we can quit right now. */ + if (cur->sk_flags & SK_ISNULL) + { + so->numArrayKeys = -1; + so->arrayKeyData = NULL; + return; + } + } + } + + /* Quit if nothing to do. */ + if (numArrayKeys == 0) + { + so->numArrayKeys = 0; + so->arrayKeyData = NULL; + return; + } + + /* + * Make a scan-lifespan context to hold array-associated data, or reset + * it if we already have one from a previous rescan cycle. + */ + if (so->arrayContext == NULL) + so->arrayContext = AllocSetContextCreate(CurrentMemoryContext, + "BTree Array Context", + ALLOCSET_SMALL_MINSIZE, + ALLOCSET_SMALL_INITSIZE, + ALLOCSET_SMALL_MAXSIZE); + else + MemoryContextReset(so->arrayContext); + + oldContext = MemoryContextSwitchTo(so->arrayContext); + + /* Create modifiable copy of scan->keyData in the workspace context */ + so->arrayKeyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); + memcpy(so->arrayKeyData, + scan->keyData, + scan->numberOfKeys * sizeof(ScanKeyData)); + + /* Allocate space for per-array data in the workspace context */ + so->arrayKeys = (BTArrayKeyInfo *) palloc0(numArrayKeys * sizeof(BTArrayKeyInfo)); + + /* Now process each array key */ + numArrayKeys = 0; + for (i = 0; i < numberOfKeys; i++) + { + ArrayType *arrayval; + int16 elmlen; + bool elmbyval; + char elmalign; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + int num_nonnulls; + int j; + + cur = &so->arrayKeyData[i]; + if (!(cur->sk_flags & SK_SEARCHARRAY)) + continue; + + /* + * First, deconstruct the array into elements. Anything allocated + * here (including a possibly detoasted array value) is in the + * workspace context. + */ + arrayval = DatumGetArrayTypeP(cur->sk_argument); + /* We could cache this data, but not clear it's worth it */ + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, &num_elems); + + /* + * Compress out any null elements. We can ignore them since we assume + * all btree operators are strict. + */ + num_nonnulls = 0; + for (j = 0; j < num_elems; j++) + { + if (!elem_nulls[j]) + elem_values[num_nonnulls++] = elem_values[j]; + } + + /* We could pfree(elem_nulls) now, but not worth the cycles */ + + /* If there's no non-nulls, the scan qual is unsatisfiable */ + if (num_nonnulls == 0) + { + numArrayKeys = -1; + break; + } + + /* + * If the comparison operator is not equality, then the array qual + * degenerates to a simple comparison against the smallest or largest + * non-null array element, as appropriate. + */ + switch (cur->sk_strategy) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + cur->sk_argument = + _bt_find_extreme_element(scan, cur, + BTGreaterStrategyNumber, + elem_values, num_nonnulls); + continue; + case BTEqualStrategyNumber: + /* proceed with rest of loop */ + break; + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: + cur->sk_argument = + _bt_find_extreme_element(scan, cur, + BTLessStrategyNumber, + elem_values, num_nonnulls); + continue; + default: + elog(ERROR, "unrecognized StrategyNumber: %d", + (int) cur->sk_strategy); + break; + } + + /* + * Sort the non-null elements and eliminate any duplicates. We must + * sort in the same ordering used by the index column, so that the + * successive primitive indexscans produce data in index order. + */ + num_elems = _bt_sort_array_elements(scan, cur, + (indoption[cur->sk_attno - 1] & INDOPTION_DESC) != 0, + elem_values, num_nonnulls); + + /* + * And set up the BTArrayKeyInfo data. + */ + so->arrayKeys[numArrayKeys].scan_key = i; + so->arrayKeys[numArrayKeys].num_elems = num_elems; + so->arrayKeys[numArrayKeys].elem_values = elem_values; + numArrayKeys++; + } + + so->numArrayKeys = numArrayKeys; + + MemoryContextSwitchTo(oldContext); +} + +/* + * _bt_find_extreme_element() -- get least or greatest array element + * + * scan and skey identify the index column, whose opfamily determines the + * comparison semantics. strat should be BTLessStrategyNumber to get the + * least element, or BTGreaterStrategyNumber to get the greatest. + */ +static Datum +_bt_find_extreme_element(IndexScanDesc scan, ScanKey skey, + StrategyNumber strat, + Datum *elems, int nelems) +{ + Relation rel = scan->indexRelation; + Oid elemtype, + cmp_op; + RegProcedure cmp_proc; + FmgrInfo flinfo; + Datum result; + int i; + + /* + * Determine the nominal datatype of the array elements. We have to + * support the convention that sk_subtype == InvalidOid means the opclass + * input type; this is a hack to simplify life for ScanKeyInit(). + */ + elemtype = skey->sk_subtype; + if (elemtype == InvalidOid) + elemtype = rel->rd_opcintype[skey->sk_attno - 1]; + + /* + * Look up the appropriate comparison operator in the opfamily. + * + * Note: it's possible that this would fail, if the opfamily is incomplete, + * but it seems quite unlikely that an opfamily would omit non-cross-type + * comparison operators for any datatype that it supports at all. + */ + cmp_op = get_opfamily_member(rel->rd_opfamily[skey->sk_attno - 1], + elemtype, + elemtype, + strat); + if (!OidIsValid(cmp_op)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + strat, elemtype, elemtype, + rel->rd_opfamily[skey->sk_attno - 1]); + cmp_proc = get_opcode(cmp_op); + if (!RegProcedureIsValid(cmp_proc)) + elog(ERROR, "missing oprcode for operator %u", cmp_op); + + fmgr_info(cmp_proc, &flinfo); + + Assert(nelems > 0); + result = elems[0]; + for (i = 1; i < nelems; i++) + { + if (DatumGetBool(FunctionCall2Coll(&flinfo, + skey->sk_collation, + elems[i], + result))) + result = elems[i]; + } + + return result; +} + +/* + * _bt_sort_array_elements() -- sort and de-dup array elements + * + * The array elements are sorted in-place, and the new number of elements + * after duplicate removal is returned. + * + * scan and skey identify the index column, whose opfamily determines the + * comparison semantics. If reverse is true, we sort in descending order. + */ +static int +_bt_sort_array_elements(IndexScanDesc scan, ScanKey skey, + bool reverse, + Datum *elems, int nelems) +{ + Relation rel = scan->indexRelation; + Oid elemtype; + RegProcedure cmp_proc; + BTSortArrayContext cxt; + int last_non_dup; + int i; + + if (nelems <= 1) + return nelems; /* no work to do */ + + /* + * Determine the nominal datatype of the array elements. We have to + * support the convention that sk_subtype == InvalidOid means the opclass + * input type; this is a hack to simplify life for ScanKeyInit(). + */ + elemtype = skey->sk_subtype; + if (elemtype == InvalidOid) + elemtype = rel->rd_opcintype[skey->sk_attno - 1]; + + /* + * Look up the appropriate comparison function in the opfamily. + * + * Note: it's possible that this would fail, if the opfamily is incomplete, + * but it seems quite unlikely that an opfamily would omit non-cross-type + * support functions for any datatype that it supports at all. + */ + cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1], + elemtype, + elemtype, + BTORDER_PROC); + if (!RegProcedureIsValid(cmp_proc)) + elog(ERROR, "missing support function %d(%u,%u) in opfamily %u", + BTORDER_PROC, elemtype, elemtype, + rel->rd_opfamily[skey->sk_attno - 1]); + + /* Sort the array elements */ + fmgr_info(cmp_proc, &cxt.flinfo); + cxt.collation = skey->sk_collation; + cxt.reverse = reverse; + qsort_arg((void *) elems, nelems, sizeof(Datum), + _bt_compare_array_elements, (void *) &cxt); + + /* Now scan the sorted elements and remove duplicates */ + last_non_dup = 0; + for (i = 1; i < nelems; i++) + { + int32 compare; + + compare = DatumGetInt32(FunctionCall2Coll(&cxt.flinfo, + cxt.collation, + elems[last_non_dup], + elems[i])); + if (compare != 0) + elems[++last_non_dup] = elems[i]; + } + + return last_non_dup + 1; +} + +/* + * qsort_arg comparator for sorting array elements + */ +static int +_bt_compare_array_elements(const void *a, const void *b, void *arg) +{ + Datum da = *((const Datum *) a); + Datum db = *((const Datum *) b); + BTSortArrayContext *cxt = (BTSortArrayContext *) arg; + int32 compare; + + compare = DatumGetInt32(FunctionCall2Coll(&cxt->flinfo, + cxt->collation, + da, db)); + if (cxt->reverse) + compare = -compare; + return compare; +} + +/* + * _bt_start_array_keys() -- Initialize array keys at start of a scan + * + * Set up the cur_elem counters and fill in the first sk_argument value for + * each array scankey. We can't do this until we know the scan direction. + */ +void +_bt_start_array_keys(IndexScanDesc scan, ScanDirection dir) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + int i; + + for (i = 0; i < so->numArrayKeys; i++) + { + BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i]; + ScanKey skey = &so->arrayKeyData[curArrayKey->scan_key]; + + Assert(curArrayKey->num_elems > 0); + if (ScanDirectionIsBackward(dir)) + curArrayKey->cur_elem = curArrayKey->num_elems - 1; + else + curArrayKey->cur_elem = 0; + skey->sk_argument = curArrayKey->elem_values[curArrayKey->cur_elem]; + } +} + +/* + * _bt_advance_array_keys() -- Advance to next set of array elements + * + * Returns TRUE if there is another set of values to consider, FALSE if not. + * On TRUE result, the scankeys are initialized with the next set of values. + */ +bool +_bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + bool found = false; + int i; + + /* + * We must advance the last array key most quickly, since it will + * correspond to the lowest-order index column among the available + * qualifications. This is necessary to ensure correct ordering of output + * when there are multiple array keys. + */ + for (i = so->numArrayKeys - 1; i >= 0; i--) + { + BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i]; + ScanKey skey = &so->arrayKeyData[curArrayKey->scan_key]; + int cur_elem = curArrayKey->cur_elem; + int num_elems = curArrayKey->num_elems; + + if (ScanDirectionIsBackward(dir)) + { + if (--cur_elem < 0) + { + cur_elem = num_elems - 1; + found = false; /* need to advance next array key */ + } + else + found = true; + } + else + { + if (++cur_elem >= num_elems) + { + cur_elem = 0; + found = false; /* need to advance next array key */ + } + else + found = true; + } + + curArrayKey->cur_elem = cur_elem; + skey->sk_argument = curArrayKey->elem_values[cur_elem]; + if (found) + break; + } + + return found; +} + + /* * _bt_preprocess_keys() -- Preprocess scan keys * - * The caller-supplied search-type keys (in scan->keyData[]) are copied to - * so->keyData[] with possible transformation. scan->numberOfKeys is - * the number of input keys, so->numberOfKeys gets the number of output - * keys (possibly less, never greater). + * The given search-type keys (in scan->keyData[] or so->arrayKeyData[]) + * are copied to so->keyData[] with possible transformation. + * scan->numberOfKeys is the number of input keys, so->numberOfKeys gets + * the number of output keys (possibly less, never greater). * * The output keys are marked with additional sk_flag bits beyond the * system-standard bits supplied by the caller. The DESC and NULLS_FIRST @@ -226,8 +662,8 @@ _bt_freestack(BTStack stack) * * Note: the reason we have to copy the preprocessed scan keys into private * storage is that we are modifying the array based on comparisons of the - * key argument values, which could change on a rescan. Therefore we can't - * overwrite the caller's data structure. + * key argument values, which could change on a rescan or after moving to + * new elements of array keys. Therefore we can't overwrite the source data. */ void _bt_preprocess_keys(IndexScanDesc scan) @@ -253,7 +689,14 @@ _bt_preprocess_keys(IndexScanDesc scan) if (numberOfKeys < 1) return; /* done if qual-less scan */ - inkeys = scan->keyData; + /* + * Read so->arrayKeyData if array keys are present, else scan->keyData + */ + if (so->arrayKeyData != NULL) + inkeys = so->arrayKeyData; + else + inkeys = scan->keyData; + outkeys = so->keyData; cur = &inkeys[0]; /* we check that input keys are correctly ordered */ diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 6d073bf5fd..e3be5a2cae 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -647,11 +647,13 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * as specified in access/skey.h. The elements of the row comparison * can have either constant or non-constant comparison values. * - * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)"). For these, + * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)"). If the index + * has rd_am->amsearcharray, we handle these the same as simple operators, + * setting the SK_SEARCHARRAY flag to tell the AM to handle them. Otherwise, * we create a ScanKey with everything filled in except the comparison value, * and set up an IndexArrayKeyInfo struct to drive processing of the qual. - * (Note that we treat all array-expressions as requiring runtime evaluation, - * even if they happen to be constants.) + * (Note that if we use an IndexArrayKeyInfo struct, the array expression is + * always treated as requiring runtime evaluation, even if it's a constant.) * * 5. NullTest ("indexkey IS NULL/IS NOT NULL"). We just fill in the * ScanKey properly. @@ -680,7 +682,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * *numArrayKeys: receives number of array keys * * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that - * ScalarArrayOpExpr quals are not supported. + * IndexArrayKeyInfos are not supported. */ void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, @@ -981,6 +983,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, { /* indexkey op ANY (array-expression) */ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + int flags = 0; + Datum scanvalue; Assert(!isorderby); @@ -1027,23 +1031,72 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Assert(rightop != NULL); - array_keys[n_array_keys].scan_key = this_scan_key; - array_keys[n_array_keys].array_expr = - ExecInitExpr(rightop, planstate); - /* the remaining fields were zeroed by palloc0 */ - n_array_keys++; + if (index->rd_am->amsearcharray) + { + /* Index AM will handle this like a simple operator */ + flags |= SK_SEARCHARRAY; + if (IsA(rightop, Const)) + { + /* OK, simple constant comparison value */ + scanvalue = ((Const *) rightop)->constvalue; + if (((Const *) rightop)->constisnull) + flags |= SK_ISNULL; + } + else + { + /* Need to treat this one as a runtime key */ + if (n_runtime_keys >= max_runtime_keys) + { + if (max_runtime_keys == 0) + { + max_runtime_keys = 8; + runtime_keys = (IndexRuntimeKeyInfo *) + palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + else + { + max_runtime_keys *= 2; + runtime_keys = (IndexRuntimeKeyInfo *) + repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + } + runtime_keys[n_runtime_keys].scan_key = this_scan_key; + runtime_keys[n_runtime_keys].key_expr = + ExecInitExpr(rightop, planstate); + + /* + * Careful here: the runtime expression is not of + * op_righttype, but rather is an array of same; so + * TypeIsToastable() isn't helpful. However, we can + * assume that all array types are toastable. + */ + runtime_keys[n_runtime_keys].key_toastable = true; + n_runtime_keys++; + scanvalue = (Datum) 0; + } + } + else + { + /* Executor has to expand the array value */ + array_keys[n_array_keys].scan_key = this_scan_key; + array_keys[n_array_keys].array_expr = + ExecInitExpr(rightop, planstate); + /* the remaining fields were zeroed by palloc0 */ + n_array_keys++; + scanvalue = (Datum) 0; + } /* * initialize the scan key's fields appropriately */ ScanKeyEntryInitialize(this_scan_key, - 0, /* flags */ + flags, varattno, /* attribute number to scan */ op_strategy, /* op's strategy */ op_righttype, /* strategy subtype */ saop->inputcollid, /* collation */ opfuncid, /* reg proc to use */ - (Datum) 0); /* constant */ + scanvalue); /* constant */ } else if (IsA(clause, NullTest)) { diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index f821b508d6..348c36b40e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -394,9 +394,14 @@ cost_index(IndexPath *path, PlannerInfo *root, if (indexonly) pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); - min_IO_cost = spc_random_page_cost; - if (pages_fetched > 1) - min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost; + if (pages_fetched > 0) + { + min_IO_cost = spc_random_page_cost; + if (pages_fetched > 1) + min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost; + } + else + min_IO_cost = 0; } /* diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index ece326d885..940efb38b6 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -48,9 +48,9 @@ /* Whether to use ScalarArrayOpExpr to build index qualifications */ typedef enum { - SAOP_FORBID, /* Do not use ScalarArrayOpExpr */ - SAOP_ALLOW, /* OK to use ScalarArrayOpExpr */ - SAOP_REQUIRE /* Require ScalarArrayOpExpr */ + SAOP_PER_AM, /* Use ScalarArrayOpExpr if amsearcharray */ + SAOP_ALLOW, /* Use ScalarArrayOpExpr for all indexes */ + SAOP_REQUIRE /* Require ScalarArrayOpExpr to be used */ } SaOpControl; /* Whether we are looking for plain indexscan, bitmap scan, or either */ @@ -196,7 +196,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) */ indexpaths = find_usable_indexes(root, rel, rel->baserestrictinfo, NIL, - true, NULL, SAOP_FORBID, ST_ANYSCAN); + true, NULL, SAOP_PER_AM, ST_ANYSCAN); /* * Submit all the ones that can form plain IndexScan plans to add_path. @@ -233,8 +233,9 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) bitindexpaths = list_concat(bitindexpaths, indexpaths); /* - * Likewise, generate paths using ScalarArrayOpExpr clauses; these can't - * be simple indexscans but they can be used in bitmap scans. + * Likewise, generate paths using executor-managed ScalarArrayOpExpr + * clauses; these can't be simple indexscans but they can be used in + * bitmap scans. */ indexpaths = find_saop_paths(root, rel, rel->baserestrictinfo, NIL, @@ -337,6 +338,14 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, break; } + /* + * If we're doing find_saop_paths(), we can skip indexes that support + * ScalarArrayOpExpr natively. We already generated all the potential + * indexpaths for them, so no need to do anything more. + */ + if (saop_control == SAOP_REQUIRE && index->amsearcharray) + continue; + /* * Ignore partial indexes that do not match the query. If a partial * index is marked predOK then we know it's OK; otherwise, if we are @@ -492,10 +501,10 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, /* * find_saop_paths - * Find all the potential indexpaths that make use of ScalarArrayOpExpr - * clauses. The executor only supports these in bitmap scans, not - * plain indexscans, so we need to segregate them from the normal case. - * Otherwise, same API as find_usable_indexes(). + * Find all the potential indexpaths that make use of executor-managed + * ScalarArrayOpExpr clauses. The executor only supports these in bitmap + * scans, not plain indexscans, so we need to segregate them from the + * normal case. Otherwise, same API as find_usable_indexes(). * Returns a list of IndexPaths. */ static List * @@ -1287,9 +1296,10 @@ group_clauses_by_indexkey(IndexOptInfo *index, * expand_indexqual_rowcompare(). * * It is also possible to match ScalarArrayOpExpr clauses to indexes, when - * the clause is of the form "indexkey op ANY (arrayconst)". Since the - * executor can only handle these in the context of bitmap index scans, - * our caller specifies whether to allow these or not. + * the clause is of the form "indexkey op ANY (arrayconst)". Since not + * all indexes handle these natively, and the executor implements them + * only in the context of bitmap index scans, our caller specifies whether + * to allow these or not. * * For boolean indexes, it is also possible to match the clause directly * to the indexkey; or perhaps the clause is (NOT indexkey). @@ -1357,8 +1367,8 @@ match_clause_to_indexcol(IndexOptInfo *index, expr_coll = ((OpExpr *) clause)->inputcollid; plain_op = true; } - else if (saop_control != SAOP_FORBID && - clause && IsA(clause, ScalarArrayOpExpr)) + else if (clause && IsA(clause, ScalarArrayOpExpr) && + (index->amsearcharray || saop_control != SAOP_PER_AM)) { ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; @@ -2089,12 +2099,12 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, /* * Find all the index paths that are usable for this join, except for - * stuff involving OR and ScalarArrayOpExpr clauses. + * stuff involving OR and executor-managed ScalarArrayOpExpr clauses. */ allindexpaths = find_usable_indexes(root, rel, clause_list, NIL, false, outer_rel, - SAOP_FORBID, + SAOP_PER_AM, ST_ANYSCAN); /* @@ -2123,8 +2133,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, outer_rel)); /* - * Likewise, generate paths using ScalarArrayOpExpr clauses; these can't - * be simple indexscans but they can be used in bitmap scans. + * Likewise, generate paths using executor-managed ScalarArrayOpExpr + * clauses; these can't be simple indexscans but they can be used in + * bitmap scans. */ bitindexpaths = list_concat(bitindexpaths, find_saop_paths(root, rel, diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index aa436004f8..bb80952242 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -215,6 +215,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->amcanorderbyop = indexRelation->rd_am->amcanorderbyop; info->amcanreturn = indexRelation->rd_am->amcanreturn; info->amoptionalkey = indexRelation->rd_am->amoptionalkey; + info->amsearcharray = indexRelation->rd_am->amsearcharray; info->amsearchnulls = indexRelation->rd_am->amsearchnulls; info->amhasgettuple = OidIsValid(indexRelation->rd_am->amgettuple); info->amhasgetbitmap = OidIsValid(indexRelation->rd_am->amgetbitmap); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 8ceea820bd..96946281da 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6385,14 +6385,7 @@ btcostestimate(PG_FUNCTION_ARGS) * is that multiple columns dilute the importance of the first column's * ordering, but don't negate it entirely. Before 8.0 we divided the * correlation by the number of columns, but that seems too strong.) - * - * We can skip all this if we found a ScalarArrayOpExpr, because then the - * call must be for a bitmap index scan, and the caller isn't going to - * care what the index correlation is. */ - if (found_saop) - PG_RETURN_VOID(); - MemSet(&vardata, 0, sizeof(vardata)); if (index->indexkeys[0] != 0) diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 199fc94026..347d9423ba 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -525,6 +525,15 @@ typedef BTScanPosData *BTScanPos; #define BTScanPosIsValid(scanpos) BufferIsValid((scanpos).buf) +/* We need one of these for each equality-type SK_SEARCHARRAY scan key */ +typedef struct BTArrayKeyInfo +{ + int scan_key; /* index of associated key in arrayKeyData */ + int cur_elem; /* index of current element in elem_values */ + int num_elems; /* number of elems in current array value */ + Datum *elem_values; /* array of num_elems Datums */ +} BTArrayKeyInfo; + typedef struct BTScanOpaqueData { /* these fields are set by _bt_preprocess_keys(): */ @@ -532,6 +541,13 @@ typedef struct BTScanOpaqueData int numberOfKeys; /* number of preprocessed scan keys */ ScanKey keyData; /* array of preprocessed scan keys */ + /* workspace for SK_SEARCHARRAY support */ + ScanKey arrayKeyData; /* modified copy of scan->keyData */ + int numArrayKeys; /* number of equality-type array keys (-1 if + * there are any unsatisfiable array keys) */ + BTArrayKeyInfo *arrayKeys; /* info about each equality-type array key */ + MemoryContext arrayContext; /* scan-lifespan context for array data */ + /* info about killed items if any (killedItems is NULL if never used) */ int *killedItems; /* currPos.items indexes of killed items */ int numKilled; /* number of currently stored items */ @@ -639,6 +655,9 @@ extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup); extern ScanKey _bt_mkscankey_nodata(Relation rel); extern void _bt_freeskey(ScanKey skey); extern void _bt_freestack(BTStack stack); +extern void _bt_preprocess_array_keys(IndexScanDesc scan); +extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir); +extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir); extern void _bt_preprocess_keys(IndexScanDesc scan); extern IndexTuple _bt_checkkeys(IndexScanDesc scan, Page page, OffsetNumber offnum, diff --git a/src/include/access/skey.h b/src/include/access/skey.h index a82e46ee0e..b9c61cd10a 100644 --- a/src/include/access/skey.h +++ b/src/include/access/skey.h @@ -55,18 +55,27 @@ typedef uint16 StrategyNumber; * If the operator is collation-sensitive, sk_collation must be set * correctly as well. * + * A ScanKey can also represent a ScalarArrayOpExpr, that is a condition + * "column op ANY(ARRAY[...])". This is signaled by the SK_SEARCHARRAY + * flag bit. The sk_argument is not a value of the operator's right-hand + * argument type, but rather an array of such values, and the per-element + * comparisons are to be ORed together. + * * A ScanKey can also represent a condition "column IS NULL" or "column * IS NOT NULL"; these cases are signaled by the SK_SEARCHNULL and * SK_SEARCHNOTNULL flag bits respectively. The argument is always NULL, * and the sk_strategy, sk_subtype, sk_collation, and sk_func fields are - * not used (unless set by the index AM). Currently, SK_SEARCHNULL and - * SK_SEARCHNOTNULL are supported only for index scans, not heap scans; - * and not all index AMs support them. + * not used (unless set by the index AM). + * + * SK_SEARCHARRAY, SK_SEARCHNULL and SK_SEARCHNOTNULL are supported only + * for index scans, not heap scans; and not all index AMs support them, + * only those that set amsearcharray or amsearchnulls respectively. * * A ScanKey can also represent an ordering operator invocation, that is * an ordering requirement "ORDER BY indexedcol op constant". This looks * the same as a comparison operator, except that the operator doesn't * (usually) yield boolean. We mark such ScanKeys with SK_ORDER_BY. + * SK_SEARCHARRAY, SK_SEARCHNULL, SK_SEARCHNOTNULL cannot be used here. * * Note: in some places, ScanKeys are used as a convenient representation * for the invocation of an access method support procedure. In this case @@ -114,6 +123,7 @@ typedef ScanKeyData *ScanKey; * opclass, NOT the operator's implementation function. * sk_strategy must be the same in all elements of the subsidiary array, * that is, the same as in the header entry. + * SK_SEARCHARRAY, SK_SEARCHNULL, SK_SEARCHNOTNULL cannot be used here. */ /* @@ -128,10 +138,11 @@ typedef ScanKeyData *ScanKey; #define SK_ROW_HEADER 0x0004 /* row comparison header (see above) */ #define SK_ROW_MEMBER 0x0008 /* row comparison member (see above) */ #define SK_ROW_END 0x0010 /* last row comparison member */ -#define SK_SEARCHNULL 0x0020 /* scankey represents "col IS NULL" */ -#define SK_SEARCHNOTNULL 0x0040 /* scankey represents "col IS NOT +#define SK_SEARCHARRAY 0x0020 /* scankey represents ScalarArrayOp */ +#define SK_SEARCHNULL 0x0040 /* scankey represents "col IS NULL" */ +#define SK_SEARCHNOTNULL 0x0080 /* scankey represents "col IS NOT * NULL" */ -#define SK_ORDER_BY 0x0080 /* scankey is for ORDER BY op */ +#define SK_ORDER_BY 0x0100 /* scankey is for ORDER BY op */ /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 8fff3675ef..8097545faa 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201110141 +#define CATALOG_VERSION_NO 201110161 #endif diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index c3c864f95f..8b075d30d6 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -47,6 +47,7 @@ CATALOG(pg_am,2601) bool amcanmulticol; /* does AM support multi-column indexes? */ bool amcanreturn; /* can AM return IndexTuples? */ bool amoptionalkey; /* can query omit key for the first column? */ + bool amsearcharray; /* can AM handle ScalarArrayOpExpr quals? */ bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */ bool amstorage; /* can storage type differ from column type? */ bool amclusterable; /* does AM support cluster command? */ @@ -79,7 +80,7 @@ typedef FormData_pg_am *Form_pg_am; * compiler constants for pg_am * ---------------- */ -#define Natts_pg_am 29 +#define Natts_pg_am 30 #define Anum_pg_am_amname 1 #define Anum_pg_am_amstrategies 2 #define Anum_pg_am_amsupport 3 @@ -90,41 +91,42 @@ typedef FormData_pg_am *Form_pg_am; #define Anum_pg_am_amcanmulticol 8 #define Anum_pg_am_amcanreturn 9 #define Anum_pg_am_amoptionalkey 10 -#define Anum_pg_am_amsearchnulls 11 -#define Anum_pg_am_amstorage 12 -#define Anum_pg_am_amclusterable 13 -#define Anum_pg_am_ampredlocks 14 -#define Anum_pg_am_amkeytype 15 -#define Anum_pg_am_aminsert 16 -#define Anum_pg_am_ambeginscan 17 -#define Anum_pg_am_amgettuple 18 -#define Anum_pg_am_amgetbitmap 19 -#define Anum_pg_am_amrescan 20 -#define Anum_pg_am_amendscan 21 -#define Anum_pg_am_ammarkpos 22 -#define Anum_pg_am_amrestrpos 23 -#define Anum_pg_am_ambuild 24 -#define Anum_pg_am_ambuildempty 25 -#define Anum_pg_am_ambulkdelete 26 -#define Anum_pg_am_amvacuumcleanup 27 -#define Anum_pg_am_amcostestimate 28 -#define Anum_pg_am_amoptions 29 +#define Anum_pg_am_amsearcharray 11 +#define Anum_pg_am_amsearchnulls 12 +#define Anum_pg_am_amstorage 13 +#define Anum_pg_am_amclusterable 14 +#define Anum_pg_am_ampredlocks 15 +#define Anum_pg_am_amkeytype 16 +#define Anum_pg_am_aminsert 17 +#define Anum_pg_am_ambeginscan 18 +#define Anum_pg_am_amgettuple 19 +#define Anum_pg_am_amgetbitmap 20 +#define Anum_pg_am_amrescan 21 +#define Anum_pg_am_amendscan 22 +#define Anum_pg_am_ammarkpos 23 +#define Anum_pg_am_amrestrpos 24 +#define Anum_pg_am_ambuild 25 +#define Anum_pg_am_ambuildempty 26 +#define Anum_pg_am_ambulkdelete 27 +#define Anum_pg_am_amvacuumcleanup 28 +#define Anum_pg_am_amcostestimate 29 +#define Anum_pg_am_amoptions 30 /* ---------------- * initial contents of pg_am * ---------------- */ -DATA(insert OID = 403 ( btree 5 1 t f t t t t t t f t t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbuildempty btbulkdelete btvacuumcleanup btcostestimate btoptions )); +DATA(insert OID = 403 ( btree 5 1 t f t t t t t t t f t t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbuildempty btbulkdelete btvacuumcleanup btcostestimate btoptions )); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 -DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); +DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); DESCR("hash index access method"); #define HASH_AM_OID 405 -DATA(insert OID = 783 ( gist 0 8 f t f f t f t t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); +DATA(insert OID = 783 ( gist 0 8 f t f f t f t f t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 -DATA(insert OID = 2742 ( gin 0 5 f f f f t f t f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); +DATA(insert OID = 2742 ( gin 0 5 f f f f t f t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index ef84e9f138..2925d7e765 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -490,8 +490,9 @@ typedef struct IndexOptInfo bool unique; /* true if a unique index */ bool hypothetical; /* true if index doesn't really exist */ bool amcanorderbyop; /* does AM support order by operator result? */ - bool amcanreturn; /* does AM know how to return tuples? */ + bool amcanreturn; /* can AM return IndexTuples? */ bool amoptionalkey; /* can query omit key for the first column? */ + bool amsearcharray; /* can AM handle ScalarArrayOpExpr quals? */ bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */ bool amhasgettuple; /* does AM have amgettuple interface? */ bool amhasgetbitmap; /* does AM have amgetbitmap interface? */