From d583f10b7e0b9e1ed18f339f3177ed42ac2f7570 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 2 Dec 2010 20:50:48 -0500 Subject: [PATCH] Create core infrastructure for KNNGIST. This is a heavily revised version of builtin_knngist_core-0.9. The ordering operators are no longer mixed in with actual quals, which would have confused not only humans but significant parts of the planner. Instead, ordering operators are carried separately throughout planning and execution. Since the API for ambeginscan and amrescan functions had to be changed anyway, this commit takes the opportunity to rationalize that a bit. RelationGetIndexScan no longer forces a premature index_rescan call; instead, callers of index_beginscan must call index_rescan too. Aside from making the AM-side initialization logic a bit less peculiar, this has the advantage that we do not make a useless extra am_rescan call when there are runtime key values. AMs formerly could not assume that the key values passed to amrescan were actually valid; now they can. 
Teodor Sigaev and Tom Lane --- doc/src/sgml/catalogs.sgml | 6 +- doc/src/sgml/indexam.sgml | 69 ++++--- src/backend/access/gin/ginscan.c | 55 +++--- src/backend/access/gist/gistscan.c | 72 ++++---- src/backend/access/hash/hash.c | 40 ++--- src/backend/access/index/genam.c | 33 ++-- src/backend/access/index/indexam.c | 42 +++-- src/backend/access/nbtree/nbtree.c | 39 ++-- src/backend/commands/cluster.c | 4 +- src/backend/commands/explain.c | 2 + src/backend/executor/execQual.c | 2 +- src/backend/executor/execUtils.c | 4 +- src/backend/executor/nodeBitmapIndexscan.c | 24 ++- src/backend/executor/nodeIndexscan.c | 162 ++++++++++++----- src/backend/executor/nodeMergejoin.c | 2 +- src/backend/nodes/copyfuncs.c | 2 + src/backend/nodes/outfuncs.c | 3 + src/backend/optimizer/path/costsize.c | 11 +- src/backend/optimizer/path/indxpath.c | 198 ++++++++++++++++++++- src/backend/optimizer/plan/createplan.c | 86 ++++++++- src/backend/optimizer/plan/planner.c | 2 +- src/backend/optimizer/plan/setrefs.c | 4 + src/backend/optimizer/plan/subselect.c | 5 +- src/backend/optimizer/util/pathnode.c | 6 +- src/backend/utils/adt/selfuncs.c | 89 +++++---- src/backend/utils/cache/lsyscache.c | 31 +++- src/include/access/genam.h | 10 +- src/include/access/relscan.h | 6 +- src/include/access/skey.h | 6 + src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_am.h | 6 +- src/include/catalog/pg_proc.h | 16 +- src/include/executor/nodeIndexscan.h | 4 +- src/include/nodes/execnodes.h | 18 +- src/include/nodes/plannodes.h | 19 +- src/include/nodes/relation.h | 8 + src/include/optimizer/cost.h | 2 +- src/include/optimizer/pathnode.h | 1 + src/include/pg_config_manual.h | 2 +- src/include/utils/lsyscache.h | 3 +- 40 files changed, 786 insertions(+), 310 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 54a6dcc102..217a04e499 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -510,7 +510,7 @@ ambeginscan regproc pg_proc.oid - 
Start new scan function + Prepare for index scan function @@ -531,14 +531,14 @@ amrescan regproc pg_proc.oid - Restart this scan function + (Re)start index scan function amendscan regproc pg_proc.oid - End this scan function + Clean up after index scan function diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 925aac4571..d0905eb3e2 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -268,6 +268,7 @@ void amcostestimate (PlannerInfo *root, IndexOptInfo *index, List *indexQuals, + List *indexOrderBys, RelOptInfo *outer_rel, Cost *indexStartupCost, Cost *indexTotalCost, @@ -318,19 +319,42 @@ amoptions (ArrayType *reloptions, IndexScanDesc ambeginscan (Relation indexRelation, int nkeys, - ScanKey key); + int norderbys); - Begin a new scan. The key array (of length nkeys) - describes the scan key(s) for the index scan. The result must be a - palloc'd struct. For implementation reasons the index access method + Prepare for an index scan. The nkeys and norderbys + parameters indicate the number of quals and ordering operators that will be + used in the scan; these may be useful for space allocation purposes. + Note that the actual values of the scan keys aren't provided yet. + The result must be a palloc'd struct. + For implementation reasons the index access method must create this struct by calling RelationGetIndexScan(). In most cases - ambeginscan itself does little beyond making that call; + ambeginscan does little beyond making that call and perhaps + acquiring locks; the interesting parts of index-scan startup are in amrescan. +void +amrescan (IndexScanDesc scan, + ScanKey keys, + int nkeys, + ScanKey orderbys, + int norderbys); + + Start or restart an indexscan, possibly with new scan keys. (To restart + using previously-passed keys, NULL is passed for keys and/or + orderbys.) Note that it is not allowed for + the number of keys or order-by operators to be larger than + what was passed to ambeginscan. 
In practice the restart + feature is used when a new outer tuple is selected by a nested-loop join + and so a new key comparison value is needed, but the scan key structure + remains the same. + + + + boolean amgettuple (IndexScanDesc scan, ScanDirection direction); @@ -393,22 +417,6 @@ amgetbitmap (IndexScanDesc scan, void -amrescan (IndexScanDesc scan, - ScanKey key); - - Restart the given scan, possibly with new scan keys (to continue using - the old keys, NULL is passed for key). Note that it is not - possible for the number of keys to be changed. In practice the restart - feature is used when a new outer tuple is selected by a nested-loop join - and so a new key comparison value is needed, but the scan key structure - remains the same. This function is also called by - RelationGetIndexScan(), so it is used for initial setup - of an index scan as well as rescanning. - - - - -void amendscan (IndexScanDesc scan); End a scan and release resources. The scan struct itself @@ -820,8 +828,9 @@ amrestrpos (IndexScanDesc scan); Index Cost Estimation Functions - The amcostestimate function is given a list of WHERE clauses that have - been determined to be usable with the index. It must return estimates + The amcostestimate function is given information describing + a possible index scan, including lists of WHERE and ORDER BY clauses that + have been determined to be usable with the index. It must return estimates of the cost of accessing the index and the selectivity of the WHERE clauses (that is, the fraction of parent-table rows that will be retrieved during the index scan). 
For simple cases, nearly all the @@ -839,6 +848,7 @@ void amcostestimate (PlannerInfo *root, IndexOptInfo *index, List *indexQuals, + List *indexOrderBys, RelOptInfo *outer_rel, Cost *indexStartupCost, Cost *indexTotalCost, @@ -846,7 +856,7 @@ amcostestimate (PlannerInfo *root, double *indexCorrelation); - The first four parameters are inputs: + The first five parameters are inputs: @@ -873,6 +883,17 @@ amcostestimate (PlannerInfo *root, List of index qual clauses (implicitly ANDed); a NIL list indicates no qualifiers are available. + Note that the list contains expression trees with RestrictInfo nodes + at the top, not ScanKeys. + + + + + + indexOrderBys + + + List of indexable ORDER BY operators, or NIL if none. Note that the list contains expression trees, not ScanKeys. diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index a6604c4c93..3a5e52dc38 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -26,11 +26,28 @@ Datum ginbeginscan(PG_FUNCTION_ARGS) { Relation rel = (Relation) PG_GETARG_POINTER(0); - int keysz = PG_GETARG_INT32(1); - ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); IndexScanDesc scan; + GinScanOpaque so; - scan = RelationGetIndexScan(rel, keysz, scankey); + /* no order by operators allowed */ + Assert(norderbys == 0); + + scan = RelationGetIndexScan(rel, nkeys, norderbys); + + /* allocate private workspace */ + so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData)); + so->keys = NULL; + so->nkeys = 0; + so->tempCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin scan temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + initGinState(&so->ginstate, scan->indexRelation); + + scan->opaque = so; PG_RETURN_POINTER(scan); } @@ -241,27 +258,10 @@ ginrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = 
(ScanKey) PG_GETARG_POINTER(1); - GinScanOpaque so; - - so = (GinScanOpaque) scan->opaque; - - if (so == NULL) - { - /* if called from ginbeginscan */ - so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData)); - so->tempCtx = AllocSetContextCreate(CurrentMemoryContext, - "Gin scan temporary context", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - initGinState(&so->ginstate, scan->indexRelation); - scan->opaque = so; - } - else - { - freeScanKeys(so->keys, so->nkeys); - } + /* remaining arguments are ignored */ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + freeScanKeys(so->keys, so->nkeys); so->keys = NULL; if (scankey && scan->numberOfKeys > 0) @@ -280,14 +280,11 @@ ginendscan(PG_FUNCTION_ARGS) IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); GinScanOpaque so = (GinScanOpaque) scan->opaque; - if (so != NULL) - { - freeScanKeys(so->keys, so->nkeys); + freeScanKeys(so->keys, so->nkeys); - MemoryContextDelete(so->tempCtx); + MemoryContextDelete(so->tempCtx); - pfree(so); - } + pfree(so); PG_RETURN_VOID(); } diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 21f4ea54b7..106714511a 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -28,10 +28,24 @@ gistbeginscan(PG_FUNCTION_ARGS) { Relation r = (Relation) PG_GETARG_POINTER(0); int nkeys = PG_GETARG_INT32(1); - ScanKey key = (ScanKey) PG_GETARG_POINTER(2); + int norderbys = PG_GETARG_INT32(2); IndexScanDesc scan; + GISTScanOpaque so; - scan = RelationGetIndexScan(r, nkeys, key); + /* no order by operators allowed */ + Assert(norderbys == 0); + + scan = RelationGetIndexScan(r, nkeys, norderbys); + + /* initialize opaque data */ + so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); + so->stack = NULL; + so->tempCxt = createTempGistContext(); + so->curbuf = InvalidBuffer; + so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); + initGISTstate(so->giststate, scan->indexRelation); 
+ + scan->opaque = so; PG_RETURN_POINTER(scan); } @@ -41,33 +55,18 @@ gistrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey key = (ScanKey) PG_GETARG_POINTER(1); - GISTScanOpaque so; + /* remaining arguments are ignored */ + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; int i; - so = (GISTScanOpaque) scan->opaque; - if (so != NULL) + /* rescan an existing indexscan --- reset state */ + gistfreestack(so->stack); + so->stack = NULL; + /* drop pins on buffers -- no locks held */ + if (BufferIsValid(so->curbuf)) { - /* rescan an existing indexscan --- reset state */ - gistfreestack(so->stack); - so->stack = NULL; - /* drop pins on buffers -- no locks held */ - if (BufferIsValid(so->curbuf)) - { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - } - } - else - { - /* initialize opaque data */ - so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); - so->stack = NULL; - so->tempCxt = createTempGistContext(); + ReleaseBuffer(so->curbuf); so->curbuf = InvalidBuffer; - so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); - initGISTstate(so->giststate, scan->indexRelation); - - scan->opaque = so; } /* @@ -130,21 +129,16 @@ Datum gistendscan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - GISTScanOpaque so; + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; - so = (GISTScanOpaque) scan->opaque; - - if (so != NULL) - { - gistfreestack(so->stack); - if (so->giststate != NULL) - freeGISTstate(so->giststate); - /* drop pins on buffers -- we aren't holding any locks */ - if (BufferIsValid(so->curbuf)) - ReleaseBuffer(so->curbuf); - MemoryContextDelete(so->tempCxt); - pfree(scan->opaque); - } + gistfreestack(so->stack); + if (so->giststate != NULL) + freeGISTstate(so->giststate); + /* drop pins on buffers -- we aren't holding any locks */ + if (BufferIsValid(so->curbuf)) + ReleaseBuffer(so->curbuf); + MemoryContextDelete(so->tempCxt); + pfree(so); PG_RETURN_VOID(); } diff --git 
a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index bb46446d71..e53ec3d5ea 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -366,12 +366,16 @@ Datum hashbeginscan(PG_FUNCTION_ARGS) { Relation rel = (Relation) PG_GETARG_POINTER(0); - int keysz = PG_GETARG_INT32(1); - ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); IndexScanDesc scan; HashScanOpaque so; - scan = RelationGetIndexScan(rel, keysz, scankey); + /* no order by operators allowed */ + Assert(norderbys == 0); + + scan = RelationGetIndexScan(rel, nkeys, norderbys); + so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData)); so->hashso_bucket_valid = false; so->hashso_bucket_blkno = 0; @@ -396,26 +400,23 @@ hashrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); + /* remaining arguments are ignored */ HashScanOpaque so = (HashScanOpaque) scan->opaque; Relation rel = scan->indexRelation; - /* if we are called from beginscan, so is still NULL */ - if (so) - { - /* release any pin we still hold */ - if (BufferIsValid(so->hashso_curbuf)) - _hash_dropbuf(rel, so->hashso_curbuf); - so->hashso_curbuf = InvalidBuffer; + /* release any pin we still hold */ + if (BufferIsValid(so->hashso_curbuf)) + _hash_dropbuf(rel, so->hashso_curbuf); + so->hashso_curbuf = InvalidBuffer; - /* release lock on bucket, too */ - if (so->hashso_bucket_blkno) - _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE); - so->hashso_bucket_blkno = 0; + /* release lock on bucket, too */ + if (so->hashso_bucket_blkno) + _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE); + so->hashso_bucket_blkno = 0; - /* set position invalid (this will cause _hash_first call) */ - ItemPointerSetInvalid(&(so->hashso_curpos)); - ItemPointerSetInvalid(&(so->hashso_heappos)); - } + /* set position invalid (this will cause _hash_first call) */ + 
ItemPointerSetInvalid(&(so->hashso_curpos)); + ItemPointerSetInvalid(&(so->hashso_heappos)); /* Update scan key, if a new one is given */ if (scankey && scan->numberOfKeys > 0) @@ -423,8 +424,7 @@ hashrescan(PG_FUNCTION_ARGS) memmove(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData)); - if (so) - so->hashso_bucket_valid = false; + so->hashso_bucket_valid = false; } PG_RETURN_VOID(); diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index cd0212aa94..d0eaa36b3b 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -57,22 +57,20 @@ /* ---------------- * RelationGetIndexScan -- Create and fill an IndexScanDesc. * - * This routine creates an index scan structure and sets its contents - * up correctly. This routine calls AMrescan to set up the scan with - * the passed key. + * This routine creates an index scan structure and sets up initial + * contents for it. * * Parameters: * indexRelation -- index relation for scan. - * nkeys -- count of scan keys. - * key -- array of scan keys to restrict the index scan. + * nkeys -- count of scan keys (index qual conditions). + * norderbys -- count of index order-by operators. * * Returns: * An initialized IndexScanDesc. * ---------------- */ IndexScanDesc -RelationGetIndexScan(Relation indexRelation, - int nkeys, ScanKey key) +RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) { IndexScanDesc scan; @@ -82,15 +80,19 @@ RelationGetIndexScan(Relation indexRelation, scan->indexRelation = indexRelation; scan->xs_snapshot = SnapshotNow; /* may be set later */ scan->numberOfKeys = nkeys; + scan->numberOfOrderBys = norderbys; /* - * We allocate the key space here, but the AM is responsible for actually - * filling it from the passed key array. + * We allocate key workspace here, but it won't get filled until amrescan. 
*/ if (nkeys > 0) scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else scan->keyData = NULL; + if (norderbys > 0) + scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys); + else + scan->orderByData = NULL; /* * During recovery we ignore killed tuples and don't bother to kill them @@ -115,11 +117,6 @@ RelationGetIndexScan(Relation indexRelation, scan->xs_next_hot = InvalidOffsetNumber; scan->xs_prev_xmax = InvalidTransactionId; - /* - * Let the AM fill in the key and any opaque data it wants. - */ - index_rescan(scan, key); - return scan; } @@ -140,6 +137,8 @@ IndexScanEnd(IndexScanDesc scan) { if (scan->keyData != NULL) pfree(scan->keyData); + if (scan->orderByData != NULL) + pfree(scan->orderByData); pfree(scan); } @@ -286,7 +285,8 @@ systable_beginscan(Relation heapRelation, } sysscan->iscan = index_beginscan(heapRelation, irel, - snapshot, nkeys, key); + snapshot, nkeys, 0); + index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; } else @@ -450,7 +450,8 @@ systable_beginscan_ordered(Relation heapRelation, } sysscan->iscan = index_beginscan(heapRelation, indexRelation, - snapshot, nkeys, key); + snapshot, nkeys, 0); + index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; return sysscan; diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index d151ffda8c..8c79c6149b 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -114,7 +114,7 @@ do { \ } while(0) static IndexScanDesc index_beginscan_internal(Relation indexRelation, - int nkeys, ScanKey key); + int nkeys, int norderbys); /* ---------------------------------------------------------------- @@ -213,11 +213,11 @@ IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, - int nkeys, ScanKey key) + int nkeys, int norderbys) { IndexScanDesc scan; - scan = index_beginscan_internal(indexRelation, nkeys, key); + scan = 
index_beginscan_internal(indexRelation, nkeys, norderbys); /* * Save additional parameters into the scandesc. Everything else was set @@ -238,11 +238,11 @@ index_beginscan(Relation heapRelation, IndexScanDesc index_beginscan_bitmap(Relation indexRelation, Snapshot snapshot, - int nkeys, ScanKey key) + int nkeys) { IndexScanDesc scan; - scan = index_beginscan_internal(indexRelation, nkeys, key); + scan = index_beginscan_internal(indexRelation, nkeys, 0); /* * Save additional parameters into the scandesc. Everything else was set @@ -258,7 +258,7 @@ index_beginscan_bitmap(Relation indexRelation, */ static IndexScanDesc index_beginscan_internal(Relation indexRelation, - int nkeys, ScanKey key) + int nkeys, int norderbys) { IndexScanDesc scan; FmgrInfo *procedure; @@ -278,7 +278,7 @@ index_beginscan_internal(Relation indexRelation, DatumGetPointer(FunctionCall3(procedure, PointerGetDatum(indexRelation), Int32GetDatum(nkeys), - PointerGetDatum(key))); + Int32GetDatum(norderbys))); return scan; } @@ -286,23 +286,28 @@ index_beginscan_internal(Relation indexRelation, /* ---------------- * index_rescan - (re)start a scan of an index * - * The caller may specify a new set of scankeys (but the number of keys - * cannot change). To restart the scan without changing keys, pass NULL - * for the key array. - * - * Note that this is also called when first starting an indexscan; - * see RelationGetIndexScan. Keys *must* be passed in that case, - * unless scan->numberOfKeys is zero. + * During a restart, the caller may specify a new set of scankeys and/or + * orderbykeys; but the number of keys cannot differ from what index_beginscan + * was told. (Later we might relax that to "must not exceed", but currently + * the index AMs tend to assume that scan->numberOfKeys is what to believe.) + * To restart the scan without changing keys, pass NULL for the key arrays. + * (Of course, keys *must* be passed on the first call, unless + * scan->numberOfKeys is zero.) 
* ---------------- */ void -index_rescan(IndexScanDesc scan, ScanKey key) +index_rescan(IndexScanDesc scan, + ScanKey keys, int nkeys, + ScanKey orderbys, int norderbys) { FmgrInfo *procedure; SCAN_CHECKS; GET_SCAN_PROCEDURE(amrescan); + Assert(nkeys == scan->numberOfKeys); + Assert(norderbys == scan->numberOfOrderBys); + /* Release any held pin on a heap page */ if (BufferIsValid(scan->xs_cbuf)) { @@ -314,9 +319,12 @@ index_rescan(IndexScanDesc scan, ScanKey key) scan->kill_prior_tuple = false; /* for safety */ - FunctionCall2(procedure, + FunctionCall5(procedure, PointerGetDatum(scan), - PointerGetDatum(key)); + PointerGetDatum(keys), + Int32GetDatum(nkeys), + PointerGetDatum(orderbys), + Int32GetDatum(norderbys)); } /* ---------------- diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 46aeb9e6ad..655a40090e 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -337,12 +337,27 @@ Datum btbeginscan(PG_FUNCTION_ARGS) { Relation rel = (Relation) PG_GETARG_POINTER(0); - int keysz = PG_GETARG_INT32(1); - ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); IndexScanDesc scan; + BTScanOpaque so; + + /* no order by operators allowed */ + Assert(norderbys == 0); /* get the scan */ - scan = RelationGetIndexScan(rel, keysz, scankey); + scan = RelationGetIndexScan(rel, nkeys, norderbys); + + /* allocate private workspace */ + so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData)); + so->currPos.buf = so->markPos.buf = InvalidBuffer; + if (scan->numberOfKeys > 0) + so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); + else + so->keyData = NULL; + so->killedItems = NULL; /* until needed */ + so->numKilled = 0; + scan->opaque = so; PG_RETURN_POINTER(scan); } @@ -355,22 +370,8 @@ btrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = (ScanKey) 
PG_GETARG_POINTER(1); - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - - if (so == NULL) /* if called from btbeginscan */ - { - so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData)); - so->currPos.buf = so->markPos.buf = InvalidBuffer; - if (scan->numberOfKeys > 0) - so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); - else - so->keyData = NULL; - so->killedItems = NULL; /* until needed */ - so->numKilled = 0; - scan->opaque = so; - } + /* remaining arguments are ignored */ + BTScanOpaque so = (BTScanOpaque) scan->opaque; /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index bb7cd746b1..e1dbd6d985 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -875,8 +875,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, if (OldIndex != NULL && !use_sort) { heapScan = NULL; - indexScan = index_beginscan(OldHeap, OldIndex, - SnapshotAny, 0, (ScanKey) NULL); + indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0); + index_rescan(indexScan, NULL, 0, NULL, 0); } else { diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index a5e44c046f..81885b4fb7 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1017,6 +1017,8 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_IndexScan: show_scan_qual(((IndexScan *) plan)->indexqualorig, "Index Cond", planstate, ancestors, es); + show_scan_qual(((IndexScan *) plan)->indexorderbyorig, + "Order By", planstate, ancestors, es); show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); break; case T_BitmapIndexScan: diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 27ea91c014..6bac6d0623 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -4694,7 +4694,7 @@ ExecInitExpr(Expr *node, 
PlanState *parent) Oid righttype; Oid proc; - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, false, &strategy, &lefttype, &righttype); diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 57806ca8f0..6ad0f1e52a 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1211,8 +1211,8 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo, retry: conflict = false; found_self = false; - index_scan = index_beginscan(heap, index, &DirtySnapshot, - index_natts, scankeys); + index_scan = index_beginscan(heap, index, &DirtySnapshot, index_natts, 0); + index_rescan(index_scan, scankeys, index_natts, NULL, 0); while ((tup = index_getnext(index_scan, ForwardScanDirection)) != NULL) diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c index 97ce0dde29..573e294882 100644 --- a/src/backend/executor/nodeBitmapIndexscan.c +++ b/src/backend/executor/nodeBitmapIndexscan.c @@ -95,7 +95,9 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node) doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys, node->biss_NumArrayKeys); if (doscan) /* reset index scan */ - index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); + index_rescan(node->biss_ScanDesc, + node->biss_ScanKeys, node->biss_NumScanKeys, + NULL, 0); } /* must provide our own instrumentation support */ @@ -147,7 +149,9 @@ ExecReScanBitmapIndexScan(BitmapIndexScanState *node) /* reset index scan */ if (node->biss_RuntimeKeysReady) - index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); + index_rescan(node->biss_ScanDesc, + node->biss_ScanKeys, node->biss_NumScanKeys, + NULL, 0); } /* ---------------------------------------------------------------- @@ -256,6 +260,8 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) * Initialize index-specific scan state */ indexstate->biss_RuntimeKeysReady = false; + 
indexstate->biss_RuntimeKeys = NULL; + indexstate->biss_NumRuntimeKeys = 0; /* * build the index scan keys from the index qualification @@ -264,6 +270,7 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) indexstate->biss_RelationDesc, node->scan.scanrelid, node->indexqual, + false, &indexstate->biss_ScanKeys, &indexstate->biss_NumScanKeys, &indexstate->biss_RuntimeKeys, @@ -297,8 +304,17 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) indexstate->biss_ScanDesc = index_beginscan_bitmap(indexstate->biss_RelationDesc, estate->es_snapshot, - indexstate->biss_NumScanKeys, - indexstate->biss_ScanKeys); + indexstate->biss_NumScanKeys); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to + * the index AM. + */ + if (indexstate->biss_NumRuntimeKeys == 0 && + indexstate->biss_NumArrayKeys == 0) + index_rescan(indexstate->biss_ScanDesc, + indexstate->biss_ScanKeys, indexstate->biss_NumScanKeys, + NULL, 0); /* * all done. diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index ee5fc72c20..3aed2960d3 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -181,7 +181,9 @@ ExecReScanIndexScan(IndexScanState *node) node->iss_RuntimeKeysReady = true; /* reset index scan */ - index_rescan(node->iss_ScanDesc, node->iss_ScanKeys); + index_rescan(node->iss_ScanDesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); ExecScanReScan(&node->ss); } @@ -480,10 +482,11 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * initialize child expressions * * Note: we don't initialize all of the indexqual expression, only the - * sub-parts corresponding to runtime keys (see below). The indexqualorig - * expression is always initialized even though it will only be used in - * some uncommon cases --- would be nice to improve that. 
(Problem is - * that any SubPlans present in the expression must be found now...) + * sub-parts corresponding to runtime keys (see below). Likewise for + * indexorderby, if any. But the indexqualorig expression is always + * initialized even though it will only be used in some uncommon cases --- + * would be nice to improve that. (Problem is that any SubPlans present + * in the expression must be found now...) */ indexstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, @@ -543,6 +546,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * Initialize index-specific scan state */ indexstate->iss_RuntimeKeysReady = false; + indexstate->iss_RuntimeKeys = NULL; + indexstate->iss_NumRuntimeKeys = 0; /* * build the index scan keys from the index qualification @@ -551,6 +556,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_RelationDesc, node->scan.scanrelid, node->indexqual, + false, &indexstate->iss_ScanKeys, &indexstate->iss_NumScanKeys, &indexstate->iss_RuntimeKeys, @@ -558,6 +564,21 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) NULL, /* no ArrayKeys */ NULL); + /* + * any ORDER BY exprs have to be turned into scankeys in the same way + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->iss_RelationDesc, + node->scan.scanrelid, + node->indexorderby, + true, + &indexstate->iss_OrderByKeys, + &indexstate->iss_NumOrderByKeys, + &indexstate->iss_RuntimeKeys, + &indexstate->iss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + /* * If we have runtime keys, we need an ExprContext to evaluate them. 
The * node's standard context won't do because we want to reset that context @@ -584,7 +605,16 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_RelationDesc, estate->es_snapshot, indexstate->iss_NumScanKeys, - indexstate->iss_ScanKeys); + indexstate->iss_NumOrderByKeys); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to + * the index AM. + */ + if (indexstate->iss_NumRuntimeKeys == 0) + index_rescan(indexstate->iss_ScanDesc, + indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys, + indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys); /* * all done. @@ -624,12 +654,20 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * 5. NullTest ("indexkey IS NULL/IS NOT NULL"). We just fill in the * ScanKey properly. * + * This code is also used to prepare ORDER BY expressions for amcanorderbyop + * indexes. The behavior is exactly the same, except that we have to look up + * the operator differently. Note that only cases 1 and 2 are currently + * possible for ORDER BY. 
+ * * Input params are: * * planstate: executor state node we are working for * index: the index we are building scan keys for * scanrelid: varno of the index's relation within current query - * quals: indexquals expressions + * quals: indexquals (or indexorderbys) expressions + * isorderby: true if processing ORDER BY exprs, false if processing quals + * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none + * *numRuntimeKeys: number of pre-existing runtime keys * * Output params are: * @@ -645,7 +683,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, - List *quals, ScanKey *scanKeys, int *numScanKeys, + List *quals, bool isorderby, + ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys) { @@ -654,41 +693,29 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, IndexRuntimeKeyInfo *runtime_keys; IndexArrayKeyInfo *array_keys; int n_scan_keys; - int extra_scan_keys; int n_runtime_keys; + int max_runtime_keys; int n_array_keys; int j; - /* - * If there are any RowCompareExpr quals, we need extra ScanKey entries - * for them, and possibly extra runtime-key entries. Count up what's - * needed. (The subsidiary ScanKey arrays for the RowCompareExprs could - * be allocated as separate chunks, but we have to count anyway to make - * runtime_keys large enough, so might as well just do one palloc.) 
- */ + /* Allocate array for ScanKey structs: one per qual */ n_scan_keys = list_length(quals); - extra_scan_keys = 0; - foreach(qual_cell, quals) - { - if (IsA(lfirst(qual_cell), RowCompareExpr)) - extra_scan_keys += - list_length(((RowCompareExpr *) lfirst(qual_cell))->opnos); - } - scan_keys = (ScanKey) - palloc((n_scan_keys + extra_scan_keys) * sizeof(ScanKeyData)); - /* Allocate these arrays as large as they could possibly need to be */ - runtime_keys = (IndexRuntimeKeyInfo *) - palloc((n_scan_keys + extra_scan_keys) * sizeof(IndexRuntimeKeyInfo)); - array_keys = (IndexArrayKeyInfo *) - palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo)); - n_runtime_keys = 0; - n_array_keys = 0; + scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData)); /* - * Below here, extra_scan_keys is index of first cell to use for next - * RowCompareExpr + * runtime_keys array is dynamically resized as needed. We handle it + * this way so that the same runtime keys array can be shared between + * indexquals and indexorderbys, which will be processed in separate + * calls of this function. Caller must be sure to pass in NULL/0 for + * first call. 
*/ - extra_scan_keys = n_scan_keys; + runtime_keys = *runtimeKeys; + n_runtime_keys = max_runtime_keys = *numRuntimeKeys; + + /* Allocate array_keys as large as it could possibly need to be */ + array_keys = (IndexArrayKeyInfo *) + palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo)); + n_array_keys = 0; /* * for each opclause in the given qual, convert the opclause into a single @@ -742,11 +769,14 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, */ opfamily = index->rd_opfamily[varattno - 1]; - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); + if (isorderby) + flags |= SK_ORDER_BY; + /* * rightop is the constant or variable comparison value */ @@ -767,6 +797,21 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, else { /* Need to treat this one as a runtime key */ + if (n_runtime_keys >= max_runtime_keys) + { + if (max_runtime_keys == 0) + { + max_runtime_keys = 8; + runtime_keys = (IndexRuntimeKeyInfo *) + palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + else + { + max_runtime_keys *= 2; + runtime_keys = (IndexRuntimeKeyInfo *) + repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + } runtime_keys[n_runtime_keys].scan_key = this_scan_key; runtime_keys[n_runtime_keys].key_expr = ExecInitExpr(rightop, planstate); @@ -794,12 +839,19 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, ListCell *largs_cell = list_head(rc->largs); ListCell *rargs_cell = list_head(rc->rargs); ListCell *opnos_cell = list_head(rc->opnos); - ScanKey first_sub_key = &scan_keys[extra_scan_keys]; + ScanKey first_sub_key; + int n_sub_key; + + Assert(!isorderby); + + first_sub_key = (ScanKey) + palloc(list_length(rc->opnos) * sizeof(ScanKeyData)); + n_sub_key = 0; /* Scan RowCompare columns and generate subsidiary ScanKey items */ while (opnos_cell != NULL) { - ScanKey 
this_sub_key = &scan_keys[extra_scan_keys]; + ScanKey this_sub_key = &first_sub_key[n_sub_key]; int flags = SK_ROW_MEMBER; Datum scanvalue; @@ -832,7 +884,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, elog(ERROR, "bogus RowCompare index qualification"); opfamily = index->rd_opfamily[varattno - 1]; - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); @@ -866,6 +918,21 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, else { /* Need to treat this one as a runtime key */ + if (n_runtime_keys >= max_runtime_keys) + { + if (max_runtime_keys == 0) + { + max_runtime_keys = 8; + runtime_keys = (IndexRuntimeKeyInfo *) + palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + else + { + max_runtime_keys *= 2; + runtime_keys = (IndexRuntimeKeyInfo *) + repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + } runtime_keys[n_runtime_keys].scan_key = this_sub_key; runtime_keys[n_runtime_keys].key_expr = ExecInitExpr(rightop, planstate); @@ -885,11 +952,11 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, op_righttype, /* strategy subtype */ opfuncid, /* reg proc to use */ scanvalue); /* constant */ - extra_scan_keys++; + n_sub_key++; } /* Mark the last subsidiary scankey correctly */ - scan_keys[extra_scan_keys - 1].sk_flags |= SK_ROW_END; + first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END; /* * We don't use ScanKeyEntryInitialize for the header because it @@ -907,6 +974,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, /* indexkey op ANY (array-expression) */ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Assert(!isorderby); + Assert(saop->useOr); opno = saop->opno; opfuncid = saop->opfuncid; @@ -935,7 +1004,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, */ opfamily = 
index->rd_opfamily[varattno - 1]; - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); @@ -973,6 +1042,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, NullTest *ntest = (NullTest *) clause; int flags; + Assert(!isorderby); + /* * argument should be the index key Var, possibly relabeled */ @@ -1020,12 +1091,9 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, (int) nodeTag(clause)); } + Assert(n_runtime_keys <= max_runtime_keys); + /* Get rid of any unused arrays */ - if (n_runtime_keys == 0) - { - pfree(runtime_keys); - runtime_keys = NULL; - } if (n_array_keys == 0) { pfree(array_keys); diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index e8ce5bc02b..98d1615514 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -201,7 +201,7 @@ MJExamineQuals(List *mergeclauses, clause->rexpr = ExecInitExpr((Expr *) lsecond(qual->args), parent); /* Extract the operator's declared left/right datatypes */ - get_op_opfamily_properties(qual->opno, opfamily, + get_op_opfamily_properties(qual->opno, opfamily, false, &op_strategy, &op_lefttype, &op_righttype); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 0e0b4dc598..4506518768 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -363,6 +363,8 @@ _copyIndexScan(IndexScan *from) COPY_SCALAR_FIELD(indexid); COPY_NODE_FIELD(indexqual); COPY_NODE_FIELD(indexqualorig); + COPY_NODE_FIELD(indexorderby); + COPY_NODE_FIELD(indexorderbyorig); COPY_SCALAR_FIELD(indexorderdir); return newnode; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index afbfccabda..5d09e16477 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -439,6 +439,8 @@ _outIndexScan(StringInfo str, IndexScan *node) 
WRITE_OID_FIELD(indexid); WRITE_NODE_FIELD(indexqual); WRITE_NODE_FIELD(indexqualorig); + WRITE_NODE_FIELD(indexorderby); + WRITE_NODE_FIELD(indexorderbyorig); WRITE_ENUM_FIELD(indexorderdir, ScanDirection); } @@ -1424,6 +1426,7 @@ _outIndexPath(StringInfo str, IndexPath *node) WRITE_NODE_FIELD(indexinfo); WRITE_NODE_FIELD(indexclauses); WRITE_NODE_FIELD(indexquals); + WRITE_NODE_FIELD(indexorderbys); WRITE_BOOL_FIELD(isjoininner); WRITE_ENUM_FIELD(indexscandir, ScanDirection); WRITE_FLOAT_FIELD(indextotalcost, "%.2f"); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 0724f9a6c9..e6edbdb1e8 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -209,6 +209,7 @@ cost_seqscan(Path *path, PlannerInfo *root, * * 'index' is the index to be used * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics) + * 'indexOrderBys' is the list of ORDER BY operators for amcanorderbyop indexes * 'outer_rel' is the outer relation when we are considering using the index * scan as the inside of a nestloop join (hence, some of the indexQuals * are join clauses, and we should expect repeated scans of the index); @@ -218,18 +219,19 @@ cost_seqscan(Path *path, PlannerInfo *root, * additional fields of the IndexPath besides startup_cost and total_cost. * These fields are needed if the IndexPath is used in a BitmapIndexScan. * + * indexQuals is a list of RestrictInfo nodes, but indexOrderBys is a list of + * bare expressions. + * * NOTE: 'indexQuals' must contain only clauses usable as index restrictions. * Any additional quals evaluated as qpquals may reduce the number of returned * tuples, but they won't reduce the number of tuples we have to fetch from * the table, so they don't reduce the scan cost. - * - * NOTE: as of 8.0, indexQuals is a list of RestrictInfo nodes, where formerly - * it was a list of bare clause expressions. 
*/ void cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index, List *indexQuals, + List *indexOrderBys, RelOptInfo *outer_rel) { RelOptInfo *baserel = index->rel; @@ -263,10 +265,11 @@ cost_index(IndexPath *path, PlannerInfo *root, * the fraction of main-table tuples we will have to retrieve) and its * correlation to the main-table tuple order. */ - OidFunctionCall8(index->amcostestimate, + OidFunctionCall9(index->amcostestimate, PointerGetDatum(root), PointerGetDatum(index), PointerGetDatum(indexQuals), + PointerGetDatum(indexOrderBys), PointerGetDatum(outer_rel), PointerGetDatum(&indexStartupCost), PointerGetDatum(&indexTotalCost), diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index f73e0e6dc6..90ccb3928b 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -89,6 +89,9 @@ static bool match_rowcompare_to_indexcol(IndexOptInfo *index, Oid opfamily, RowCompareExpr *clause, Relids outer_relids); +static List *match_index_to_pathkeys(IndexOptInfo *index, List *pathkeys); +static Expr *match_clause_to_ordering_op(IndexOptInfo *index, + int indexcol, Expr *clause, Oid pk_opfamily); static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel); static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids); @@ -286,6 +289,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); IndexPath *ipath; List *restrictclauses; + List *orderbyclauses; List *index_pathkeys; List *useful_pathkeys; bool useful_predicate; @@ -388,9 +392,24 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, ForwardScanDirection); useful_pathkeys = truncate_useless_pathkeys(root, rel, index_pathkeys); + orderbyclauses = NIL; + } + else if (index->amcanorderbyop && possibly_useful_pathkeys && + istoplevel && outer_rel == NULL && scantype != ST_BITMAPSCAN) + { + /* see if we can generate ordering 
operators for query_pathkeys */ + orderbyclauses = match_index_to_pathkeys(index, + root->query_pathkeys); + if (orderbyclauses) + useful_pathkeys = root->query_pathkeys; + else + useful_pathkeys = NIL; } else + { useful_pathkeys = NIL; + orderbyclauses = NIL; + } /* * 3. Generate an indexscan path if there are relevant restriction @@ -402,6 +421,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, { ipath = create_index_path(root, index, restrictclauses, + orderbyclauses, useful_pathkeys, index_is_ordered ? ForwardScanDirection : @@ -425,6 +445,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, { ipath = create_index_path(root, index, restrictclauses, + NIL, useful_pathkeys, BackwardScanDirection, outer_rel); @@ -1384,6 +1405,179 @@ match_rowcompare_to_indexcol(IndexOptInfo *index, } +/**************************************************************************** + * ---- ROUTINES TO CHECK ORDERING OPERATORS ---- + ****************************************************************************/ + +/* + * match_index_to_pathkeys + * Test whether an index can produce output ordered according to the + * given pathkeys using "ordering operators". + * + * If it can, return a list of suitable ORDER BY expressions, each of the form + * "indexedcol operator pseudoconstant". If not, return NIL. + */ +static List * +match_index_to_pathkeys(IndexOptInfo *index, List *pathkeys) +{ + List *orderbyexprs = NIL; + ListCell *lc1; + + /* Only indexes with the amcanorderbyop property are interesting here */ + if (!index->amcanorderbyop) + return NIL; + + foreach(lc1, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc1); + bool found = false; + ListCell *lc2; + + /* + * Note: for any failure to match, we just return NIL immediately. + * There is no value in matching just some of the pathkeys. 
+ */ + + /* Pathkey must request default sort order for the target opfamily */ + if (pathkey->pk_strategy != BTLessStrategyNumber || + pathkey->pk_nulls_first) + return NIL; + + /* If eclass is volatile, no hope of using an indexscan */ + if (pathkey->pk_eclass->ec_has_volatile) + return NIL; + + /* Try to match eclass member expression(s) to index */ + foreach(lc2, pathkey->pk_eclass->ec_members) + { + EquivalenceMember *member = (EquivalenceMember *) lfirst(lc2); + int indexcol; + + /* No possibility of match if it references other relations */ + if (!bms_equal(member->em_relids, index->rel->relids)) + continue; + + for (indexcol = 0; indexcol < index->ncolumns; indexcol++) + { + Expr *expr; + + expr = match_clause_to_ordering_op(index, + indexcol, + member->em_expr, + pathkey->pk_opfamily); + if (expr) + { + orderbyexprs = lappend(orderbyexprs, expr); + found = true; + break; + } + } + + if (found) /* don't want to look at remaining members */ + break; + } + + if (!found) /* fail if no match for this pathkey */ + return NIL; + } + + return orderbyexprs; /* success! */ +} + +/* + * match_clause_to_ordering_op + * Determines whether an ordering operator expression matches an + * index column. + * + * This is similar to, but simpler than, match_clause_to_indexcol. + * We only care about simple OpExpr cases. The input is a bare + * expression that is being ordered by, which must be of the form + * (indexkey op const) or (const op indexkey) where op is an ordering + * operator for the column's opfamily. + * + * 'index' is the index of interest. + * 'indexcol' is a column number of 'index' (counting from 0). + * 'clause' is the ordering expression to be tested. + * 'pk_opfamily' is the btree opfamily describing the required sort order. + * + * If successful, return 'clause' as-is if the indexkey is on the left, + * otherwise a commuted copy of 'clause'. If no match, return NULL. 
+ */ +static Expr * +match_clause_to_ordering_op(IndexOptInfo *index, + int indexcol, + Expr *clause, + Oid pk_opfamily) +{ + Oid opfamily = index->opfamily[indexcol]; + Node *leftop, + *rightop; + Oid expr_op; + Oid sortfamily; + bool commuted; + + /* + * Clause must be a binary opclause. + */ + if (!is_opclause(clause)) + return NULL; + leftop = get_leftop(clause); + rightop = get_rightop(clause); + if (!leftop || !rightop) + return NULL; + expr_op = ((OpExpr *) clause)->opno; + + /* + * Check for clauses of the form: (indexkey operator constant) or + * (constant operator indexkey). + */ + if (match_index_to_operand(leftop, indexcol, index) && + !contain_var_clause(rightop) && + !contain_volatile_functions(rightop)) + { + commuted = false; + } + else if (match_index_to_operand(rightop, indexcol, index) && + !contain_var_clause(leftop) && + !contain_volatile_functions(leftop)) + { + /* Might match, but we need a commuted operator */ + expr_op = get_commutator(expr_op); + if (expr_op == InvalidOid) + return NULL; + commuted = true; + } + else + return NULL; + + /* + * Is the (commuted) operator an ordering operator for the opfamily? + * And if so, does it yield the right sorting semantics? + */ + sortfamily = get_op_opfamily_sortfamily(expr_op, opfamily); + if (sortfamily != pk_opfamily) + return NULL; + + /* We have a match. Return clause or a commuted version thereof. 
*/ + if (commuted) + { + OpExpr *newclause = makeNode(OpExpr); + + /* flat-copy all the fields of clause */ + memcpy(newclause, clause, sizeof(OpExpr)); + + /* commute it */ + newclause->opno = expr_op; + newclause->opfuncid = InvalidOid; + newclause->args = list_make2(rightop, leftop); + + clause = (Expr *) newclause; + } + + return clause; +} + + /**************************************************************************** * ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ---- ****************************************************************************/ @@ -2630,7 +2824,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo, expr_op = linitial_oid(clause->opnos); if (!var_on_left) expr_op = get_commutator(expr_op); - get_op_opfamily_properties(expr_op, index->opfamily[indexcol], + get_op_opfamily_properties(expr_op, index->opfamily[indexcol], false, &op_strategy, &op_lefttype, &op_righttype); @@ -2698,7 +2892,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo, break; /* Add opfamily and datatypes to lists */ - get_op_opfamily_properties(expr_op, index->opfamily[i], + get_op_opfamily_properties(expr_op, index->opfamily[i], false, &op_strategy, &op_lefttype, &op_righttype); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 41ad512a29..1bbf35ed74 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -81,6 +81,8 @@ static Node *replace_nestloop_params(PlannerInfo *root, Node *expr); static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root); static List *fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, List *indexquals); +static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path, + List *indexorderbys); static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index); static List *get_switched_clauses(List *clauses, Relids outerrelids); static List *order_qual_clauses(PlannerInfo *root, List *clauses); @@ 
-89,6 +91,7 @@ static void copy_plan_costsize(Plan *dest, Plan *src); static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig, + List *indexorderby, List *indexorderbyorig, ScanDirection indexscandir); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, List *indexqual, @@ -1028,11 +1031,13 @@ create_indexscan_plan(PlannerInfo *root, List *scan_clauses) { List *indexquals = best_path->indexquals; + List *indexorderbys = best_path->indexorderbys; Index baserelid = best_path->path.parent->relid; Oid indexoid = best_path->indexinfo->indexoid; List *qpqual; List *stripped_indexquals; List *fixed_indexquals; + List *fixed_indexorderbys; ListCell *l; IndexScan *scan_plan; @@ -1052,6 +1057,11 @@ create_indexscan_plan(PlannerInfo *root, */ fixed_indexquals = fix_indexqual_references(root, best_path, indexquals); + /* + * Likewise fix up index attr references in the ORDER BY expressions. + */ + fixed_indexorderbys = fix_indexorderby_references(root, best_path, indexorderbys); + /* * If this is an innerjoin scan, the indexclauses will contain join * clauses that are not present in scan_clauses (since the passed-in value @@ -1123,11 +1133,12 @@ create_indexscan_plan(PlannerInfo *root, /* * We have to replace any outer-relation variables with nestloop params - * in the indexqualorig and qpqual expressions. A bit annoying to have to - * do this separately from the processing in fix_indexqual_references --- - * rethink this when generalizing the inner indexscan support. But note - * we can't really do this earlier because it'd break the comparisons to - * predicates above ... (or would it? Those wouldn't have outer refs) + * in the indexqualorig, qpqual, and indexorderbyorig expressions. 
A bit + * annoying to have to do this separately from the processing in + * fix_indexqual_references --- rethink this when generalizing the inner + * indexscan support. But note we can't really do this earlier because + * it'd break the comparisons to predicates above ... (or would it? Those + * wouldn't have outer refs) */ if (best_path->isjoininner) { @@ -1135,6 +1146,8 @@ create_indexscan_plan(PlannerInfo *root, replace_nestloop_params(root, (Node *) stripped_indexquals); qpqual = (List *) replace_nestloop_params(root, (Node *) qpqual); + indexorderbys = (List *) + replace_nestloop_params(root, (Node *) indexorderbys); } /* Finally ready to build the plan node */ @@ -1144,6 +1157,8 @@ create_indexscan_plan(PlannerInfo *root, indexoid, fixed_indexquals, stripped_indexquals, + fixed_indexorderbys, + indexorderbys, best_path->indexscandir); copy_path_costsize(&scan_plan->scan.plan, &best_path->path); @@ -2394,6 +2409,63 @@ fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, return fixed_indexquals; } +/* + * fix_indexorderby_references + * Adjust indexorderby clauses to the form the executor's index + * machinery needs. + * + * This is a simplified version of fix_indexqual_references. The input does + * not have RestrictInfo nodes, and we assume that indxpath.c already + * commuted the clauses to put the index keys on the left. Also, we don't + * bother to support any cases except simple OpExprs, since nothing else + * is allowed for ordering operators. + */ +static List * +fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path, + List *indexorderbys) +{ + IndexOptInfo *index = index_path->indexinfo; + List *fixed_indexorderbys; + ListCell *l; + + fixed_indexorderbys = NIL; + + foreach(l, indexorderbys) + { + Node *clause = (Node *) lfirst(l); + + /* + * Replace any outer-relation variables with nestloop params. + * + * This also makes a copy of the clause, so it's safe to modify it + * in-place below. 
+ */ + clause = replace_nestloop_params(root, clause); + + if (IsA(clause, OpExpr)) + { + OpExpr *op = (OpExpr *) clause; + + if (list_length(op->args) != 2) + elog(ERROR, "indexorderby clause is not binary opclause"); + + /* + * Now, determine which index attribute this is and change the + * indexkey operand as needed. + */ + linitial(op->args) = fix_indexqual_operand(linitial(op->args), + index); + } + else + elog(ERROR, "unsupported indexorderby type: %d", + (int) nodeTag(clause)); + + fixed_indexorderbys = lappend(fixed_indexorderbys, clause); + } + + return fixed_indexorderbys; +} + /* * fix_indexqual_operand * Convert an indexqual expression to a Var referencing the index column. @@ -2685,6 +2757,8 @@ make_indexscan(List *qptlist, Oid indexid, List *indexqual, List *indexqualorig, + List *indexorderby, + List *indexorderbyorig, ScanDirection indexscandir) { IndexScan *node = makeNode(IndexScan); @@ -2699,6 +2773,8 @@ make_indexscan(List *qptlist, node->indexid = indexid; node->indexqual = indexqual; node->indexqualorig = indexqualorig; + node->indexorderby = indexorderby; + node->indexorderbyorig = indexorderbyorig; node->indexorderdir = indexscandir; return node; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a1e5900592..6d0b3dbce9 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3135,7 +3135,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) /* Estimate the cost of index scan */ indexScanPath = create_index_path(root, indexInfo, - NIL, NIL, + NIL, NIL, NIL, ForwardScanDirection, NULL); return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost); diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 9aef7fc35a..0074679207 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -301,6 +301,10 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset) 
fix_scan_list(glob, splan->indexqual, rtoffset); splan->indexqualorig = fix_scan_list(glob, splan->indexqualorig, rtoffset); + splan->indexorderby = + fix_scan_list(glob, splan->indexorderby, rtoffset); + splan->indexorderbyorig = + fix_scan_list(glob, splan->indexorderbyorig, rtoffset); } break; case T_BitmapIndexScan: diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 754753cc12..39ef420284 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1942,10 +1942,13 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, case T_IndexScan: finalize_primnode((Node *) ((IndexScan *) plan)->indexqual, &context); + finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby, + &context); /* * we need not look at indexqualorig, since it will have the same - * param references as indexqual. + * param references as indexqual. Likewise, we can ignore + * indexorderbyorig. */ context.paramids = bms_add_members(context.paramids, scan_params); break; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 231d221b21..2439d814ce 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -414,6 +414,8 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel) * 'index' is a usable index. * 'clause_groups' is a list of lists of RestrictInfo nodes * to be used as index qual conditions in the scan. + * 'indexorderbys' is a list of bare expressions (no RestrictInfos) + * to be used as index ordering operators in the scan. * 'pathkeys' describes the ordering of the path. 
* 'indexscandir' is ForwardScanDirection or BackwardScanDirection * for an ordered index, or NoMovementScanDirection for @@ -427,6 +429,7 @@ IndexPath * create_index_path(PlannerInfo *root, IndexOptInfo *index, List *clause_groups, + List *indexorderbys, List *pathkeys, ScanDirection indexscandir, RelOptInfo *outer_rel) @@ -463,6 +466,7 @@ create_index_path(PlannerInfo *root, pathnode->indexinfo = index; pathnode->indexclauses = allclauses; pathnode->indexquals = indexquals; + pathnode->indexorderbys = indexorderbys; pathnode->isjoininner = (outer_rel != NULL); pathnode->indexscandir = indexscandir; @@ -504,7 +508,7 @@ create_index_path(PlannerInfo *root, pathnode->rows = rel->rows; } - cost_index(pathnode, root, index, indexquals, outer_rel); + cost_index(pathnode, root, index, indexquals, indexorderbys, outer_rel); return pathnode; } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 95397aa7ce..ef87f724ae 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2631,7 +2631,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause, examine_variable(root, right, 0, &rightvar); /* Extract the operator's declared left/right datatypes */ - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, false, &op_strategy, &op_lefttype, &op_righttype); @@ -4646,7 +4646,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, if (min) { index_scan = index_beginscan(heapRel, indexRel, SnapshotNow, - 1, scankeys); + 1, 0); + index_rescan(index_scan, scankeys, 1, NULL, 0); /* Fetch first tuple in sortop's direction */ if ((tup = index_getnext(index_scan, @@ -4677,7 +4678,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, if (max && have_data) { index_scan = index_beginscan(heapRel, indexRel, SnapshotNow, - 1, scankeys); + 1, 0); + index_rescan(index_scan, scankeys, 1, NULL, 0); /* Fetch first tuple in reverse direction */ if ((tup = 
index_getnext(index_scan, @@ -5644,7 +5646,9 @@ string_to_bytea_const(const char *str, size_t str_len) static void genericcostestimate(PlannerInfo *root, - IndexOptInfo *index, List *indexQuals, + IndexOptInfo *index, + List *indexQuals, + List *indexOrderBys, RelOptInfo *outer_rel, double numIndexTuples, Cost *indexStartupCost, @@ -5856,7 +5860,8 @@ genericcostestimate(PlannerInfo *root, * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per * indexqual operator. Because we have numIndexTuples as a per-scan * number, we have to multiply by num_sa_scans to get the correct result - * for ScalarArrayOpExpr cases. + * for ScalarArrayOpExpr cases. Similarly add in costs for any index + * ORDER BY expressions. * * Note: this neglects the possible costs of rechecking lossy operators * and OR-clause expressions. Detecting that that might be needed seems @@ -5864,11 +5869,15 @@ genericcostestimate(PlannerInfo *root, * inaccuracies here ... */ cost_qual_eval(&index_qual_cost, indexQuals, root); - qual_op_cost = cpu_operator_cost * list_length(indexQuals); - qual_arg_cost = index_qual_cost.startup + - index_qual_cost.per_tuple - qual_op_cost; + qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple; + cost_qual_eval(&index_qual_cost, indexOrderBys, root); + qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple; + qual_op_cost = cpu_operator_cost * + (list_length(indexQuals) + list_length(indexOrderBys)); + qual_arg_cost -= qual_op_cost; if (qual_arg_cost < 0) /* just in case... 
*/ qual_arg_cost = 0; + *indexStartupCost = qual_arg_cost; *indexTotalCost += qual_arg_cost; *indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost); @@ -5901,11 +5910,12 @@ btcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); - double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); Oid relid; AttrNumber colnum; VariableStatData vardata; @@ -6082,7 +6092,8 @@ btcostestimate(PG_FUNCTION_ARGS) numIndexTuples = rint(numIndexTuples / num_sa_scans); } - genericcostestimate(root, index, indexQuals, outer_rel, numIndexTuples, + genericcostestimate(root, index, indexQuals, indexOrderBys, + outer_rel, numIndexTuples, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -6206,13 +6217,14 @@ hashcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); - double *indexCorrelation = 
(double *) PG_GETARG_POINTER(7); + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); - genericcostestimate(root, index, indexQuals, outer_rel, 0.0, + genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -6225,13 +6237,14 @@ gistcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); - double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); - genericcostestimate(root, index, indexQuals, outer_rel, 0.0, + genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -6262,11 +6275,12 @@ gincostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - RelOptInfo *outer_rel = (RelOptInfo *) 
PG_GETARG_POINTER(3); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); - double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); ListCell *l; int32 nfullscan = 0; List *selectivityQuals; @@ -6432,7 +6446,7 @@ gincostestimate(PG_FUNCTION_ARGS) * Get the operator's strategy number and declared input data types * within the index opfamily. */ - get_op_opfamily_properties(clause_op, index->opfamily[indexcol], + get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false, &strategy_op, &lefttype, &righttype); /* @@ -6581,15 +6595,18 @@ gincostestimate(PG_FUNCTION_ARGS) * Add on index qual eval costs, much as in genericcostestimate */ cost_qual_eval(&index_qual_cost, indexQuals, root); - qual_op_cost = cpu_operator_cost * list_length(indexQuals); - qual_arg_cost = index_qual_cost.startup + - index_qual_cost.per_tuple - qual_op_cost; + qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple; + cost_qual_eval(&index_qual_cost, indexOrderBys, root); + qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple; + qual_op_cost = cpu_operator_cost * + (list_length(indexQuals) + list_length(indexOrderBys)); + qual_arg_cost -= qual_op_cost; if (qual_arg_cost < 0) /* just in case... 
*/ qual_arg_cost = 0; *indexStartupCost += qual_arg_cost; *indexTotalCost += qual_arg_cost; - *indexTotalCost += ( numTuples * *indexSelectivity ) * (cpu_index_tuple_cost + qual_op_cost); + *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); PG_RETURN_VOID(); } diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 9beae0d9ef..cbdfe05031 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -85,19 +85,42 @@ get_op_opfamily_strategy(Oid opno, Oid opfamily) return result; } +/* + * get_op_opfamily_sortfamily + * + * If the operator is an ordering operator within the specified opfamily, + * return its amopsortfamily OID; else return InvalidOid. + */ +Oid +get_op_opfamily_sortfamily(Oid opno, Oid opfamily) +{ + HeapTuple tp; + Form_pg_amop amop_tup; + Oid result; + + tp = SearchSysCache3(AMOPOPID, + ObjectIdGetDatum(opno), + CharGetDatum(AMOP_ORDER), + ObjectIdGetDatum(opfamily)); + if (!HeapTupleIsValid(tp)) + return InvalidOid; + amop_tup = (Form_pg_amop) GETSTRUCT(tp); + result = amop_tup->amopsortfamily; + ReleaseSysCache(tp); + return result; +} + /* * get_op_opfamily_properties * * Get the operator's strategy number and declared input data types * within the specified opfamily. * - * This function only considers search operators, not ordering operators. - * * Caller should already have verified that opno is a member of opfamily, * therefore we raise an error if the tuple is not found. */ void -get_op_opfamily_properties(Oid opno, Oid opfamily, +get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype) @@ -107,7 +130,7 @@ get_op_opfamily_properties(Oid opno, Oid opfamily, tp = SearchSysCache3(AMOPOPID, ObjectIdGetDatum(opno), - CharGetDatum(AMOP_SEARCH), + CharGetDatum(ordering_op ? 
AMOP_ORDER : AMOP_SEARCH), ObjectIdGetDatum(opfamily)); if (!HeapTupleIsValid(tp)) elog(ERROR, "operator %u is not a member of opfamily %u", diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 48380ef32f..896fb75fdd 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -135,11 +135,13 @@ extern bool index_insert(Relation indexRelation, extern IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, - int nkeys, ScanKey key); + int nkeys, int norderbys); extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation, Snapshot snapshot, - int nkeys, ScanKey key); -extern void index_rescan(IndexScanDesc scan, ScanKey key); + int nkeys); +extern void index_rescan(IndexScanDesc scan, + ScanKey keys, int nkeys, + ScanKey orderbys, int norderbys); extern void index_endscan(IndexScanDesc scan); extern void index_markpos(IndexScanDesc scan); extern void index_restrpos(IndexScanDesc scan); @@ -161,7 +163,7 @@ extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum, * index access method support routines (in genam.c) */ extern IndexScanDesc RelationGetIndexScan(Relation indexRelation, - int nkeys, ScanKey key); + int nkeys, int norderbys); extern void IndexScanEnd(IndexScanDesc scan); extern char *BuildIndexValueDescription(Relation indexRelation, Datum *values, bool *isnull); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index b4ec01ed12..f412fc3844 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -62,8 +62,10 @@ typedef struct IndexScanDescData Relation heapRelation; /* heap relation descriptor, or NULL */ Relation indexRelation; /* index relation descriptor */ Snapshot xs_snapshot; /* snapshot to see */ - int numberOfKeys; /* number of scan keys */ - ScanKey keyData; /* array of scan key descriptors */ + int numberOfKeys; /* number of index qualifier conditions */ + int numberOfOrderBys; /* number of ordering 
operators */ + ScanKey keyData; /* array of index qualifier descriptors */ + ScanKey orderByData; /* array of ordering op descriptors */ /* signaling to index AM about killing index tuples */ bool kill_prior_tuple; /* last-returned tuple is dead */ diff --git a/src/include/access/skey.h b/src/include/access/skey.h index fcf81ba6ab..c30a44bde3 100644 --- a/src/include/access/skey.h +++ b/src/include/access/skey.h @@ -60,6 +60,11 @@ typedef uint16 StrategyNumber; * supported only for index scans, not heap scans; and not all index AMs * support them. * + * A ScanKey can also represent an ordering operator invocation, that is + * an ordering requirement "ORDER BY indexedcol op constant". This looks + * the same as a comparison operator, except that the operator doesn't + * (usually) yield boolean. We mark such ScanKeys with SK_ORDER_BY. + * * Note: in some places, ScanKeys are used as a convenient representation * for the invocation of an access method support procedure. In this case * sk_strategy/sk_subtype are not meaningful, and sk_func may refer to a @@ -122,6 +127,7 @@ typedef ScanKeyData *ScanKey; #define SK_SEARCHNULL 0x0020 /* scankey represents "col IS NULL" */ #define SK_SEARCHNOTNULL 0x0040 /* scankey represents "col IS NOT * NULL" */ +#define SK_ORDER_BY 0x0080 /* scankey is for ORDER BY op */ /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 8698a43371..f28162b439 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201011241 +#define CATALOG_VERSION_NO 201012021 #endif diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 5a18dee0bd..a729690aff 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -52,11 +52,11 @@ CATALOG(pg_am,2601) bool amclusterable; /* does AM support cluster command? 
*/ Oid amkeytype; /* type of data in index, or InvalidOid */ regproc aminsert; /* "insert this tuple" function */ - regproc ambeginscan; /* "start new scan" function */ + regproc ambeginscan; /* "prepare for index scan" function */ regproc amgettuple; /* "next valid tuple" function, or 0 */ regproc amgetbitmap; /* "fetch all valid tuples" function, or 0 */ - regproc amrescan; /* "restart this scan" function */ - regproc amendscan; /* "end this scan" function */ + regproc amrescan; /* "(re)start index scan" function */ + regproc amendscan; /* "end index scan" function */ regproc ammarkpos; /* "mark current scan position" function */ regproc amrestrpos; /* "restore marked scan position" function */ regproc ambuild; /* "build new index" function */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 25a391282a..611adef83c 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -679,7 +679,7 @@ DATA(insert OID = 331 ( btinsert PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 "2 DESCR("btree(internal)"); DATA(insert OID = 333 ( btbeginscan PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ btbeginscan _null_ _null_ _null_ )); DESCR("btree(internal)"); -DATA(insert OID = 334 ( btrescan PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ btrescan _null_ _null_ _null_ )); +DATA(insert OID = 334 ( btrescan PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ btrescan _null_ _null_ _null_ )); DESCR("btree(internal)"); DATA(insert OID = 335 ( btendscan PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ btendscan _null_ _null_ _null_ )); DESCR("btree(internal)"); @@ -693,7 +693,7 @@ DATA(insert OID = 332 ( btbulkdelete PGNSP PGUID 12 1 0 0 f f f t f v 4 0 22 DESCR("btree(internal)"); DATA(insert OID = 972 ( btvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ 
_null_ btvacuumcleanup _null_ _null_ _null_ )); DESCR("btree(internal)"); -DATA(insert OID = 1268 ( btcostestimate PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ btcostestimate _null_ _null_ _null_ )); +DATA(insert OID = 1268 ( btcostestimate PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ btcostestimate _null_ _null_ _null_ )); DESCR("btree(internal)"); DATA(insert OID = 2785 ( btoptions PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_ btoptions _null_ _null_ _null_ )); DESCR("btree(internal)"); @@ -798,7 +798,7 @@ DATA(insert OID = 441 ( hashinsert PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 DESCR("hash(internal)"); DATA(insert OID = 443 ( hashbeginscan PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ hashbeginscan _null_ _null_ _null_ )); DESCR("hash(internal)"); -DATA(insert OID = 444 ( hashrescan PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ hashrescan _null_ _null_ _null_ )); +DATA(insert OID = 444 ( hashrescan PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ hashrescan _null_ _null_ _null_ )); DESCR("hash(internal)"); DATA(insert OID = 445 ( hashendscan PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ hashendscan _null_ _null_ _null_ )); DESCR("hash(internal)"); @@ -812,7 +812,7 @@ DATA(insert OID = 442 ( hashbulkdelete PGNSP PGUID 12 1 0 0 f f f t f v 4 0 DESCR("hash(internal)"); DATA(insert OID = 425 ( hashvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ hashvacuumcleanup _null_ _null_ _null_ )); DESCR("hash(internal)"); -DATA(insert OID = 438 ( hashcostestimate PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ hashcostestimate _null_ _null_ 
_null_ )); +DATA(insert OID = 438 ( hashcostestimate PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ hashcostestimate _null_ _null_ _null_ )); DESCR("hash(internal)"); DATA(insert OID = 2786 ( hashoptions PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_ hashoptions _null_ _null_ _null_ )); DESCR("hash(internal)"); @@ -1094,7 +1094,7 @@ DATA(insert OID = 775 ( gistinsert PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 DESCR("gist(internal)"); DATA(insert OID = 777 ( gistbeginscan PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ gistbeginscan _null_ _null_ _null_ )); DESCR("gist(internal)"); -DATA(insert OID = 778 ( gistrescan PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ gistrescan _null_ _null_ _null_ )); +DATA(insert OID = 778 ( gistrescan PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gistrescan _null_ _null_ _null_ )); DESCR("gist(internal)"); DATA(insert OID = 779 ( gistendscan PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ gistendscan _null_ _null_ _null_ )); DESCR("gist(internal)"); @@ -1108,7 +1108,7 @@ DATA(insert OID = 776 ( gistbulkdelete PGNSP PGUID 12 1 0 0 f f f t f v 4 0 DESCR("gist(internal)"); DATA(insert OID = 2561 ( gistvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ gistvacuumcleanup _null_ _null_ _null_ )); DESCR("gist(internal)"); -DATA(insert OID = 772 ( gistcostestimate PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gistcostestimate _null_ _null_ _null_ )); +DATA(insert OID = 772 ( gistcostestimate PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gistcostestimate _null_ _null_ _null_ )); DESCR("gist(internal)"); DATA(insert 
OID = 2787 ( gistoptions PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_ gistoptions _null_ _null_ _null_ )); DESCR("gist(internal)"); @@ -4335,7 +4335,7 @@ DATA(insert OID = 2732 ( gininsert PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 DESCR("gin(internal)"); DATA(insert OID = 2733 ( ginbeginscan PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ ginbeginscan _null_ _null_ _null_ )); DESCR("gin(internal)"); -DATA(insert OID = 2734 ( ginrescan PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ ginrescan _null_ _null_ _null_ )); +DATA(insert OID = 2734 ( ginrescan PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ ginrescan _null_ _null_ _null_ )); DESCR("gin(internal)"); DATA(insert OID = 2735 ( ginendscan PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ ginendscan _null_ _null_ _null_ )); DESCR("gin(internal)"); @@ -4349,7 +4349,7 @@ DATA(insert OID = 2739 ( ginbulkdelete PGNSP PGUID 12 1 0 0 f f f t f v 4 0 DESCR("gin(internal)"); DATA(insert OID = 2740 ( ginvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ ginvacuumcleanup _null_ _null_ _null_ )); DESCR("gin(internal)"); -DATA(insert OID = 2741 ( gincostestimate PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gincostestimate _null_ _null_ _null_ )); +DATA(insert OID = 2741 ( gincostestimate PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gincostestimate _null_ _null_ _null_ )); DESCR("gin(internal)"); DATA(insert OID = 2788 ( ginoptions PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_ ginoptions _null_ _null_ _null_ )); DESCR("gin(internal)"); diff --git a/src/include/executor/nodeIndexscan.h b/src/include/executor/nodeIndexscan.h index 
48d35e4a48..d1e0f380c0 100644 --- a/src/include/executor/nodeIndexscan.h +++ b/src/include/executor/nodeIndexscan.h @@ -25,8 +25,8 @@ extern void ExecReScanIndexScan(IndexScanState *node); /* routines exported to share code with nodeBitmapIndexscan.c */ extern void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, - Index scanrelid, - List *quals, ScanKey *scanKeys, int *numScanKeys, + Index scanrelid, List *quals, bool isorderby, + ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys); extern void ExecIndexEvalRuntimeKeys(ExprContext *econtext, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 89f8e202e3..d669c24b98 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1182,10 +1182,12 @@ typedef struct * IndexScanState information * * indexqualorig execution state for indexqualorig expressions - * ScanKeys Skey structures to scan index rel - * NumScanKeys number of Skey structs + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys * RuntimeKeys info about Skeys that must be evaluated at runtime - * NumRuntimeKeys number of RuntimeKeys structs + * NumRuntimeKeys number of RuntimeKeys * RuntimeKeysReady true if runtime Skeys have been computed * RuntimeContext expr context for evaling runtime Skeys * RelationDesc index relation descriptor @@ -1198,6 +1200,8 @@ typedef struct IndexScanState List *indexqualorig; ScanKey iss_ScanKeys; int iss_NumScanKeys; + ScanKey iss_OrderByKeys; + int iss_NumOrderByKeys; IndexRuntimeKeyInfo *iss_RuntimeKeys; int iss_NumRuntimeKeys; bool iss_RuntimeKeysReady; @@ -1210,12 +1214,12 @@ typedef struct IndexScanState * BitmapIndexScanState information * * result bitmap to return output into, or NULL - * ScanKeys Skey structures to scan index rel - * 
NumScanKeys number of Skey structs + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys * RuntimeKeys info about Skeys that must be evaluated at runtime - * NumRuntimeKeys number of RuntimeKeys structs + * NumRuntimeKeys number of RuntimeKeys * ArrayKeys info about Skeys that come from ScalarArrayOpExprs - * NumArrayKeys number of ArrayKeys structs + * NumArrayKeys number of ArrayKeys * RuntimeKeysReady true if runtime Skeys have been computed * RuntimeContext expr context for evaling runtime Skeys * RelationDesc index relation descriptor diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index fec4acea34..b89eb55ad7 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -271,7 +271,10 @@ typedef Scan SeqScan; * be of the form (indexkey OP comparisonval) or (comparisonval OP indexkey). * The indexkey is a Var or expression referencing column(s) of the index's * base table. The comparisonval might be any expression, but it won't use - * any columns of the base table. + * any columns of the base table. The expressions are ordered by index + * column position (but items referencing the same index column can appear + * in any order). indexqualorig is used at runtime only if we have to recheck + * a lossy indexqual. * * indexqual has the same form, but the expressions have been commuted if * necessary to put the indexkeys on the left, and the indexkeys are replaced @@ -280,14 +283,26 @@ typedef Scan SeqScan; * table). This is a bit hokey ... would be cleaner to use a special-purpose * node type that could not be mistaken for a regular Var. But it will do * for now. + * + * indexorderbyorig is similarly the original form of any ORDER BY expressions + * that are being implemented by the index, while indexorderby is modified to + * have index column Vars on the left-hand side. 
Here, multiple expressions + * must appear in exactly the ORDER BY order, and this is not necessarily the + * index column order. Only the expressions are provided, not the auxiliary + * sort-order information from the ORDER BY SortGroupClauses; it's assumed + * that the sort ordering is fully determinable from the top-level operators. + * indexorderbyorig is unused at run time, but is needed for EXPLAIN. + * (Note these fields are used for amcanorderbyop cases, not amcanorder cases.) * ---------------- */ typedef struct IndexScan { Scan scan; Oid indexid; /* OID of index to scan */ - List *indexqual; /* list of index quals (OpExprs) */ + List *indexqual; /* list of index quals (usually OpExprs) */ List *indexqualorig; /* the same in original form */ + List *indexorderby; /* list of index ORDER BY exprs */ + List *indexorderbyorig; /* the same in original form */ ScanDirection indexorderdir; /* forward or backward or don't care */ } IndexScan; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d084338f35..e7ebcfcc81 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -631,6 +631,13 @@ typedef struct Path * indexable operators appear in 'indexclauses', they are replaced by the * derived indexscannable conditions in 'indexquals'. * + * 'indexorderbys', if not NIL, is a list of ORDER BY expressions that have + * been found to be usable as ordering operators for an amcanorderbyop index. + * Note that these are not RestrictInfos, just bare expressions, since they + * generally won't yield booleans. The list will match the path's pathkeys. + * Also, unlike the case for quals, it's guaranteed that each expression has + * the index key on the left side of the operator. + * * 'isjoininner' is TRUE if the path is a nestloop inner scan (that is, * some of the index conditions are join rather than restriction clauses). 
* Note that the path costs will be calculated differently from a plain @@ -663,6 +670,7 @@ typedef struct IndexPath IndexOptInfo *indexinfo; List *indexclauses; List *indexquals; + List *indexorderbys; bool isjoininner; ScanDirection indexscandir; Cost indextotalcost; diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 8df1b95abe..48de2a989f 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -67,7 +67,7 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root); extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel); extern void cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index, - List *indexQuals, RelOptInfo *outer_rel); + List *indexQuals, List *indexOrderBys, RelOptInfo *outer_rel); extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, RelOptInfo *outer_rel); extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 53ebe5756b..2dde5e07ef 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -31,6 +31,7 @@ extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel); extern IndexPath *create_index_path(PlannerInfo *root, IndexOptInfo *index, List *clause_groups, + List *indexorderbys, List *pathkeys, ScanDirection indexscandir, RelOptInfo *outer_rel); diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 62d15cca36..5f41adfcc2 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -22,7 +22,7 @@ /* * Maximum number of arguments to a function. * - * The minimum value is 8 (index cost estimation uses 8-argument functions). + * The minimum value is 9 (index cost estimation uses 9-argument functions). 
* The maximum possible value is around 600 (limited by index tuple size in * pg_proc's index; BLCKSZ larger than 8K would allow more). Values larger * than needed will waste memory and processing time, but do not directly diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index b6104d7dec..7bf3f360f5 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -32,7 +32,8 @@ extern PGDLLIMPORT get_attavgwidth_hook_type get_attavgwidth_hook; extern bool op_in_opfamily(Oid opno, Oid opfamily); extern int get_op_opfamily_strategy(Oid opno, Oid opfamily); -extern void get_op_opfamily_properties(Oid opno, Oid opfamily, +extern Oid get_op_opfamily_sortfamily(Oid opno, Oid opfamily); +extern void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype);