diff --git a/doc/src/sgml/gist.sgml b/doc/src/sgml/gist.sgml index 1291f8dd0c..641d1d04ba 100644 --- a/doc/src/sgml/gist.sgml +++ b/doc/src/sgml/gist.sgml @@ -818,10 +818,15 @@ my_distance(PG_FUNCTION_ARGS) - The result value can be any finite float8 value. (Infinity and - minus infinity are used internally to handle cases such as nulls, so it - is not recommended that distance functions return these - values.) + If the distance function returns *recheck=true for a leaf node, the + original ordering operator's return type must be float8 or float4, and + the distance function's return value must be comparable with the actual + distance operator. Otherwise, the distance function's return type can + be any finit float8 value, as long as the relative order of + the returned values matches the order returned by the ordering operator. + (Infinity and minus infinity are used internally to handle cases such as + nulls, so it is not recommended that distance functions + return these values.) diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 1f791a4f8e..09fba06f73 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -16,6 +16,7 @@ #include "access/gist_private.h" #include "access/relscan.h" +#include "catalog/pg_type.h" #include "miscadmin.h" #include "pgstat.h" #include "lib/pairingheap.h" @@ -31,9 +32,11 @@ * depending on whether the containing page is a leaf page or not. * * On success return for a heap tuple, *recheck_p is set to indicate whether - * recheck is needed. We recheck if any of the consistent() or distance() + * the quals need to be rechecked. We recheck if any of the consistent() * functions request it. recheck is not interesting when examining a non-leaf * entry, since we must visit the lower index page if there's any doubt. + * Similarly, *recheck_distances_p is set to indicate whether the distances + * need to be rechecked, and it is also ignored for non-leaf entries. * * If we are doing an ordered scan, so->distances[] is filled with distance * data from the distance() functions before returning success. @@ -50,7 +53,8 @@ gistindex_keytest(IndexScanDesc scan, IndexTuple tuple, Page page, OffsetNumber offset, - bool *recheck_p) + bool *recheck_p, + bool *recheck_distances_p) { GISTScanOpaque so = (GISTScanOpaque) scan->opaque; GISTSTATE *giststate = so->giststate; @@ -60,6 +64,7 @@ gistindex_keytest(IndexScanDesc scan, Relation r = scan->indexRelation; *recheck_p = false; + *recheck_distances_p = false; /* * If it's a leftover invalid tuple from pre-9.1, treat it as a match with @@ -194,11 +199,14 @@ gistindex_keytest(IndexScanDesc scan, * use.) * * Distance functions get a recheck argument as well. In this - * case the returned distance is the lower bound of distance - * and needs to be rechecked. We return single recheck flag - * which means that both quals and distances are to be - * rechecked. + * case the returned distance is the lower bound of distance and + * needs to be rechecked. We return single recheck flag which + * means that both quals and distances are to be rechecked. We + * initialize the flag to 'false'. The flag was added in version + * 9.5 and the distance operators written before that won't know + * about the flag, and are never lossy. */ + recheck = false; dist = FunctionCall5Coll(&key->sk_func, key->sk_collation, PointerGetDatum(&de), @@ -206,9 +214,7 @@ gistindex_keytest(IndexScanDesc scan, Int32GetDatum(key->sk_strategy), ObjectIdGetDatum(key->sk_subtype), PointerGetDatum(&recheck)); - - *recheck_p |= recheck; - + *recheck_distances_p |= recheck; *distance_p = DatumGetFloat8(dist); } @@ -310,6 +316,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, IndexTuple it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); bool match; bool recheck; + bool recheck_distances; /* * Must call gistindex_keytest in tempCxt, and clean up any leftover @@ -317,7 +324,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, */ oldcxt = MemoryContextSwitchTo(so->giststate->tempCxt); - match = gistindex_keytest(scan, it, page, i, &recheck); + match = gistindex_keytest(scan, it, page, i, + &recheck, &recheck_distances); MemoryContextSwitchTo(oldcxt); MemoryContextReset(so->giststate->tempCxt); @@ -375,6 +383,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, item->blkno = InvalidBlockNumber; item->data.heap.heapPtr = it->t_tid; item->data.heap.recheck = recheck; + item->data.heap.recheckDistances = recheck_distances; /* * In an index-only scan, also fetch the data from the tuple. @@ -461,10 +470,33 @@ getNextNearest(IndexScanDesc scan) /* found a heap item at currently minimal distance */ scan->xs_ctup.t_self = item->data.heap.heapPtr; scan->xs_recheck = item->data.heap.recheck; + scan->xs_recheckorderby = item->data.heap.recheckDistances; for (i = 0; i < scan->numberOfOrderBys; i++) { - scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]); - scan->xs_orderbynulls[i] = false; + if (so->orderByTypes[i] == FLOAT8OID) + { + scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]); + scan->xs_orderbynulls[i] = false; + } + else if (so->orderByTypes[i] == FLOAT4OID) + { + scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i]); + scan->xs_orderbynulls[i] = false; + } + else + { + /* + * If the ordering operator's return value is anything + * else, we don't know how to convert the float8 bound + * calculated by the distance function to that. The + * executor won't actually need the order by values we + * return here, if there are no lossy results, so only + * insist on the datatype if the *recheck is set. + */ + if (scan->xs_recheckorderby) + elog(ERROR, "GiST operator family's FOR ORDER BY operator must return float8 or float4 if the distance function is lossy"); + scan->xs_orderbynulls[i] = true; + } } /* in an index-only scan, also return the reconstructed tuple. */ diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 099849a606..d0fea2b86c 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -17,6 +17,7 @@ #include "access/gist_private.h" #include "access/gistscan.h" #include "access/relscan.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -263,6 +264,8 @@ gistrescan(PG_FUNCTION_ARGS) memmove(scan->orderByData, orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); + so->orderByTypes = (Oid *) palloc(scan->numberOfOrderBys * sizeof(Oid)); + /* * Modify the order-by key so that the Distance method is called for * all comparisons. The original operator is passed to the Distance @@ -281,6 +284,19 @@ gistrescan(PG_FUNCTION_ARGS) GIST_DISTANCE_PROC, skey->sk_attno, RelationGetRelationName(scan->indexRelation)); + /* + * Look up the datatype returned by the original ordering operator. + * GiST always uses a float8 for the distance function, but the + * ordering operator could be anything else. + * + * XXX: The distance function is only allowed to be lossy if the + * ordering operator's result type is float4 or float8. Otherwise + * we don't know how to return the distance to the executor. But + * we cannot check that here, as we won't know if the distance + * function is lossy until it returns *recheck = true for the + * first time. + */ + so->orderByTypes[i] = get_func_rettype(skey->sk_func.fn_oid); fmgr_info_copy(&(skey->sk_func), finfo, so->giststate->scanCxt); /* Restore prior fn_extra pointers, if not first time */ diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 2164da0c84..434a1d95f0 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -236,7 +236,12 @@ next_indextuple: InstrCountFiltered2(node, 1); goto next_indextuple; } + } + if (scandesc->xs_recheckorderby) + { + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); EvalOrderByExpressions(node, econtext); /* diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 9d3714d27d..0e819d7b51 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -121,6 +121,7 @@ typedef struct GISTSearchHeapItem { ItemPointerData heapPtr; bool recheck; /* T if quals must be rechecked */ + bool recheckDistances; /* T if distances must be rechecked */ IndexTuple ftup; /* data fetched back from the index, used in * index-only scans */ } GISTSearchHeapItem; @@ -150,6 +151,8 @@ typedef struct GISTSearchItem typedef struct GISTScanOpaqueData { GISTSTATE *giststate; /* index information, see above */ + Oid *orderByTypes; /* datatypes of ORDER BY expressions */ + pairingheap *queue; /* queue of unvisited items */ MemoryContext queueCxt; /* context holding the queue */ bool qual_ok; /* false if qual can never be satisfied */ diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 865d36403a..5a0d724aca 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -99,6 +99,7 @@ typedef struct IndexScanDescData */ Datum *xs_orderbyvals; bool *xs_orderbynulls; + bool xs_recheckorderby; /* T means ORDER BY exprs must be rechecked */ /* state data for traversing HOT chains in index_getnext */ bool xs_continue_hot; /* T if must keep walking HOT chain */