Still more fixes for lossy-GiST-distance-functions patch.
Fix confusion in documentation, substantial memory leakage if float8 or
float4 are pass-by-reference, and assorted comments that were obsoleted
by commit 98edd617f3
.
This commit is contained in:
parent
284bef2977
commit
821b821a24
|
@ -208,12 +208,6 @@
|
|||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>
|
||||
Currently, ordering by the distance operator <literal><-></>
|
||||
is supported only with <literal>point</> by the operator classes
|
||||
of the geometric types.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For historical reasons, the <literal>inet_ops</> operator class is
|
||||
not the default class for types <type>inet</> and <type>cidr</>.
|
||||
|
@ -805,28 +799,30 @@ my_distance(PG_FUNCTION_ARGS)
|
|||
</para>
|
||||
|
||||
<para>
|
||||
Some approximation is allowed when determining the distance, as long as
|
||||
the result is never greater than the entry's actual distance. Thus, for
|
||||
example, distance to a bounding box is usually sufficient in geometric
|
||||
applications. For an internal tree node, the distance returned must not
|
||||
be greater than the distance to any of the child nodes. If the returned
|
||||
distance is not accurate, the function must set *recheck to false. (This
|
||||
is not necessary for internal tree nodes; for them, the calculation is
|
||||
always assumed to be inaccurate). The executor will calculate the
|
||||
accurate distance after fetching the tuple from the heap, and reorder
|
||||
the tuples if necessary.
|
||||
Some approximation is allowed when determining the distance, so long
|
||||
as the result is never greater than the entry's actual distance. Thus,
|
||||
for example, distance to a bounding box is usually sufficient in
|
||||
geometric applications. For an internal tree node, the distance
|
||||
returned must not be greater than the distance to any of the child
|
||||
nodes. If the returned distance is not exact, the function must set
|
||||
<literal>*recheck</> to true. (This is not necessary for internal tree
|
||||
nodes; for them, the calculation is always assumed to be inexact.) In
|
||||
this case the executor will calculate the accurate distance after
|
||||
fetching the tuple from the heap, and reorder the tuples if necessary.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If the distance function returns *recheck=true for a leaf node, the
|
||||
original ordering operator's return type must be float8 or float4, and
|
||||
the distance function's return value must be comparable with the actual
|
||||
distance operator. Otherwise, the distance function's return type can
|
||||
be any finit <type>float8</> value, as long as the relative order of
|
||||
the returned values matches the order returned by the ordering operator.
|
||||
(Infinity and minus infinity are used internally to handle cases such as
|
||||
nulls, so it is not recommended that <function>distance</> functions
|
||||
return these values.)
|
||||
If the distance function returns <literal>*recheck = true</> for any
|
||||
leaf node, the original ordering operator's return type must
|
||||
be <type>float8</> or <type>float4</>, and the distance function's
|
||||
result values must be comparable to those of the original ordering
|
||||
operator, since the executor will sort using both distance function
|
||||
results and recalculated ordering-operator results. Otherwise, the
|
||||
distance function's result values can be any finite <type>float8</>
|
||||
values, so long as the relative order of the result values matches the
|
||||
order returned by the ordering operator. (Infinity and minus infinity
|
||||
are used internally to handle cases such as nulls, so it is not
|
||||
recommended that <function>distance</> functions return these values.)
|
||||
</para>
|
||||
|
||||
</listitem>
|
||||
|
@ -857,7 +853,7 @@ LANGUAGE C STRICT;
|
|||
struct, whose 'key' field contains the same datum in the original,
|
||||
uncompressed form. If the opclass' compress function does nothing for
|
||||
leaf entries, the fetch method can return the argument as is.
|
||||
</para>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The matching code in the C module could then follow this skeleton:
|
||||
|
|
|
@ -191,20 +191,18 @@ gistindex_keytest(IndexScanDesc scan,
|
|||
/*
|
||||
* Call the Distance function to evaluate the distance. The
|
||||
* arguments are the index datum (as a GISTENTRY*), the comparison
|
||||
* datum, and the ordering operator's strategy number and subtype
|
||||
* from pg_amop.
|
||||
* datum, the ordering operator's strategy number and subtype from
|
||||
* pg_amop, and the recheck flag.
|
||||
*
|
||||
* (Presently there's no need to pass the subtype since it'll
|
||||
* always be zero, but might as well pass it for possible future
|
||||
* use.)
|
||||
*
|
||||
* Distance functions get a recheck argument as well. In this
|
||||
* case the returned distance is the lower bound of distance and
|
||||
* needs to be rechecked. We return single recheck flag which
|
||||
* means that both quals and distances are to be rechecked. We
|
||||
* initialize the flag to 'false'. The flag was added in version
|
||||
* 9.5 and the distance operators written before that won't know
|
||||
* about the flag, and are never lossy.
|
||||
* If the function sets the recheck flag, the returned distance is
|
||||
* a lower bound on the true distance and needs to be rechecked.
|
||||
* We initialize the flag to 'false'. This flag was added in
|
||||
* version 9.5; distance functions written before that won't know
|
||||
* about the flag, but are expected to never be lossy.
|
||||
*/
|
||||
recheck = false;
|
||||
dist = FunctionCall5Coll(&key->sk_func,
|
||||
|
@ -475,11 +473,22 @@ getNextNearest(IndexScanDesc scan)
|
|||
{
|
||||
if (so->orderByTypes[i] == FLOAT8OID)
|
||||
{
|
||||
#ifndef USE_FLOAT8_BYVAL
|
||||
/* must free any old value to avoid memory leakage */
|
||||
if (!scan->xs_orderbynulls[i])
|
||||
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
|
||||
#endif
|
||||
scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]);
|
||||
scan->xs_orderbynulls[i] = false;
|
||||
}
|
||||
else if (so->orderByTypes[i] == FLOAT4OID)
|
||||
{
|
||||
/* convert distance function's result to ORDER BY type */
|
||||
#ifndef USE_FLOAT4_BYVAL
|
||||
/* must free any old value to avoid memory leakage */
|
||||
if (!scan->xs_orderbynulls[i])
|
||||
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
|
||||
#endif
|
||||
scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i]);
|
||||
scan->xs_orderbynulls[i] = false;
|
||||
}
|
||||
|
@ -491,7 +500,7 @@ getNextNearest(IndexScanDesc scan)
|
|||
* calculated by the distance function to that. The
|
||||
* executor won't actually need the order by values we
|
||||
* return here, if there are no lossy results, so only
|
||||
* insist on the datatype if the *recheck is set.
|
||||
* insist on converting if the *recheck flag is set.
|
||||
*/
|
||||
if (scan->xs_recheckorderby)
|
||||
elog(ERROR, "GiST operator family's FOR ORDER BY operator must return float8 or float4 if the distance function is lossy");
|
||||
|
|
|
@ -88,8 +88,9 @@ gistbeginscan(PG_FUNCTION_ARGS)
|
|||
so->qual_ok = true; /* in case there are zero keys */
|
||||
if (scan->numberOfOrderBys > 0)
|
||||
{
|
||||
scan->xs_orderbyvals = palloc(sizeof(Datum) * scan->numberOfOrderBys);
|
||||
scan->xs_orderbyvals = palloc0(sizeof(Datum) * scan->numberOfOrderBys);
|
||||
scan->xs_orderbynulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
|
||||
memset(scan->xs_orderbynulls, true, sizeof(bool) * scan->numberOfOrderBys);
|
||||
}
|
||||
|
||||
scan->opaque = so;
|
||||
|
@ -284,6 +285,8 @@ gistrescan(PG_FUNCTION_ARGS)
|
|||
GIST_DISTANCE_PROC, skey->sk_attno,
|
||||
RelationGetRelationName(scan->indexRelation));
|
||||
|
||||
fmgr_info_copy(&(skey->sk_func), finfo, so->giststate->scanCxt);
|
||||
|
||||
/*
|
||||
* Look up the datatype returned by the original ordering operator.
|
||||
* GiST always uses a float8 for the distance function, but the
|
||||
|
@ -297,7 +300,6 @@ gistrescan(PG_FUNCTION_ARGS)
|
|||
* first time.
|
||||
*/
|
||||
so->orderByTypes[i] = get_func_rettype(skey->sk_func.fn_oid);
|
||||
fmgr_info_copy(&(skey->sk_func), finfo, so->giststate->scanCxt);
|
||||
|
||||
/* Restore prior fn_extra pointers, if not first time */
|
||||
if (!first_time)
|
||||
|
|
|
@ -459,10 +459,16 @@ reorderqueue_pop(IndexScanState *node)
|
|||
{
|
||||
HeapTuple result;
|
||||
ReorderTuple *topmost;
|
||||
int i;
|
||||
|
||||
topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue);
|
||||
|
||||
result = topmost->htup;
|
||||
for (i = 0; i < node->iss_NumOrderByKeys; i++)
|
||||
{
|
||||
if (!node->iss_OrderByTypByVals[i] && !topmost->orderbynulls[i])
|
||||
pfree(DatumGetPointer(topmost->orderbyvals[i]));
|
||||
}
|
||||
pfree(topmost->orderbyvals);
|
||||
pfree(topmost->orderbynulls);
|
||||
pfree(topmost);
|
||||
|
|
|
@ -95,12 +95,13 @@ typedef struct IndexScanDescData
|
|||
/*
|
||||
* When fetching with an ordering operator, the values of the ORDER BY
|
||||
* expressions of the last returned tuple, according to the index. If
|
||||
* xs_recheck is true, these need to be rechecked just like the scan keys,
|
||||
* and the values returned here are a lower-bound on the actual values.
|
||||
* xs_recheckorderby is true, these need to be rechecked just like the
|
||||
* scan keys, and the values returned here are a lower-bound on the actual
|
||||
* values.
|
||||
*/
|
||||
Datum *xs_orderbyvals;
|
||||
bool *xs_orderbynulls;
|
||||
bool xs_recheckorderby; /* T means ORDER BY exprs must be rechecked */
|
||||
bool xs_recheckorderby;
|
||||
|
||||
/* state data for traversing HOT chains in index_getnext */
|
||||
bool xs_continue_hot; /* T if must keep walking HOT chain */
|
||||
|
|
Loading…
Reference in New Issue