1996-08-26 22:02:12 +02:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * gistscan.c
 *	  routines to manage scans on GiST index relations
 *
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/gist/gistscan.c
 *
 *-------------------------------------------------------------------------
 */
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "postgres.h"
|
1996-10-31 09:09:47 +01:00
|
|
|
|
2005-05-17 05:34:18 +02:00
|
|
|
#include "access/gist_private.h"
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "access/gistscan.h"
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "access/relscan.h"
|
2019-09-08 20:07:30 +02:00
|
|
|
#include "utils/float.h"
|
Fix datatype confusion with the new lossy GiST distance functions.
We can only support a lossy distance function when the distance function's
datatype is comparable with the original ordering operator's datatype.
The distance function always returns a float8, so we are limited to float8,
and float4 (by a hard-coded cast of the float8 to float4).
In light of this limitation, it seems like a good idea to have a separate
'recheck' flag for the ORDER BY expressions, so that if you have a non-lossy
distance function, it still works with lossy quals. There are cases like
that with the build-in or contrib opclasses, but it's plausible.
There was a hidden assumption that the ORDER BY values returned by GiST
match the original ordering operator's return type, but there are plenty
of examples where that's not true, e.g. in btree_gist and pg_trgm. As long
as the distance function is not lossy, we can tolerate that and just not
return the distance to the executor (or rather, always return NULL). The
executor doesn't need the distances if there are no lossy results.
There was another little bug: the recheck variable was not initialized
before calling the distance function. That revealed the bigger issue,
as the executor tried to reorder tuples that didn't need reordering, and
that failed because of the datatype mismatch.
2015-05-15 16:59:46 +02:00
|
|
|
#include "utils/lsyscache.h"
|
2005-05-17 02:59:30 +02:00
|
|
|
#include "utils/memutils.h"
|
2010-12-04 02:52:18 +01:00
|
|
|
#include "utils/rel.h"
|
1996-10-21 07:14:02 +02:00
|
|
|
|
2010-12-04 02:52:18 +01:00
|
|
|
|
|
|
|
/*
|
2014-12-22 11:05:57 +01:00
|
|
|
* Pairing heap comparison function for the GISTSearchItem queue
|
2010-12-04 02:52:18 +01:00
|
|
|
*/
|
|
|
|
static int
|
2014-12-22 11:05:57 +01:00
|
|
|
pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
|
2010-12-04 02:52:18 +01:00
|
|
|
{
|
2014-12-22 11:05:57 +01:00
|
|
|
const GISTSearchItem *sa = (const GISTSearchItem *) a;
|
|
|
|
const GISTSearchItem *sb = (const GISTSearchItem *) b;
|
2010-12-04 02:52:18 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) arg;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Order according to distance comparison */
|
|
|
|
for (i = 0; i < scan->numberOfOrderBys; i++)
|
|
|
|
{
|
2019-09-19 20:30:19 +02:00
|
|
|
if (sa->distances[i].isnull)
|
2019-09-08 20:13:40 +02:00
|
|
|
{
|
2019-09-19 20:30:19 +02:00
|
|
|
if (!sb->distances[i].isnull)
|
2019-09-08 20:13:40 +02:00
|
|
|
return -1;
|
|
|
|
}
|
2019-09-19 20:30:19 +02:00
|
|
|
else if (sb->distances[i].isnull)
|
2019-09-08 20:13:40 +02:00
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-09-19 20:30:19 +02:00
|
|
|
int cmp = -float8_cmp_internal(sa->distances[i].value,
|
|
|
|
sb->distances[i].value);
|
2019-09-08 20:13:40 +02:00
|
|
|
|
|
|
|
if (cmp != 0)
|
|
|
|
return cmp;
|
|
|
|
}
|
2010-12-04 02:52:18 +01:00
|
|
|
}
|
|
|
|
|
2014-12-22 11:05:57 +01:00
|
|
|
/* Heap items go before inner pages, to ensure a depth-first search */
|
|
|
|
if (GISTSearchItemIsHeap(*sa) && !GISTSearchItemIsHeap(*sb))
|
|
|
|
return 1;
|
2015-02-17 21:33:38 +01:00
|
|
|
if (!GISTSearchItemIsHeap(*sa) && GISTSearchItemIsHeap(*sb))
|
|
|
|
return -1;
|
2010-12-04 02:52:18 +01:00
|
|
|
|
2014-12-22 11:05:57 +01:00
|
|
|
return 0;
|
2010-12-04 02:52:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Index AM API functions for scanning GiST indexes
|
|
|
|
*/
|
1996-08-26 22:02:12 +02:00
|
|
|
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
2016-01-18 01:36:59 +01:00
|
|
|
/*
 * gistbeginscan() -- start a scan on a GiST index.
 *
 * Creates and returns an IndexScanDesc for the given index relation,
 * with room for 'nkeys' scan keys and 'norderbys' ordering operators.
 * The actual keys are installed later by gistrescan(); here we only
 * build the scan-lifespan state (GISTSTATE, opaque data, workspaces).
 */
IndexScanDesc
gistbeginscan(Relation r, int nkeys, int norderbys)
{
	IndexScanDesc scan;
	GISTSTATE  *giststate;
	GISTScanOpaque so;
	MemoryContext oldCxt;

	scan = RelationGetIndexScan(r, nkeys, norderbys);

	/* First, set up a GISTSTATE with a scan-lifespan memory context */
	giststate = initGISTstate(scan->indexRelation);

	/*
	 * Everything made below is in the scanCxt, or is a child of the scanCxt,
	 * so it'll all go away automatically in gistendscan.
	 */
	oldCxt = MemoryContextSwitchTo(giststate->scanCxt);

	/* initialize opaque data */
	so = (GISTScanOpaque) palloc0(sizeof(GISTScanOpaqueData));
	so->giststate = giststate;
	giststate->tempCxt = createTempGistContext();
	/* The search queue itself is built lazily, on first (re)scan. */
	so->queue = NULL;
	so->queueCxt = giststate->scanCxt;	/* see gistrescan */

	/* workspaces with size dependent on numberOfOrderBys: */
	so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys);
	so->qual_ok = true;			/* in case there are zero keys */
	if (scan->numberOfOrderBys > 0)
	{
		scan->xs_orderbyvals = palloc0(sizeof(Datum) * scan->numberOfOrderBys);
		scan->xs_orderbynulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
		/* mark all ORDER BY outputs as NULL until real distances are computed */
		memset(scan->xs_orderbynulls, true, sizeof(bool) * scan->numberOfOrderBys);
	}

	so->killedItems = NULL;		/* until needed */
	so->numKilled = 0;
	so->curBlkno = InvalidBlockNumber;
	so->curPageLSN = InvalidXLogRecPtr;

	scan->opaque = so;

	/*
	 * All fields required for index-only scans are initialized in gistrescan,
	 * as we don't know yet if we're doing an index-only scan or not.
	 */

	MemoryContextSwitchTo(oldCxt);

	return scan;
}
|
|
|
|
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
2016-01-18 01:36:59 +01:00
|
|
|
void
|
|
|
|
gistrescan(IndexScanDesc scan, ScanKey key, int nkeys,
|
|
|
|
ScanKey orderbys, int norderbys)
|
1996-08-26 22:02:12 +02:00
|
|
|
{
|
2010-12-04 02:52:18 +01:00
|
|
|
/* nkeys and norderbys arguments are ignored */
|
2010-12-03 02:50:48 +01:00
|
|
|
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
|
2011-10-01 01:48:57 +02:00
|
|
|
bool first_time;
|
1996-08-26 22:02:12 +02:00
|
|
|
int i;
|
2010-12-04 02:52:18 +01:00
|
|
|
MemoryContext oldCxt;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2010-12-03 02:50:48 +01:00
|
|
|
/* rescan an existing indexscan --- reset state */
|
2011-10-01 01:48:57 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The first time through, we create the search queue in the scanCxt.
|
|
|
|
* Subsequent times through, we create the queue in a separate queueCxt,
|
|
|
|
* which is created on the second call and reset on later calls. Thus, in
|
|
|
|
* the common case where a scan is only rescan'd once, we just put the
|
|
|
|
* queue in scanCxt and don't pay the overhead of making a second memory
|
2016-09-20 10:38:25 +02:00
|
|
|
* context. If we do rescan more than once, the first queue is just left
|
2011-10-01 01:48:57 +02:00
|
|
|
* for dead until end of scan; this small wastage seems worth the savings
|
|
|
|
* in the common case.
|
|
|
|
*/
|
|
|
|
if (so->queue == NULL)
|
|
|
|
{
|
|
|
|
/* first time through */
|
|
|
|
Assert(so->queueCxt == so->giststate->scanCxt);
|
|
|
|
first_time = true;
|
|
|
|
}
|
|
|
|
else if (so->queueCxt == so->giststate->scanCxt)
|
|
|
|
{
|
|
|
|
/* second time through */
|
|
|
|
so->queueCxt = AllocSetContextCreate(so->giststate->scanCxt,
|
|
|
|
"GiST queue context",
|
Add macros to make AllocSetContextCreate() calls simpler and safer.
I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls
had typos in the context-sizing parameters. While none of these led to
especially significant problems, they did create minor inefficiencies,
and it's now clear that expecting people to copy-and-paste those calls
accurately is not a great idea. Let's reduce the risk of future errors
by introducing single macros that encapsulate the common use-cases.
Three such macros are enough to cover all but two special-purpose contexts;
those two calls can be left as-is, I think.
While this patch doesn't in itself improve matters for third-party
extensions, it doesn't break anything for them either, and they can
gradually adopt the simplified notation over time.
In passing, change TopMemoryContext to use the default allocation
parameters. Formerly it could only be extended 8K at a time. That was
probably reasonable when this code was written; but nowadays we create
many more contexts than we did then, so that it's not unusual to have a
couple hundred K in TopMemoryContext, even without considering various
dubious code that sticks other things there. There seems no good reason
not to let it use growing blocks like most other contexts.
Back-patch to 9.6, mostly because that's still close enough to HEAD that
it's easy to do so, and keeping the branches in sync can be expected to
avoid some future back-patching pain. The bugs fixed by these changes
don't seem to be significant enough to justify fixing them further back.
Discussion: <21072.1472321324@sss.pgh.pa.us>
2016-08-27 23:50:38 +02:00
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
2011-10-01 01:48:57 +02:00
|
|
|
first_time = false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* third or later time through */
|
|
|
|
MemoryContextReset(so->queueCxt);
|
|
|
|
first_time = false;
|
|
|
|
}
|
2003-03-24 00:01:03 +01:00
|
|
|
|
2015-03-26 18:12:00 +01:00
|
|
|
/*
|
2015-03-26 22:07:52 +01:00
|
|
|
* If we're doing an index-only scan, on the first call, also initialize a
|
|
|
|
* tuple descriptor to represent the returned index tuples and create a
|
|
|
|
* memory context to hold them during the scan.
|
2015-03-26 18:12:00 +01:00
|
|
|
*/
|
2017-02-27 23:20:34 +01:00
|
|
|
if (scan->xs_want_itup && !scan->xs_hitupdesc)
|
2015-03-26 22:07:52 +01:00
|
|
|
{
|
|
|
|
int natts;
|
2019-03-10 09:36:47 +01:00
|
|
|
int nkeyatts;
|
2015-03-26 22:07:52 +01:00
|
|
|
int attno;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The storage type of the index can be different from the original
|
|
|
|
* datatype being indexed, so we cannot just grab the index's tuple
|
|
|
|
* descriptor. Instead, construct a descriptor with the original data
|
|
|
|
* types.
|
|
|
|
*/
|
|
|
|
natts = RelationGetNumberOfAttributes(scan->indexRelation);
|
2019-03-10 09:36:47 +01:00
|
|
|
nkeyatts = IndexRelationGetNumberOfKeyAttributes(scan->indexRelation);
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
so->giststate->fetchTupdesc = CreateTemplateTupleDesc(natts);
|
2019-03-10 09:36:47 +01:00
|
|
|
for (attno = 1; attno <= nkeyatts; attno++)
|
2015-03-26 22:07:52 +01:00
|
|
|
{
|
|
|
|
TupleDescInitEntry(so->giststate->fetchTupdesc, attno, NULL,
|
|
|
|
scan->indexRelation->rd_opcintype[attno - 1],
|
|
|
|
-1, 0);
|
|
|
|
}
|
2019-03-10 09:36:47 +01:00
|
|
|
|
|
|
|
for (; attno <= natts; attno++)
|
|
|
|
{
|
|
|
|
/* taking opcintype from giststate->tupdesc */
|
|
|
|
TupleDescInitEntry(so->giststate->fetchTupdesc, attno, NULL,
|
|
|
|
TupleDescAttr(so->giststate->leafTupdesc,
|
|
|
|
attno - 1)->atttypid,
|
|
|
|
-1, 0);
|
|
|
|
}
|
2017-02-27 23:20:34 +01:00
|
|
|
scan->xs_hitupdesc = so->giststate->fetchTupdesc;
|
2015-03-26 22:07:52 +01:00
|
|
|
|
2017-02-27 23:20:34 +01:00
|
|
|
/* Also create a memory context that will hold the returned tuples */
|
2015-03-26 18:12:00 +01:00
|
|
|
so->pageDataCxt = AllocSetContextCreate(so->giststate->scanCxt,
|
|
|
|
"GiST page data context",
|
Add macros to make AllocSetContextCreate() calls simpler and safer.
I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls
had typos in the context-sizing parameters. While none of these led to
especially significant problems, they did create minor inefficiencies,
and it's now clear that expecting people to copy-and-paste those calls
accurately is not a great idea. Let's reduce the risk of future errors
by introducing single macros that encapsulate the common use-cases.
Three such macros are enough to cover all but two special-purpose contexts;
those two calls can be left as-is, I think.
While this patch doesn't in itself improve matters for third-party
extensions, it doesn't break anything for them either, and they can
gradually adopt the simplified notation over time.
In passing, change TopMemoryContext to use the default allocation
parameters. Formerly it could only be extended 8K at a time. That was
probably reasonable when this code was written; but nowadays we create
many more contexts than we did then, so that it's not unusual to have a
couple hundred K in TopMemoryContext, even without considering various
dubious code that sticks other things there. There seems no good reason
not to let it use growing blocks like most other contexts.
Back-patch to 9.6, mostly because that's still close enough to HEAD that
it's easy to do so, and keeping the branches in sync can be expected to
avoid some future back-patching pain. The bugs fixed by these changes
don't seem to be significant enough to justify fixing them further back.
Discussion: <21072.1472321324@sss.pgh.pa.us>
2016-08-27 23:50:38 +02:00
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
2015-03-26 22:07:52 +01:00
|
|
|
}
|
2015-03-26 18:12:00 +01:00
|
|
|
|
2016-09-20 10:38:25 +02:00
|
|
|
/* create new, empty pairing heap for search queue */
|
2010-12-04 02:52:18 +01:00
|
|
|
oldCxt = MemoryContextSwitchTo(so->queueCxt);
|
2014-12-22 11:05:57 +01:00
|
|
|
so->queue = pairingheap_allocate(pairingheap_GISTSearchItem_cmp, scan);
|
2010-12-04 02:52:18 +01:00
|
|
|
MemoryContextSwitchTo(oldCxt);
|
2007-01-20 19:43:35 +01:00
|
|
|
|
2010-12-04 02:52:18 +01:00
|
|
|
so->firstCall = true;
|
2008-12-04 12:08:46 +01:00
|
|
|
|
2003-03-24 00:01:03 +01:00
|
|
|
/* Update scan key, if a new one is given */
|
2005-05-17 02:59:30 +02:00
|
|
|
if (key && scan->numberOfKeys > 0)
|
2003-03-24 00:01:03 +01:00
|
|
|
{
|
2013-05-10 05:08:19 +02:00
|
|
|
void **fn_extras = NULL;
|
|
|
|
|
2011-10-01 01:48:57 +02:00
|
|
|
/*
|
|
|
|
* If this isn't the first time through, preserve the fn_extra
|
|
|
|
* pointers, so that if the consistentFns are using them to cache
|
|
|
|
* data, that data is not leaked across a rescan.
|
|
|
|
*/
|
|
|
|
if (!first_time)
|
|
|
|
{
|
2013-05-10 05:08:19 +02:00
|
|
|
fn_extras = (void **) palloc(scan->numberOfKeys * sizeof(void *));
|
2011-10-01 01:48:57 +02:00
|
|
|
for (i = 0; i < scan->numberOfKeys; i++)
|
2013-05-10 05:08:19 +02:00
|
|
|
fn_extras[i] = scan->keyData[i].sk_func.fn_extra;
|
2011-10-01 01:48:57 +02:00
|
|
|
}
|
|
|
|
|
2005-05-17 02:59:30 +02:00
|
|
|
memmove(scan->keyData, key,
|
|
|
|
scan->numberOfKeys * sizeof(ScanKeyData));
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-03-24 00:01:03 +01:00
|
|
|
/*
|
2010-12-04 02:52:18 +01:00
|
|
|
* Modify the scan key so that the Consistent method is called for all
|
2005-05-17 02:59:30 +02:00
|
|
|
* comparisons. The original operator is passed to the Consistent
|
|
|
|
* function in the form of its strategy number, which is available
|
|
|
|
* from the sk_strategy field, and its subtype from the sk_subtype
|
2011-04-13 01:19:24 +02:00
|
|
|
* field.
|
2008-10-17 19:02:21 +02:00
|
|
|
*
|
|
|
|
* Next, if any of keys is a NULL and that key is not marked with
|
2010-01-01 22:53:49 +01:00
|
|
|
* SK_SEARCHNULL/SK_SEARCHNOTNULL then nothing can be found (ie, we
|
|
|
|
* assume all indexable operators are strict).
|
2003-03-24 00:01:03 +01:00
|
|
|
*/
|
2010-12-04 02:52:18 +01:00
|
|
|
so->qual_ok = true;
|
|
|
|
|
2008-10-17 19:02:21 +02:00
|
|
|
for (i = 0; i < scan->numberOfKeys; i++)
|
|
|
|
{
|
2010-12-04 02:52:18 +01:00
|
|
|
ScanKey skey = scan->keyData + i;
|
2008-10-17 19:02:21 +02:00
|
|
|
|
2016-02-02 13:20:33 +01:00
|
|
|
/*
|
|
|
|
* Copy consistent support function to ScanKey structure instead
|
|
|
|
* of function implementing filtering operator.
|
|
|
|
*/
|
2013-05-10 05:08:19 +02:00
|
|
|
fmgr_info_copy(&(skey->sk_func),
|
|
|
|
&(so->giststate->consistentFn[skey->sk_attno - 1]),
|
|
|
|
so->giststate->scanCxt);
|
|
|
|
|
|
|
|
/* Restore prior fn_extra pointers, if not first time */
|
|
|
|
if (!first_time)
|
|
|
|
skey->sk_func.fn_extra = fn_extras[i];
|
2010-01-01 22:53:49 +01:00
|
|
|
|
|
|
|
if (skey->sk_flags & SK_ISNULL)
|
2008-10-17 19:02:21 +02:00
|
|
|
{
|
2010-01-01 22:53:49 +01:00
|
|
|
if (!(skey->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL)))
|
2008-10-17 19:02:21 +02:00
|
|
|
so->qual_ok = false;
|
|
|
|
}
|
|
|
|
}
|
2013-05-10 05:08:19 +02:00
|
|
|
|
|
|
|
if (!first_time)
|
|
|
|
pfree(fn_extras);
|
1997-05-22 18:01:33 +02:00
|
|
|
}
|
2000-06-13 09:35:40 +02:00
|
|
|
|
2010-12-04 02:52:18 +01:00
|
|
|
/* Update order-by key, if a new one is given */
|
|
|
|
if (orderbys && scan->numberOfOrderBys > 0)
|
|
|
|
{
|
2013-05-10 05:08:19 +02:00
|
|
|
void **fn_extras = NULL;
|
|
|
|
|
2011-10-01 01:48:57 +02:00
|
|
|
/* As above, preserve fn_extra if not first time through */
|
|
|
|
if (!first_time)
|
|
|
|
{
|
2013-05-10 05:08:19 +02:00
|
|
|
fn_extras = (void **) palloc(scan->numberOfOrderBys * sizeof(void *));
|
2011-10-01 01:48:57 +02:00
|
|
|
for (i = 0; i < scan->numberOfOrderBys; i++)
|
2013-05-10 05:08:19 +02:00
|
|
|
fn_extras[i] = scan->orderByData[i].sk_func.fn_extra;
|
2011-10-01 01:48:57 +02:00
|
|
|
}
|
|
|
|
|
2010-12-04 02:52:18 +01:00
|
|
|
memmove(scan->orderByData, orderbys,
|
|
|
|
scan->numberOfOrderBys * sizeof(ScanKeyData));
|
|
|
|
|
Fix datatype confusion with the new lossy GiST distance functions.
We can only support a lossy distance function when the distance function's
datatype is comparable with the original ordering operator's datatype.
The distance function always returns a float8, so we are limited to float8,
and float4 (by a hard-coded cast of the float8 to float4).
In light of this limitation, it seems like a good idea to have a separate
'recheck' flag for the ORDER BY expressions, so that if you have a non-lossy
distance function, it still works with lossy quals. There are cases like
that with the build-in or contrib opclasses, but it's plausible.
There was a hidden assumption that the ORDER BY values returned by GiST
match the original ordering operator's return type, but there are plenty
of examples where that's not true, e.g. in btree_gist and pg_trgm. As long
as the distance function is not lossy, we can tolerate that and just not
return the distance to the executor (or rather, always return NULL). The
executor doesn't need the distances if there are no lossy results.
There was another little bug: the recheck variable was not initialized
before calling the distance function. That revealed the bigger issue,
as the executor tried to reorder tuples that didn't need reordering, and
that failed because of the datatype mismatch.
2015-05-15 16:59:46 +02:00
|
|
|
so->orderByTypes = (Oid *) palloc(scan->numberOfOrderBys * sizeof(Oid));
|
|
|
|
|
2010-12-04 02:52:18 +01:00
|
|
|
/*
|
|
|
|
* Modify the order-by key so that the Distance method is called for
|
|
|
|
* all comparisons. The original operator is passed to the Distance
|
|
|
|
* function in the form of its strategy number, which is available
|
|
|
|
* from the sk_strategy field, and its subtype from the sk_subtype
|
2011-04-13 01:19:24 +02:00
|
|
|
* field.
|
2010-12-04 02:52:18 +01:00
|
|
|
*/
|
|
|
|
for (i = 0; i < scan->numberOfOrderBys; i++)
|
|
|
|
{
|
|
|
|
ScanKey skey = scan->orderByData + i;
|
2013-05-10 05:08:19 +02:00
|
|
|
FmgrInfo *finfo = &(so->giststate->distanceFn[skey->sk_attno - 1]);
|
2010-12-04 02:52:18 +01:00
|
|
|
|
|
|
|
/* Check we actually have a distance function ... */
|
2013-05-10 05:08:19 +02:00
|
|
|
if (!OidIsValid(finfo->fn_oid))
|
2010-12-04 02:52:18 +01:00
|
|
|
elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
|
|
|
|
GIST_DISTANCE_PROC, skey->sk_attno,
|
|
|
|
RelationGetRelationName(scan->indexRelation));
|
2013-05-10 05:08:19 +02:00
|
|
|
|
Fix datatype confusion with the new lossy GiST distance functions.
We can only support a lossy distance function when the distance function's
datatype is comparable with the original ordering operator's datatype.
The distance function always returns a float8, so we are limited to float8,
and float4 (by a hard-coded cast of the float8 to float4).
In light of this limitation, it seems like a good idea to have a separate
'recheck' flag for the ORDER BY expressions, so that if you have a non-lossy
distance function, it still works with lossy quals. There are cases like
that with the build-in or contrib opclasses, but it's plausible.
There was a hidden assumption that the ORDER BY values returned by GiST
match the original ordering operator's return type, but there are plenty
of examples where that's not true, e.g. in btree_gist and pg_trgm. As long
as the distance function is not lossy, we can tolerate that and just not
return the distance to the executor (or rather, always return NULL). The
executor doesn't need the distances if there are no lossy results.
There was another little bug: the recheck variable was not initialized
before calling the distance function. That revealed the bigger issue,
as the executor tried to reorder tuples that didn't need reordering, and
that failed because of the datatype mismatch.
2015-05-15 16:59:46 +02:00
|
|
|
/*
|
|
|
|
* Look up the datatype returned by the original ordering
|
|
|
|
* operator. GiST always uses a float8 for the distance function,
|
|
|
|
* but the ordering operator could be anything else.
|
|
|
|
*
|
|
|
|
* XXX: The distance function is only allowed to be lossy if the
|
|
|
|
* ordering operator's result type is float4 or float8. Otherwise
|
|
|
|
* we don't know how to return the distance to the executor. But
|
|
|
|
* we cannot check that here, as we won't know if the distance
|
|
|
|
* function is lossy until it returns *recheck = true for the
|
|
|
|
* first time.
|
|
|
|
*/
|
|
|
|
so->orderByTypes[i] = get_func_rettype(skey->sk_func.fn_oid);
|
2013-05-10 05:08:19 +02:00
|
|
|
|
2016-02-02 13:20:33 +01:00
|
|
|
/*
|
|
|
|
* Copy distance support function to ScanKey structure instead of
|
|
|
|
* function implementing ordering operator.
|
|
|
|
*/
|
|
|
|
fmgr_info_copy(&(skey->sk_func), finfo, so->giststate->scanCxt);
|
|
|
|
|
2013-05-10 05:08:19 +02:00
|
|
|
/* Restore prior fn_extra pointers, if not first time */
|
|
|
|
if (!first_time)
|
|
|
|
skey->sk_func.fn_extra = fn_extras[i];
|
2010-12-04 02:52:18 +01:00
|
|
|
}
|
2013-05-10 05:08:19 +02:00
|
|
|
|
|
|
|
if (!first_time)
|
|
|
|
pfree(fn_extras);
|
2010-12-04 02:52:18 +01:00
|
|
|
}
|
Fix pfree-of-already-freed-tuple when rescanning a GiST index-only scan.
GiST's getNextNearest() function attempts to pfree the previously-returned
tuple if any (that is, scan->xs_hitup in HEAD, or scan->xs_itup in older
branches). However, if we are rescanning a plan node after ending a
previous scan early, those tuple pointers could be pointing to garbage,
because they would be pointing into the scan's pageDataCxt or queueCxt
which has been reset. In a debug build this reliably results in a crash,
although I think it might sometimes accidentally fail to fail in
production builds.
To fix, clear the pointer field anyplace we reset a context it might
be pointing into. This may be overkill --- I think probably only the
queueCxt case is involved in this bug, so that resetting in gistrescan()
would be sufficient --- but dangling pointers are generally bad news,
so let's avoid them.
Another plausible answer might be to just not bother with the pfree in
getNextNearest(). The reconstructed tuples would go away anyway in the
context resets, and I'm far from convinced that freeing them a bit earlier
really saves anything meaningful. I'll stick with the original logic in
this patch, but if we find more problems in the same area we should
consider that approach.
Per bug #14641 from Denis Smirnov. Back-patch to 9.5 where this
logic was introduced.
Discussion: https://postgr.es/m/20170504072034.24366.57688@wrigleys.postgresql.org
2017-05-04 19:59:13 +02:00
|
|
|
|
|
|
|
/* any previous xs_hitup will have been pfree'd in context resets above */
|
|
|
|
scan->xs_hitup = NULL;
|
1996-08-26 22:02:12 +02:00
|
|
|
}
|
|
|
|
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
2016-01-18 01:36:59 +01:00
|
|
|
void
|
|
|
|
gistendscan(IndexScanDesc scan)
|
1996-08-26 22:02:12 +02:00
|
|
|
{
|
2010-12-03 02:50:48 +01:00
|
|
|
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
|
|
|
|
|
2011-10-01 01:48:57 +02:00
|
|
|
/*
|
|
|
|
* freeGISTstate is enough to clean up everything made by gistbeginscan,
|
|
|
|
* as well as the queueCxt if there is a separate context for it.
|
|
|
|
*/
|
2010-12-04 02:52:18 +01:00
|
|
|
freeGISTstate(so->giststate);
|
1996-08-26 22:02:12 +02:00
|
|
|
}
|