/*-------------------------------------------------------------------------
 *
 * nodeIndexonlyscan.c
 *	  Routines to support index-only scans
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeIndexonlyscan.c
 *
 *-------------------------------------------------------------------------
 */

/*
 * INTERFACE ROUTINES
 *		ExecIndexOnlyScan			scans an index
 *		IndexOnlyNext				retrieve next tuple
 *		ExecInitIndexOnlyScan		creates and initializes state info.
 *		ExecReScanIndexOnlyScan		rescans the indexed relation.
 *		ExecEndIndexOnlyScan		releases all storage.
 *		ExecIndexOnlyMarkPos		marks scan position.
 *		ExecIndexOnlyRestrPos		restores scan position.
 */
#include "postgres.h"

#include "access/relscan.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeIndexscan.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"


static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
				TupleDesc itupdesc);

/* ----------------------------------------------------------------
 *		IndexOnlyNext
 *
 *		Retrieve a tuple from the IndexOnlyScan node's index.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	IndexScanDesc scandesc;
	TupleTableSlot *slot;
	ItemPointer tid;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;
	/* flip direction if this is an overall backward scan */
	if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
	{
		if (ScanDirectionIsForward(direction))
			direction = BackwardScanDirection;
		else if (ScanDirectionIsBackward(direction))
			direction = ForwardScanDirection;
	}
	scandesc = node->ioss_ScanDesc;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;

	/*
	 * OK, now that we have what we need, fetch the next tuple.
	 */
	while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
	{
		HeapTuple	tuple = NULL;

		/*
		 * We can skip the heap fetch if the TID references a heap page on
		 * which all tuples are known visible to everybody.  In any case,
		 * we'll use the index tuple not the heap tuple as the data source.
		 *
		 * Note on Memory Ordering Effects: visibilitymap_test does not lock
		 * the visibility map buffer, and therefore the result we read here
		 * could be slightly stale.  However, it can't be stale enough to
		 * matter.  It suffices to show that (1) there is a read barrier
		 * between the time we read the index TID and the time we test the
		 * visibility map; and (2) there is a write barrier between the time
		 * some other concurrent process clears the visibility map bit and the
		 * time it inserts the index TID.  Since acquiring or releasing a
		 * LWLock interposes a full barrier, this is easy to show: (1) is
		 * satisfied by the release of the index buffer content lock after
		 * reading the TID; and (2) is satisfied by the acquisition of the
		 * buffer content lock in order to insert the TID.
		 */
		if (!visibilitymap_test(scandesc->heapRelation,
								ItemPointerGetBlockNumber(tid),
								&node->ioss_VMBuffer))
		{
			/*
			 * Rats, we have to visit the heap to check visibility.
			 */
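			/* this counter is reported as "Heap Fetches" by EXPLAIN ANALYZE */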
			node->ioss_HeapFetches++;
			tuple = index_fetch_heap(scandesc);
			if (tuple == NULL)
				continue;		/* no visible tuple, try next index entry */

			/*
			 * Only MVCC snapshots are supported here, so there should be no
			 * need to keep following the HOT chain once a visible entry has
			 * been found.  If we did want to allow that, we'd need to keep
			 * more state to remember not to call index_getnext_tid next time.
			 */
			if (scandesc->xs_continue_hot)
				elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");

			/*
			 * Note: at this point we are holding a pin on the heap page, as
			 * recorded in scandesc->xs_cbuf.  We could release that pin now,
			 * but it's not clear whether it's a win to do so.  The next index
			 * entry might require a visit to the same heap page.
			 */
		}

		/*
		 * Fill the scan tuple slot with data from the index.
		 */
		StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);

		/*
		 * If the index was lossy, we have to recheck the index quals.
		 * (Currently, this can never happen, but we should support the case
		 * for possible future use, eg with GiST indexes.)
		 */
		if (scandesc->xs_recheck)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);
			if (!ExecQual(node->indexqual, econtext, false))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				continue;
			}
		}

		/*
		 * Predicate locks for index-only scans must be acquired at the page
		 * level when the heap is not accessed, since tuple-level predicate
		 * locks need the tuple's xmin value.  If we had to visit the tuple
		 * anyway, then we already have the tuple-level lock and can skip the
		 * page lock.
		 */
		if (tuple == NULL)
			PredicateLockPage(scandesc->heapRelation,
							  ItemPointerGetBlockNumber(tid),
							  estate->es_snapshot);

		return slot;
	}

	/*
	 * if we get here it means the index scan failed so we are at the end of
	 * the scan.
	 */
	return ExecClearTuple(slot);
}

/*
 * StoreIndexTuple
 *		Fill the slot with data from the index tuple.
 *
 * At some point this might be generally-useful functionality, but
 * right now we don't need it elsewhere.
 */
static void
StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
{
	int			nindexatts = itupdesc->natts;
	Datum	   *values = slot->tts_values;
	bool	   *isnull = slot->tts_isnull;
	int			i;

	/*
	 * Note: we must use the tupdesc supplied by the AM in index_getattr, not
	 * the slot's tupdesc, in case the latter has different datatypes (this
	 * happens for btree name_ops in particular).  They'd better have the same
	 * number of columns though, as well as being datatype-compatible which is
	 * something we can't so easily check.
	 */
	Assert(slot->tts_tupleDescriptor->natts == nindexatts);

	ExecClearTuple(slot);
	for (i = 0; i < nindexatts; i++)
		values[i] = index_getattr(itup, i + 1, itupdesc, &isnull[i]);
	ExecStoreVirtualTuple(slot);
}

/*
 * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * This can't really happen, since an index can't supply CTID which would
 * be necessary data for any potential EvalPlanQual target relation. If it
 * did happen, the EPQ code would pass us the wrong data, namely a heap
 * tuple not an index tuple. So throw an error.
 */
static bool
IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
{
	elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
	return false;				/* keep compiler quiet */
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyScan(node)
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecIndexOnlyScan(IndexOnlyScanState *node)
{
	/*
	 * If we have runtime keys and they've not already been set up, do it now.
	 */
	if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
		ExecReScan((PlanState *) node);

	return ExecScan(&node->ss,
					(ExecScanAccessMtd) IndexOnlyNext,
					(ExecScanRecheckMtd) IndexOnlyRecheck);
}

/* ----------------------------------------------------------------
 *		ExecReScanIndexOnlyScan(node)
 *
 *		Recalculates the values of any scan keys whose value depends on
 *		information known at runtime, then rescans the indexed relation.
 *
 *		Updating the scan key was formerly done separately in
 *		ExecUpdateIndexScanKeys. Integrating it into ReScan makes
 *		rescans of indices and relations/general streams more uniform.
 * ----------------------------------------------------------------
 */
void
ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
{
	/*
	 * If we are doing runtime key calculations (ie, any of the index key
	 * values weren't simple Consts), compute the new key values.  But first,
	 * reset the context so we don't leak memory as each outer tuple is
	 * scanned.  Note this assumes that we will recalculate *all* runtime keys
	 * on each call.
	 */
	if (node->ioss_NumRuntimeKeys != 0)
	{
		ExprContext *econtext = node->ioss_RuntimeContext;

		ResetExprContext(econtext);
		ExecIndexEvalRuntimeKeys(econtext,
								 node->ioss_RuntimeKeys,
								 node->ioss_NumRuntimeKeys);
	}
	node->ioss_RuntimeKeysReady = true;

	/* reset index scan */
	index_rescan(node->ioss_ScanDesc,
				 node->ioss_ScanKeys, node->ioss_NumScanKeys,
				 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);

	ExecScanReScan(&node->ss);
}

/* ----------------------------------------------------------------
 *		ExecEndIndexOnlyScan
 * ----------------------------------------------------------------
 */
void
ExecEndIndexOnlyScan(IndexOnlyScanState *node)
{
	Relation	indexRelationDesc;
	IndexScanDesc indexScanDesc;
	Relation	relation;

	/*
	 * extract information from the node
	 */
	indexRelationDesc = node->ioss_RelationDesc;
	indexScanDesc = node->ioss_ScanDesc;
	relation = node->ss.ss_currentRelation;

	/* Release VM buffer pin, if any. */
	if (node->ioss_VMBuffer != InvalidBuffer)
	{
		ReleaseBuffer(node->ioss_VMBuffer);
		node->ioss_VMBuffer = InvalidBuffer;
	}

	/*
	 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
	 */
#ifdef NOT_USED
	ExecFreeExprContext(&node->ss.ps);
	if (node->ioss_RuntimeContext)
		FreeExprContext(node->ioss_RuntimeContext, true);
#endif

	/*
	 * clear out tuple table slots
	 */
	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
	ExecClearTuple(node->ss.ss_ScanTupleSlot);

	/*
	 * close the index relation (no-op if we didn't open it)
	 */
	if (indexScanDesc)
		index_endscan(indexScanDesc);
	if (indexRelationDesc)
		index_close(indexRelationDesc, NoLock);

	/*
	 * close the heap relation.
	 */
	ExecCloseScanRelation(relation);
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyMarkPos
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
{
	index_markpos(node->ioss_ScanDesc);
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyRestrPos
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
{
	index_restrpos(node->ioss_ScanDesc);
}

/* ----------------------------------------------------------------
 *		ExecInitIndexOnlyScan
 *
 *		Initializes the index scan's state information, creates
 *		scan keys, and opens the base and index relations.
 *
 *		Note: index scans have 2 sets of state information because
 *			  we have to keep track of the base relation and the
 *			  index relation.
 * ----------------------------------------------------------------
 */
IndexOnlyScanState *
ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
{
	IndexOnlyScanState *indexstate;
	Relation	currentRelation;
	bool		relistarget;
	TupleDesc	tupDesc;

	/*
	 * create state structure
	 */
	indexstate = makeNode(IndexOnlyScanState);
	indexstate->ss.ps.plan = (Plan *) node;
	indexstate->ss.ps.state = estate;
	indexstate->ioss_HeapFetches = 0;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &indexstate->ss.ps);

	indexstate->ss.ps.ps_TupFromTlist = false;

	/*
	 * initialize child expressions
	 *
	 * Note: we don't initialize all of the indexorderby expression, only the
	 * sub-parts corresponding to runtime keys (see below).
	 */
	indexstate->ss.ps.targetlist = (List *)
		ExecInitExpr((Expr *) node->scan.plan.targetlist,
					 (PlanState *) indexstate);
	indexstate->ss.ps.qual = (List *)
		ExecInitExpr((Expr *) node->scan.plan.qual,
					 (PlanState *) indexstate);
	indexstate->indexqual = (List *)
		ExecInitExpr((Expr *) node->indexqual,
					 (PlanState *) indexstate);

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
	ExecInitScanTupleSlot(estate, &indexstate->ss);

	/*
	 * open the base relation and acquire appropriate lock on it.
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	indexstate->ss.ss_currentRelation = currentRelation;
	indexstate->ss.ss_currentScanDesc = NULL;	/* no heap scan here */

	/*
	 * Build the scan tuple type using the indextlist generated by the
	 * planner.  We use this, rather than the index's physical tuple
	 * descriptor, because the latter contains storage column types not the
	 * types of the original datums.  (It's the AM's responsibility to return
	 * suitable data anyway.)
	 */
	tupDesc = ExecTypeFromTL(node->indextlist, false);
	ExecAssignScanType(&indexstate->ss, tupDesc);

	/*
	 * Initialize result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&indexstate->ss.ps);
	ExecAssignScanProjectionInfo(&indexstate->ss);

	/*
	 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
	 * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
	 * references to nonexistent indexes.
	 */
	if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
		return indexstate;

	/*
	 * Open the index relation.
	 *
	 * If the parent table is one of the target relations of the query, then
	 * InitPlan already opened and write-locked the index, so we can avoid
	 * taking another lock here.  Otherwise we need a normal reader's lock.
	 */
	relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
	indexstate->ioss_RelationDesc = index_open(node->indexid,
									 relistarget ? NoLock : AccessShareLock);

	/*
	 * Initialize index-specific scan state
	 */
	indexstate->ioss_RuntimeKeysReady = false;
	indexstate->ioss_RuntimeKeys = NULL;
	indexstate->ioss_NumRuntimeKeys = 0;

	/*
	 * build the index scan keys from the index qualification
	 */
	ExecIndexBuildScanKeys((PlanState *) indexstate,
						   indexstate->ioss_RelationDesc,
						   node->indexqual,
						   false,
						   &indexstate->ioss_ScanKeys,
						   &indexstate->ioss_NumScanKeys,
						   &indexstate->ioss_RuntimeKeys,
						   &indexstate->ioss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * any ORDER BY exprs have to be turned into scankeys in the same way
	 */
	ExecIndexBuildScanKeys((PlanState *) indexstate,
						   indexstate->ioss_RelationDesc,
						   node->indexorderby,
						   true,
						   &indexstate->ioss_OrderByKeys,
						   &indexstate->ioss_NumOrderByKeys,
						   &indexstate->ioss_RuntimeKeys,
						   &indexstate->ioss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * If we have runtime keys, we need an ExprContext to evaluate them. The
	 * node's standard context won't do because we want to reset that context
	 * for every tuple.  So, build another context just like the other one...
	 * -tgl 7/11/00
	 */
	if (indexstate->ioss_NumRuntimeKeys != 0)
	{
		ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;

		ExecAssignExprContext(estate, &indexstate->ss.ps);
		indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
		indexstate->ss.ps.ps_ExprContext = stdecontext;
	}
	else
	{
		indexstate->ioss_RuntimeContext = NULL;
	}

	/*
	 * Initialize scan descriptor.
	 */
	indexstate->ioss_ScanDesc = index_beginscan(currentRelation,
												indexstate->ioss_RelationDesc,
												estate->es_snapshot,
												indexstate->ioss_NumScanKeys,
												indexstate->ioss_NumOrderByKeys);

	/* Set it up for index-only scan */
	indexstate->ioss_ScanDesc->xs_want_itup = true;
	indexstate->ioss_VMBuffer = InvalidBuffer;

	/*
	 * If no run-time keys to calculate, go ahead and pass the scankeys to the
	 * index AM.
	 */
	if (indexstate->ioss_NumRuntimeKeys == 0)
		index_rescan(indexstate->ioss_ScanDesc,
					 indexstate->ioss_ScanKeys,
					 indexstate->ioss_NumScanKeys,
					 indexstate->ioss_OrderByKeys,
					 indexstate->ioss_NumOrderByKeys);

	/*
	 * all done.
	 */
	return indexstate;
}