postgresql/src/backend/executor/nodeIndexonlyscan.c

702 lines
22 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* nodeIndexonlyscan.c
* Routines to support index-only scans
*
2017-01-03 19:48:53 +01:00
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/executor/nodeIndexonlyscan.c
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* ExecIndexOnlyScan scans an index
* IndexOnlyNext retrieve next tuple
* ExecInitIndexOnlyScan creates and initializes state info.
* ExecReScanIndexOnlyScan rescans the indexed relation.
* ExecEndIndexOnlyScan releases all storage.
* ExecIndexOnlyMarkPos marks scan position.
* ExecIndexOnlyRestrPos restores scan position.
* ExecIndexOnlyScanEstimate estimates DSM space needed for
* parallel index-only scan
* ExecIndexOnlyScanInitializeDSM initialize DSM for parallel
* index-only scan
* ExecIndexOnlyScanReInitializeDSM reinitialize DSM for fresh scan
* ExecIndexOnlyScanInitializeWorker attach to DSM info in parallel worker
*/
#include "postgres.h"
#include "access/relscan.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeIndexscan.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
TupleDesc itupdesc);
/* ----------------------------------------------------------------
* IndexOnlyNext
*
* Retrieve a tuple from the IndexOnlyScan node's index.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
EState *estate;
ExprContext *econtext;
ScanDirection direction;
IndexScanDesc scandesc;
TupleTableSlot *slot;
ItemPointer tid;
/*
* extract necessary information from index scan node
*/
estate = node->ss.ps.state;
direction = estate->es_direction;
/* flip direction if this is an overall backward scan */
if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
{
if (ScanDirectionIsForward(direction))
direction = BackwardScanDirection;
else if (ScanDirectionIsBackward(direction))
direction = ForwardScanDirection;
}
scandesc = node->ioss_ScanDesc;
econtext = node->ss.ps.ps_ExprContext;
slot = node->ss.ss_ScanTupleSlot;
if (scandesc == NULL)
{
/*
* We reach here if the index only scan is not parallel, or if we're
* executing a index only scan that was intended to be parallel
* serially.
*/
scandesc = index_beginscan(node->ss.ss_currentRelation,
node->ioss_RelationDesc,
estate->es_snapshot,
node->ioss_NumScanKeys,
node->ioss_NumOrderByKeys);
node->ioss_ScanDesc = scandesc;
/* Set it up for index-only scan */
node->ioss_ScanDesc->xs_want_itup = true;
node->ioss_VMBuffer = InvalidBuffer;
/*
* If no run-time keys to calculate or they are ready, go ahead and
* pass the scankeys to the index AM.
*/
if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
index_rescan(scandesc,
node->ioss_ScanKeys,
node->ioss_NumScanKeys,
node->ioss_OrderByKeys,
node->ioss_NumOrderByKeys);
}
/*
* OK, now that we have what we need, fetch the next tuple.
*/
while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
{
HeapTuple tuple = NULL;
CHECK_FOR_INTERRUPTS();
/*
* We can skip the heap fetch if the TID references a heap page on
* which all tuples are known visible to everybody. In any case,
* we'll use the index tuple not the heap tuple as the data source.
*
* Note on Memory Ordering Effects: visibilitymap_get_status does not
* lock the visibility map buffer, and therefore the result we read
* here could be slightly stale. However, it can't be stale enough to
* matter.
*
* We need to detect clearing a VM bit due to an insert right away,
* because the tuple is present in the index page but not visible. The
* reading of the TID by this scan (using a shared lock on the index
* buffer) is serialized with the insert of the TID into the index
* (using an exclusive lock on the index buffer). Because the VM bit
* is cleared before updating the index, and locking/unlocking of the
* index page acts as a full memory barrier, we are sure to see the
* cleared bit if we see a recently-inserted TID.
*
* Deletes do not update the index page (only VACUUM will clear out
* the TID), so the clearing of the VM bit by a delete is not
* serialized with this test below, and we may see a value that is
* significantly stale. However, we don't care about the delete right
* away, because the tuple is still visible until the deleting
* transaction commits or the statement ends (if it's our
* transaction). In either case, the lock on the VM buffer will have
2015-05-24 03:35:49 +02:00
* been released (acting as a write barrier) after clearing the bit.
* And for us to have a snapshot that includes the deleting
* transaction (making the tuple invisible), we must have acquired
* ProcArrayLock after that time, acting as a read barrier.
*
* It's worth going through this complexity to avoid needing to lock
* the VM buffer, which could cause significant contention.
*/
if (!VM_ALL_VISIBLE(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
&node->ioss_VMBuffer))
{
/*
* Rats, we have to visit the heap to check visibility.
*/
node->ioss_HeapFetches++;
tuple = index_fetch_heap(scandesc);
if (tuple == NULL)
continue; /* no visible tuple, try next index entry */
/*
* Only MVCC snapshots are supported here, so there should be no
* need to keep following the HOT chain once a visible entry has
* been found. If we did want to allow that, we'd need to keep
* more state to remember not to call index_getnext_tid next time.
*/
if (scandesc->xs_continue_hot)
elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
/*
* Note: at this point we are holding a pin on the heap page, as
* recorded in scandesc->xs_cbuf. We could release that pin now,
* but it's not clear whether it's a win to do so. The next index
* entry might require a visit to the same heap page.
*/
}
/*
* Fill the scan tuple slot with data from the index. This might be
* provided in either HeapTuple or IndexTuple format. Conceivably an
* index AM might fill both fields, in which case we prefer the heap
* format, since it's probably a bit cheaper to fill a slot from.
*/
if (scandesc->xs_hitup)
{
/*
* We don't take the trouble to verify that the provided tuple has
* exactly the slot's format, but it seems worth doing a quick
* check on the number of fields.
*/
Assert(slot->tts_tupleDescriptor->natts ==
scandesc->xs_hitupdesc->natts);
ExecStoreTuple(scandesc->xs_hitup, slot, InvalidBuffer, false);
}
else if (scandesc->xs_itup)
StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
else
elog(ERROR, "no data returned for index-only scan");
/*
* If the index was lossy, we have to recheck the index quals.
* (Currently, this can never happen, but we should support the case
* for possible future use, eg with GiST indexes.)
*/
if (scandesc->xs_recheck)
{
econtext->ecxt_scantuple = slot;
ResetExprContext(econtext);
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
if (!ExecQual(node->indexqual, econtext))
{
/* Fails recheck, so drop it and loop back for another */
InstrCountFiltered2(node, 1);
continue;
}
}
/*
* We don't currently support rechecking ORDER BY distances. (In
* principle, if the index can support retrieval of the originally
* indexed value, it should be able to produce an exact distance
* calculation too. So it's not clear that adding code here for
* recheck/re-sort would be worth the trouble. But we should at least
* throw an error if someone tries it.)
*/
if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("lossy distance functions are not supported in index-only scans")));
/*
* Predicate locks for index-only scans must be acquired at the page
* level when the heap is not accessed, since tuple-level predicate
* locks need the tuple's xmin value. If we had to visit the tuple
* anyway, then we already have the tuple-level lock and can skip the
* page lock.
*/
if (tuple == NULL)
PredicateLockPage(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
estate->es_snapshot);
return slot;
}
/*
* if we get here it means the index scan failed so we are at the end of
* the scan..
*/
return ExecClearTuple(slot);
}
/*
* StoreIndexTuple
* Fill the slot with data from the index tuple.
*
* At some point this might be generally-useful functionality, but
* right now we don't need it elsewhere.
*/
static void
StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
{
int nindexatts = itupdesc->natts;
Datum *values = slot->tts_values;
bool *isnull = slot->tts_isnull;
int i;
/*
* Note: we must use the tupdesc supplied by the AM in index_getattr, not
* the slot's tupdesc, in case the latter has different datatypes (this
* happens for btree name_ops in particular). They'd better have the same
* number of columns though, as well as being datatype-compatible which is
* something we can't so easily check.
*/
Assert(slot->tts_tupleDescriptor->natts == nindexatts);
ExecClearTuple(slot);
for (i = 0; i < nindexatts; i++)
values[i] = index_getattr(itup, i + 1, itupdesc, &isnull[i]);
ExecStoreVirtualTuple(slot);
}
/*
* IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
*
* This can't really happen, since an index can't supply CTID which would
* be necessary data for any potential EvalPlanQual target relation. If it
* did happen, the EPQ code would pass us the wrong data, namely a heap
* tuple not an index tuple. So throw an error.
*/
static bool
IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
{
elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
return false; /* keep compiler quiet */
}
/* ----------------------------------------------------------------
* ExecIndexOnlyScan(node)
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecIndexOnlyScan(PlanState *pstate)
{
IndexOnlyScanState *node = castNode(IndexOnlyScanState, pstate);
/*
* If we have runtime keys and they've not already been set up, do it now.
*/
if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
ExecReScan((PlanState *) node);
return ExecScan(&node->ss,
(ExecScanAccessMtd) IndexOnlyNext,
(ExecScanRecheckMtd) IndexOnlyRecheck);
}
/* ----------------------------------------------------------------
* ExecReScanIndexOnlyScan(node)
*
* Recalculates the values of any scan keys whose value depends on
* information known at runtime, then rescans the indexed relation.
*
* Updating the scan key was formerly done separately in
* ExecUpdateIndexScanKeys. Integrating it into ReScan makes
* rescans of indices and relations/general streams more uniform.
* ----------------------------------------------------------------
*/
void
ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
{
/*
* If we are doing runtime key calculations (ie, any of the index key
* values weren't simple Consts), compute the new key values. But first,
* reset the context so we don't leak memory as each outer tuple is
* scanned. Note this assumes that we will recalculate *all* runtime keys
* on each call.
*/
if (node->ioss_NumRuntimeKeys != 0)
{
ExprContext *econtext = node->ioss_RuntimeContext;
ResetExprContext(econtext);
ExecIndexEvalRuntimeKeys(econtext,
node->ioss_RuntimeKeys,
node->ioss_NumRuntimeKeys);
}
node->ioss_RuntimeKeysReady = true;
/* reset index scan */
if (node->ioss_ScanDesc)
index_rescan(node->ioss_ScanDesc,
node->ioss_ScanKeys, node->ioss_NumScanKeys,
node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
ExecScanReScan(&node->ss);
}
/* ----------------------------------------------------------------
* ExecEndIndexOnlyScan
* ----------------------------------------------------------------
*/
void
ExecEndIndexOnlyScan(IndexOnlyScanState *node)
{
Relation indexRelationDesc;
IndexScanDesc indexScanDesc;
Relation relation;
/*
* extract information from the node
*/
indexRelationDesc = node->ioss_RelationDesc;
indexScanDesc = node->ioss_ScanDesc;
relation = node->ss.ss_currentRelation;
/* Release VM buffer pin, if any. */
if (node->ioss_VMBuffer != InvalidBuffer)
{
ReleaseBuffer(node->ioss_VMBuffer);
node->ioss_VMBuffer = InvalidBuffer;
}
/*
* Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
*/
#ifdef NOT_USED
ExecFreeExprContext(&node->ss.ps);
if (node->ioss_RuntimeContext)
FreeExprContext(node->ioss_RuntimeContext, true);
#endif
/*
* clear out tuple table slots
*/
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
ExecClearTuple(node->ss.ss_ScanTupleSlot);
/*
* close the index relation (no-op if we didn't open it)
*/
if (indexScanDesc)
index_endscan(indexScanDesc);
if (indexRelationDesc)
index_close(indexRelationDesc, NoLock);
/*
* close the heap relation.
*/
ExecCloseScanRelation(relation);
}
/* ----------------------------------------------------------------
* ExecIndexOnlyMarkPos
* ----------------------------------------------------------------
*/
void
ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
{
index_markpos(node->ioss_ScanDesc);
}
/* ----------------------------------------------------------------
* ExecIndexOnlyRestrPos
* ----------------------------------------------------------------
*/
void
ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
{
index_restrpos(node->ioss_ScanDesc);
}
/* ----------------------------------------------------------------
* ExecInitIndexOnlyScan
*
* Initializes the index scan's state information, creates
* scan keys, and opens the base and index relations.
*
* Note: index scans have 2 sets of state information because
* we have to keep track of the base relation and the
* index relation.
* ----------------------------------------------------------------
*/
IndexOnlyScanState *
ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
{
IndexOnlyScanState *indexstate;
Relation currentRelation;
bool relistarget;
TupleDesc tupDesc;
/*
* create state structure
*/
indexstate = makeNode(IndexOnlyScanState);
indexstate->ss.ps.plan = (Plan *) node;
indexstate->ss.ps.state = estate;
indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;
indexstate->ioss_HeapFetches = 0;
/*
* Miscellaneous initialization
*
* create expression context for node
*/
ExecAssignExprContext(estate, &indexstate->ss.ps);
/*
* initialize child expressions
*
* Note: we don't initialize all of the indexorderby expression, only the
* sub-parts corresponding to runtime keys (see below).
*/
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
indexstate->ss.ps.qual =
ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
indexstate->indexqual =
ExecInitQual(node->indexqual, (PlanState *) indexstate);
/*
* tuple table initialization
*/
ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
ExecInitScanTupleSlot(estate, &indexstate->ss);
/*
* open the base relation and acquire appropriate lock on it.
*/
currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
indexstate->ss.ss_currentRelation = currentRelation;
indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
/*
* Build the scan tuple type using the indextlist generated by the
* planner. We use this, rather than the index's physical tuple
* descriptor, because the latter contains storage column types not the
* types of the original datums. (It's the AM's responsibility to return
* suitable data anyway.)
*/
tupDesc = ExecTypeFromTL(node->indextlist, false);
ExecAssignScanType(&indexstate->ss, tupDesc);
/*
Code review for foreign/custom join pushdown patch. Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design decisions that seem pretty questionable to me, and there was quite a lot of stuff not to like about the documentation and comments. Clean up as follows: * Consider foreign joins only between foreign tables on the same server, rather than between any two foreign tables with the same underlying FDW handler function. In most if not all cases, the FDW would simply have had to apply the same-server restriction itself (far more expensively, both for lack of caching and because it would be repeated for each combination of input sub-joins), or else risk nasty bugs. Anyone who's really intent on doing something outside this restriction can always use the set_join_pathlist_hook. * Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist to better reflect what they're for, and allow these custom scan tlists to be used even for base relations. * Change make_foreignscan() API to include passing the fdw_scan_tlist value, since the FDW is required to set that. Backwards compatibility doesn't seem like an adequate reason to expect FDWs to set it in some ad-hoc extra step, and anyway existing FDWs can just pass NIL. * Change the API of path-generating subroutines of add_paths_to_joinrel, and in particular that of GetForeignJoinPaths and set_join_pathlist_hook, so that various less-used parameters are passed in a struct rather than as separate parameter-list entries. The objective here is to reduce the probability that future additions to those parameter lists will result in source-level API breaks for users of these hooks. It's possible that this is even a small win for the core code, since most CPU architectures can't pass more than half a dozen parameters efficiently anyway. I kept root, joinrel, outerrel, innerrel, and jointype as separate parameters to reduce code churn in joinpath.c --- in particular, putting jointype into the struct would have been problematic because of the subroutines' habit of changing their local copies of that variable. * Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all right for it to know about IndexOnlyScan, but if the list is to grow we should refactor the knowledge out to the callers. * Restore nodeForeignscan.c's previous use of the relcache to avoid extra GetFdwRoutine lookups for base-relation scans. * Lots of cleanup of documentation and missed comments. Re-order some code additions into more logical places.
2015-05-10 20:36:30 +02:00
* Initialize result tuple type and projection info. The node's
* targetlist will contain Vars with varno = INDEX_VAR, referencing the
* scan tuple.
*/
ExecAssignResultTypeFromTL(&indexstate->ss.ps);
Code review for foreign/custom join pushdown patch. Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design decisions that seem pretty questionable to me, and there was quite a lot of stuff not to like about the documentation and comments. Clean up as follows: * Consider foreign joins only between foreign tables on the same server, rather than between any two foreign tables with the same underlying FDW handler function. In most if not all cases, the FDW would simply have had to apply the same-server restriction itself (far more expensively, both for lack of caching and because it would be repeated for each combination of input sub-joins), or else risk nasty bugs. Anyone who's really intent on doing something outside this restriction can always use the set_join_pathlist_hook. * Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist to better reflect what they're for, and allow these custom scan tlists to be used even for base relations. * Change make_foreignscan() API to include passing the fdw_scan_tlist value, since the FDW is required to set that. Backwards compatibility doesn't seem like an adequate reason to expect FDWs to set it in some ad-hoc extra step, and anyway existing FDWs can just pass NIL. * Change the API of path-generating subroutines of add_paths_to_joinrel, and in particular that of GetForeignJoinPaths and set_join_pathlist_hook, so that various less-used parameters are passed in a struct rather than as separate parameter-list entries. The objective here is to reduce the probability that future additions to those parameter lists will result in source-level API breaks for users of these hooks. It's possible that this is even a small win for the core code, since most CPU architectures can't pass more than half a dozen parameters efficiently anyway. I kept root, joinrel, outerrel, innerrel, and jointype as separate parameters to reduce code churn in joinpath.c --- in particular, putting jointype into the struct would have been problematic because of the subroutines' habit of changing their local copies of that variable. * Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all right for it to know about IndexOnlyScan, but if the list is to grow we should refactor the knowledge out to the callers. * Restore nodeForeignscan.c's previous use of the relcache to avoid extra GetFdwRoutine lookups for base-relation scans. * Lots of cleanup of documentation and missed comments. Re-order some code additions into more logical places.
2015-05-10 20:36:30 +02:00
ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);
/*
* If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
* here. This allows an index-advisor plugin to EXPLAIN a plan containing
* references to nonexistent indexes.
*/
if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
return indexstate;
/*
* Open the index relation.
*
* If the parent table is one of the target relations of the query, then
* InitPlan already opened and write-locked the index, so we can avoid
* taking another lock here. Otherwise we need a normal reader's lock.
*/
relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
indexstate->ioss_RelationDesc = index_open(node->indexid,
relistarget ? NoLock : AccessShareLock);
/*
* Initialize index-specific scan state
*/
indexstate->ioss_RuntimeKeysReady = false;
indexstate->ioss_RuntimeKeys = NULL;
indexstate->ioss_NumRuntimeKeys = 0;
/*
* build the index scan keys from the index qualification
*/
ExecIndexBuildScanKeys((PlanState *) indexstate,
indexstate->ioss_RelationDesc,
node->indexqual,
false,
&indexstate->ioss_ScanKeys,
&indexstate->ioss_NumScanKeys,
&indexstate->ioss_RuntimeKeys,
&indexstate->ioss_NumRuntimeKeys,
NULL, /* no ArrayKeys */
NULL);
/*
* any ORDER BY exprs have to be turned into scankeys in the same way
*/
ExecIndexBuildScanKeys((PlanState *) indexstate,
indexstate->ioss_RelationDesc,
node->indexorderby,
true,
&indexstate->ioss_OrderByKeys,
&indexstate->ioss_NumOrderByKeys,
&indexstate->ioss_RuntimeKeys,
&indexstate->ioss_NumRuntimeKeys,
NULL, /* no ArrayKeys */
NULL);
/*
* If we have runtime keys, we need an ExprContext to evaluate them. The
* node's standard context won't do because we want to reset that context
* for every tuple. So, build another context just like the other one...
* -tgl 7/11/00
*/
if (indexstate->ioss_NumRuntimeKeys != 0)
{
ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
ExecAssignExprContext(estate, &indexstate->ss.ps);
indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
indexstate->ss.ps.ps_ExprContext = stdecontext;
}
else
{
indexstate->ioss_RuntimeContext = NULL;
}
/*
* all done.
*/
return indexstate;
}
/* ----------------------------------------------------------------
* Parallel Index-only Scan Support
* ----------------------------------------------------------------
*/
/* ----------------------------------------------------------------
* ExecIndexOnlyScanEstimate
*
* estimates the space required to serialize index-only scan node.
* ----------------------------------------------------------------
*/
void
ExecIndexOnlyScanEstimate(IndexOnlyScanState *node,
ParallelContext *pcxt)
{
EState *estate = node->ss.ps.state;
node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc,
estate->es_snapshot);
shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen);
shm_toc_estimate_keys(&pcxt->estimator, 1);
}
/* ----------------------------------------------------------------
* ExecIndexOnlyScanInitializeDSM
*
* Set up a parallel index-only scan descriptor.
* ----------------------------------------------------------------
*/
void
ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
ParallelContext *pcxt)
{
EState *estate = node->ss.ps.state;
ParallelIndexScanDesc piscan;
piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
index_parallelscan_initialize(node->ss.ss_currentRelation,
node->ioss_RelationDesc,
estate->es_snapshot,
piscan);
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
node->ioss_ScanDesc =
index_beginscan_parallel(node->ss.ss_currentRelation,
node->ioss_RelationDesc,
node->ioss_NumScanKeys,
node->ioss_NumOrderByKeys,
piscan);
node->ioss_ScanDesc->xs_want_itup = true;
node->ioss_VMBuffer = InvalidBuffer;
/*
* If no run-time keys to calculate or they are ready, go ahead and pass
* the scankeys to the index AM.
*/
if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
index_rescan(node->ioss_ScanDesc,
node->ioss_ScanKeys, node->ioss_NumScanKeys,
node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
}
/* ----------------------------------------------------------------
* ExecIndexOnlyScanReInitializeDSM
*
* Reset shared state before beginning a fresh scan.
* ----------------------------------------------------------------
*/
void
ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node,
ParallelContext *pcxt)
{
index_parallelrescan(node->ioss_ScanDesc);
}
/* ----------------------------------------------------------------
* ExecIndexOnlyScanInitializeWorker
*
* Copy relevant information from TOC into planstate.
* ----------------------------------------------------------------
*/
void
ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, shm_toc *toc)
{
ParallelIndexScanDesc piscan;
piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
node->ioss_ScanDesc =
index_beginscan_parallel(node->ss.ss_currentRelation,
node->ioss_RelationDesc,
node->ioss_NumScanKeys,
node->ioss_NumOrderByKeys,
piscan);
node->ioss_ScanDesc->xs_want_itup = true;
/*
* If no run-time keys to calculate or they are ready, go ahead and pass
* the scankeys to the index AM.
*/
if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
index_rescan(node->ioss_ScanDesc,
node->ioss_ScanKeys, node->ioss_NumScanKeys,
node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
}