/*-------------------------------------------------------------------------
*
* execUtils.c
* miscellaneous executor utility routines
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/executor/execUtils.c
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* CreateExecutorState Create/delete executor working state
* FreeExecutorState
* CreateExprContext
* CreateStandaloneExprContext
* FreeExprContext
* ReScanExprContext
*
* ExecAssignExprContext Common code for plan node init routines.
* ExecAssignResultType
* etc
*
* ExecOpenScanRelation Common code for scan node init routines.
* ExecCloseScanRelation
*
* ExecOpenIndices \
* ExecCloseIndices | referenced by InitPlan, EndPlan,
* ExecInsertIndexTuples / ExecInsert, ExecUpdate
*
* RegisterExprContextCallback Register function shutdown callback
* UnregisterExprContextCallback Deregister function shutdown callback
*
* NOTES
* This file has traditionally been the place to stick misc.
* executor support stuff that doesn't really go anyplace else.
*/
#include "postgres.h"
#include "access/relscan.h"
#include "access/transam.h"
#include "catalog/index.h"
#include "executor/execdebug.h"
#include "nodes/nodeFuncs.h"
#include "parser/parsetree.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"
#include "utils/tqual.h"
static bool get_last_attnums(Node *node, ProjectionInfo *projInfo);
static bool index_recheck_constraint(Relation index, Oid *constr_procs,
Datum *existing_values, bool *existing_isnull,
Datum *new_values);
static void ShutdownExprContext(ExprContext *econtext, bool isCommit);
/* ----------------------------------------------------------------
* Executor state and memory management functions
* ----------------------------------------------------------------
*/
/* ----------------
* CreateExecutorState
*
* Create and initialize an EState node, which is the root of
* working storage for an entire Executor invocation.
*
* Principally, this creates the per-query memory context that will be
* used to hold all working data that lives till the end of the query.
* Note that the per-query context will become a child of the caller's
* CurrentMemoryContext.
* ----------------
*/
EState *
CreateExecutorState(void)
{
EState *estate;
MemoryContext qcontext;
MemoryContext oldcontext;
/*
* Create the per-query context for this Executor run.
*/
qcontext = AllocSetContextCreate(CurrentMemoryContext,
"ExecutorState",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
/*
* Make the EState node within the per-query context. This way, we don't
* need a separate pfree() operation for it at shutdown.
*/
oldcontext = MemoryContextSwitchTo(qcontext);
estate = makeNode(EState);
/*
* Initialize all fields of the Executor State structure
*/
estate->es_direction = ForwardScanDirection;
estate->es_snapshot = SnapshotNow;
estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */
estate->es_range_table = NIL;
estate->es_plannedstmt = NULL;
estate->es_junkFilter = NULL;
estate->es_output_cid = (CommandId) 0;
estate->es_result_relations = NULL;
estate->es_num_result_relations = 0;
estate->es_result_relation_info = NULL;
estate->es_trig_target_relations = NIL;
estate->es_trig_tuple_slot = NULL;
estate->es_trig_oldtup_slot = NULL;
estate->es_trig_newtup_slot = NULL;
estate->es_param_list_info = NULL;
estate->es_param_exec_vals = NULL;
estate->es_query_cxt = qcontext;
estate->es_tupleTable = NIL;
estate->es_rowMarks = NIL;
estate->es_processed = 0;
estate->es_lastoid = InvalidOid;
estate->es_top_eflags = 0;
estate->es_instrument = 0;
estate->es_finished = false;
estate->es_exprcontexts = NIL;
estate->es_subplanstates = NIL;
estate->es_auxmodifytables = NIL;
estate->es_per_tuple_exprcontext = NULL;
estate->es_epqTuple = NULL;
estate->es_epqTupleSet = NULL;
estate->es_epqScanDone = NULL;
/*
* Return the executor state structure
*/
MemoryContextSwitchTo(oldcontext);
return estate;
}
/* ----------------
* FreeExecutorState
*
* Release an EState along with all remaining working storage.
*
* Note: this is not responsible for releasing non-memory resources,
* such as open relations or buffer pins. But it will shut down any
* still-active ExprContexts within the EState. That is sufficient
* cleanup for situations where the EState has only been used for expression
* evaluation, and not to run a complete Plan.
*
* This can be called in any memory context ... so long as it's not one
* of the ones to be freed.
* ----------------
*/
void
FreeExecutorState(EState *estate)
{
/*
* Shut down and free any remaining ExprContexts. We do this explicitly
* to ensure that any remaining shutdown callbacks get called (since they
* might need to release resources that aren't simply memory within the
* per-query memory context).
*/
while (estate->es_exprcontexts)
{
/*
* XXX: seems there ought to be a faster way to implement this than
* repeated list_delete(), no?
*/
FreeExprContext((ExprContext *) linitial(estate->es_exprcontexts),
true);
/* FreeExprContext removed the list link for us */
}
/*
* Free the per-query memory context, thereby releasing all working
* memory, including the EState node itself.
*/
MemoryContextDelete(estate->es_query_cxt);
}
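/*
 * Illustrative sketch (not part of the original comments): a caller that
 * uses an EState only for expression evaluation, the situation described
 * in the note above, would typically pair these routines like so:
 *
 *		EState	   *estate = CreateExecutorState();
 *		ExprState  *exprstate = ExecPrepareExpr(expr, estate);
 *		bool		isnull;
 *		Datum		result;
 *
 *		result = ExecEvalExprSwitchContext(exprstate,
 *										   GetPerTupleExprContext(estate),
 *										   &isnull, NULL);
 *		... use result before freeing the EState ...
 *		FreeExecutorState(estate);
 *
 * FreeExecutorState runs the shutdown callbacks of the per-tuple
 * ExprContext and then deletes the per-query memory context, so any
 * pass-by-reference result must be copied out first.
 */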
/* ----------------
* CreateExprContext
*
* Create a context for expression evaluation within an EState.
*
* An executor run may require multiple ExprContexts (we usually make one
* for each Plan node, and a separate one for per-output-tuple processing
* such as constraint checking). Each ExprContext has its own "per-tuple"
* memory context.
*
* Note we make no assumption about the caller's memory context.
* ----------------
*/
ExprContext *
CreateExprContext(EState *estate)
{
ExprContext *econtext;
MemoryContext oldcontext;
/* Create the ExprContext node within the per-query memory context */
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
econtext = makeNode(ExprContext);
/* Initialize fields of ExprContext */
econtext->ecxt_scantuple = NULL;
econtext->ecxt_innertuple = NULL;
econtext->ecxt_outertuple = NULL;
econtext->ecxt_per_query_memory = estate->es_query_cxt;
/*
* Create working memory for expression evaluation in this context.
*/
econtext->ecxt_per_tuple_memory =
AllocSetContextCreate(estate->es_query_cxt,
"ExprContext",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
econtext->ecxt_param_exec_vals = estate->es_param_exec_vals;
econtext->ecxt_param_list_info = estate->es_param_list_info;
econtext->ecxt_aggvalues = NULL;
econtext->ecxt_aggnulls = NULL;
econtext->caseValue_datum = (Datum) 0;
econtext->caseValue_isNull = true;
econtext->domainValue_datum = (Datum) 0;
econtext->domainValue_isNull = true;
econtext->ecxt_estate = estate;
econtext->ecxt_callbacks = NULL;
/*
* Link the ExprContext into the EState to ensure it is shut down when the
* EState is freed. Because we use lcons(), shutdowns will occur in
* reverse order of creation, which may not be essential but can't hurt.
*/
estate->es_exprcontexts = lcons(econtext, estate->es_exprcontexts);
MemoryContextSwitchTo(oldcontext);
return econtext;
}
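/*
 * Sketch of the usual way a plan node consumes such a context (for
 * illustration only; the real call sites live in the individual node
 * files):
 *
 *		ResetExprContext(econtext);
 *		econtext->ecxt_scantuple = slot;
 *		if (ExecQual(planstate->qual, econtext, false))
 *			... the tuple passes the node's quals ...
 *
 * ResetExprContext reclaims ecxt_per_tuple_memory, so per-tuple garbage
 * from the previous cycle must not be referenced afterwards.
 */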
/* ----------------
* CreateStandaloneExprContext
*
* Create a context for standalone expression evaluation.
*
* An ExprContext made this way can be used for evaluation of expressions
* that contain no Params, subplans, or Var references (it might work to
* put tuple references into the scantuple field, but it seems unwise).
*
* The ExprContext struct is allocated in the caller's current memory
* context, which also becomes its "per query" context.
*
* It is caller's responsibility to free the ExprContext when done,
* or at least ensure that any shutdown callbacks have been called
* (ReScanExprContext() is suitable). Otherwise, non-memory resources
* might be leaked.
* ----------------
*/
ExprContext *
CreateStandaloneExprContext(void)
{
ExprContext *econtext;
/* Create the ExprContext node within the caller's memory context */
econtext = makeNode(ExprContext);
/* Initialize fields of ExprContext */
econtext->ecxt_scantuple = NULL;
econtext->ecxt_innertuple = NULL;
econtext->ecxt_outertuple = NULL;
econtext->ecxt_per_query_memory = CurrentMemoryContext;
/*
* Create working memory for expression evaluation in this context.
*/
econtext->ecxt_per_tuple_memory =
AllocSetContextCreate(CurrentMemoryContext,
"ExprContext",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
econtext->ecxt_param_exec_vals = NULL;
econtext->ecxt_param_list_info = NULL;
econtext->ecxt_aggvalues = NULL;
econtext->ecxt_aggnulls = NULL;
econtext->caseValue_datum = (Datum) 0;
econtext->caseValue_isNull = true;
econtext->domainValue_datum = (Datum) 0;
econtext->domainValue_isNull = true;
econtext->ecxt_estate = NULL;
econtext->ecxt_callbacks = NULL;
return econtext;
}
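/*
 * Example (a sketch, not drawn from any specific caller):
 *
 *		ExprContext *econtext = CreateStandaloneExprContext();
 *		... evaluate expressions free of Params, subplans, and Vars,
 *		letting per-row garbage go into ecxt_per_tuple_memory ...
 *		FreeExprContext(econtext, true);
 *
 * FreeExprContext runs any remaining shutdown callbacks and releases the
 * memory; a caller that wants to keep the context around for further use
 * can call ReScanExprContext instead.
 */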
/* ----------------
* FreeExprContext
*
* Free an expression context, including calling any remaining
* shutdown callbacks.
*
* Since we free the temporary context used for expression evaluation,
* any previously computed pass-by-reference expression result will go away!
*
* If isCommit is false, we are being called in error cleanup, and should
* not call callbacks but only release memory. (It might be better to call
* the callbacks and pass the isCommit flag to them, but that would require
* more invasive code changes than currently seems justified.)
*
* Note we make no assumption about the caller's memory context.
* ----------------
*/
void
FreeExprContext(ExprContext *econtext, bool isCommit)
{
EState *estate;
/* Call any registered callbacks */
ShutdownExprContext(econtext, isCommit);
/* And clean up the memory used */
MemoryContextDelete(econtext->ecxt_per_tuple_memory);
/* Unlink self from owning EState, if any */
estate = econtext->ecxt_estate;
if (estate)
estate->es_exprcontexts = list_delete_ptr(estate->es_exprcontexts,
econtext);
/* And delete the ExprContext node */
pfree(econtext);
}
/*
* ReScanExprContext
*
* Reset an expression context in preparation for a rescan of its
* plan node. This requires calling any registered shutdown callbacks,
* since any partially complete set-returning-functions must be canceled.
*
* Note we make no assumption about the caller's memory context.
*/
void
ReScanExprContext(ExprContext *econtext)
{
/* Call any registered callbacks */
ShutdownExprContext(econtext, true);
/* And clean up the memory used */
MemoryContextReset(econtext->ecxt_per_tuple_memory);
}
/*
* Build a per-output-tuple ExprContext for an EState.
*
* This is normally invoked via GetPerTupleExprContext() macro,
* not directly.
*/
ExprContext *
MakePerTupleExprContext(EState *estate)
{
if (estate->es_per_tuple_exprcontext == NULL)
estate->es_per_tuple_exprcontext = CreateExprContext(estate);
return estate->es_per_tuple_exprcontext;
}
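/*
 * For example (illustrative), per-output-tuple processing such as
 * constraint checking typically looks like:
 *
 *		ResetPerTupleExprContext(estate);
 *		econtext = GetPerTupleExprContext(estate);
 *		... evaluate the check expressions in econtext ...
 *
 * Both macros live in executor.h; GetPerTupleExprContext invokes
 * MakePerTupleExprContext on first use.
 */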
/* ----------------------------------------------------------------
* miscellaneous node-init support functions
*
* Note: all of these are expected to be called with CurrentMemoryContext
* equal to the per-query memory context.
* ----------------------------------------------------------------
*/
/* ----------------
* ExecAssignExprContext
*
* This initializes the ps_ExprContext field. It is only necessary
* to do this for nodes which use ExecQual or ExecProject
* because those routines require an econtext. Other nodes that
* don't have to evaluate expressions don't need to do this.
* ----------------
*/
void
ExecAssignExprContext(EState *estate, PlanState *planstate)
{
planstate->ps_ExprContext = CreateExprContext(estate);
}
/* ----------------
* ExecAssignResultType
* ----------------
*/
void
ExecAssignResultType(PlanState *planstate, TupleDesc tupDesc)
{
TupleTableSlot *slot = planstate->ps_ResultTupleSlot;
ExecSetSlotDescriptor(slot, tupDesc);
}
/* ----------------
* ExecAssignResultTypeFromTL
* ----------------
*/
void
ExecAssignResultTypeFromTL(PlanState *planstate)
{
bool hasoid;
TupleDesc tupDesc;
if (ExecContextForcesOids(planstate, &hasoid))
{
/* context forces OID choice; hasoid is now set correctly */
}
else
{
/* given free choice, don't leave space for OIDs in result tuples */
hasoid = false;
}
/*
* ExecTypeFromTL needs the parse-time representation of the tlist, not a
* list of ExprStates. This is good because some plan nodes don't bother
* to set up planstate->targetlist ...
*/
tupDesc = ExecTypeFromTL(planstate->plan->targetlist, hasoid);
ExecAssignResultType(planstate, tupDesc);
}
/* ----------------
* ExecGetResultType
* ----------------
*/
TupleDesc
ExecGetResultType(PlanState *planstate)
{
TupleTableSlot *slot = planstate->ps_ResultTupleSlot;
return slot->tts_tupleDescriptor;
}
/* ----------------
* ExecBuildProjectionInfo
*
* Build a ProjectionInfo node for evaluating the given tlist in the given
* econtext, and storing the result into the tuple slot. (Caller must have
* ensured that tuple slot has a descriptor matching the tlist!) Note that
* the given tlist should be a list of ExprState nodes, not Expr nodes.
*
* inputDesc can be NULL, but if it is not, we check to see whether simple
* Vars in the tlist match the descriptor. It is important to provide
* inputDesc for relation-scan plan nodes, as a cross check that the relation
* hasn't been changed since the plan was made. At higher levels of a plan,
* there is no need to recheck.
* ----------------
*/
ProjectionInfo *
ExecBuildProjectionInfo(List *targetList,
ExprContext *econtext,
TupleTableSlot *slot,
TupleDesc inputDesc)
{
ProjectionInfo *projInfo = makeNode(ProjectionInfo);
int len = ExecTargetListLength(targetList);
int *workspace;
int *varSlotOffsets;
int *varNumbers;
int *varOutputCols;
List *exprlist;
int numSimpleVars;
bool directMap;
ListCell *tl;
projInfo->pi_exprContext = econtext;
projInfo->pi_slot = slot;
/* since these are all int arrays, we need do just one palloc */
workspace = (int *) palloc(len * 3 * sizeof(int));
projInfo->pi_varSlotOffsets = varSlotOffsets = workspace;
projInfo->pi_varNumbers = varNumbers = workspace + len;
projInfo->pi_varOutputCols = varOutputCols = workspace + len * 2;
projInfo->pi_lastInnerVar = 0;
projInfo->pi_lastOuterVar = 0;
projInfo->pi_lastScanVar = 0;
/*
* We separate the target list elements into simple Var references and
* expressions which require the full ExecTargetList machinery. To be a
* simple Var, a Var has to be a user attribute and not mismatch the
* inputDesc. (Note: if there is a type mismatch then ExecEvalScalarVar
* will probably throw an error at runtime, but we leave that to it.)
*/
exprlist = NIL;
numSimpleVars = 0;
directMap = true;
foreach(tl, targetList)
{
GenericExprState *gstate = (GenericExprState *) lfirst(tl);
Var *variable = (Var *) gstate->arg->expr;
bool isSimpleVar = false;
if (variable != NULL &&
IsA(variable, Var) &&
variable->varattno > 0)
{
if (!inputDesc)
isSimpleVar = true; /* can't check type, assume OK */
else if (variable->varattno <= inputDesc->natts)
{
Form_pg_attribute attr;
attr = inputDesc->attrs[variable->varattno - 1];
if (!attr->attisdropped && variable->vartype == attr->atttypid)
isSimpleVar = true;
}
}
if (isSimpleVar)
{
TargetEntry *tle = (TargetEntry *) gstate->xprstate.expr;
AttrNumber attnum = variable->varattno;
varNumbers[numSimpleVars] = attnum;
varOutputCols[numSimpleVars] = tle->resno;
if (tle->resno != numSimpleVars + 1)
directMap = false;
switch (variable->varno)
{
case INNER_VAR:
varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
ecxt_innertuple);
if (projInfo->pi_lastInnerVar < attnum)
projInfo->pi_lastInnerVar = attnum;
break;
case OUTER_VAR:
varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
ecxt_outertuple);
if (projInfo->pi_lastOuterVar < attnum)
projInfo->pi_lastOuterVar = attnum;
break;
/* INDEX_VAR is handled by default case */
default:
varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
ecxt_scantuple);
if (projInfo->pi_lastScanVar < attnum)
projInfo->pi_lastScanVar = attnum;
break;
}
numSimpleVars++;
}
else
{
/* Not a simple variable, add it to generic targetlist */
exprlist = lappend(exprlist, gstate);
/* Examine expr to include contained Vars in lastXXXVar counts */
get_last_attnums((Node *) variable, projInfo);
}
}
projInfo->pi_targetlist = exprlist;
projInfo->pi_numSimpleVars = numSimpleVars;
projInfo->pi_directMap = directMap;
if (exprlist == NIL)
projInfo->pi_itemIsDone = NULL; /* not needed */
else
projInfo->pi_itemIsDone = (ExprDoneCond *)
palloc(len * sizeof(ExprDoneCond));
return projInfo;
}
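/*
 * To illustrate (example invented for this comment): if the targetlist
 * is exactly "Var a, Var b, Var c" over a matching input descriptor, all
 * three entries are simple, pi_targetlist ends up NIL, and pi_directMap
 * is true, so projection reduces to fetching three input columns in
 * output order with no ExecTargetList call.  If the middle entry were
 * instead an expression such as "a + 1", the Var at resno 3 would land
 * in position 2 of the simple-var arrays (3 != 2), clearing pi_directMap
 * and routing the expression through the generic pi_targetlist path.
 */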
/*
* get_last_attnums: expression walker for ExecBuildProjectionInfo
*
* Update the lastXXXVar counts to be at least as large as the largest
* attribute numbers found in the expression
*/
static bool
get_last_attnums(Node *node, ProjectionInfo *projInfo)
{
if (node == NULL)
return false;
if (IsA(node, Var))
{
Var *variable = (Var *) node;
AttrNumber attnum = variable->varattno;
switch (variable->varno)
{
case INNER_VAR:
if (projInfo->pi_lastInnerVar < attnum)
projInfo->pi_lastInnerVar = attnum;
break;
case OUTER_VAR:
if (projInfo->pi_lastOuterVar < attnum)
projInfo->pi_lastOuterVar = attnum;
break;
/* INDEX_VAR is handled by default case */
default:
if (projInfo->pi_lastScanVar < attnum)
projInfo->pi_lastScanVar = attnum;
break;
}
return false;
}
/*
* Don't examine the arguments of Aggrefs or WindowFuncs, because those do
* not represent expressions to be evaluated within the overall
* targetlist's econtext.
*/
if (IsA(node, Aggref))
return false;
if (IsA(node, WindowFunc))
return false;
return expression_tree_walker(node, get_last_attnums,
(void *) projInfo);
}
/* ----------------
* ExecAssignProjectionInfo
*
* forms the projection information from the node's targetlist
*
* Notes for inputDesc are same as for ExecBuildProjectionInfo: supply it
* for a relation-scan node, can pass NULL for upper-level nodes
* ----------------
*/
void
ExecAssignProjectionInfo(PlanState *planstate,
TupleDesc inputDesc)
{
planstate->ps_ProjInfo =
ExecBuildProjectionInfo(planstate->targetlist,
planstate->ps_ExprContext,
planstate->ps_ResultTupleSlot,
inputDesc);
}
/* ----------------
* ExecFreeExprContext
*
* A plan node's ExprContext should be freed explicitly during executor
* shutdown because there may be shutdown callbacks to call. (Other resources
* made by the above routines, such as projection info, don't need to be freed
* explicitly because they're just memory in the per-query memory context.)
*
* However ... there is no particular need to do it during ExecEndNode,
* because FreeExecutorState will free any remaining ExprContexts within
* the EState. Letting FreeExecutorState do it allows the ExprContexts to
* be freed in reverse order of creation, rather than order of creation as
* will happen if we delete them here, which saves O(N^2) work in the list
* cleanup inside FreeExprContext.
* ----------------
*/
void
ExecFreeExprContext(PlanState *planstate)
{
/*
* Per above discussion, don't actually delete the ExprContext. We do
* unlink it from the plan node, though.
*/
planstate->ps_ExprContext = NULL;
}
/* ----------------------------------------------------------------
* the following scan type support functions are for
* those nodes which are stubborn and return tuples in
* their Scan tuple slot instead of their Result tuple
* slot.  Luckily for us, these nodes do not do projections
* so we don't have to worry about getting the ProjectionInfo
* right for them... -cim 6/3/91
* ----------------------------------------------------------------
*/
/* ----------------
* ExecGetScanType
* ----------------
*/
TupleDesc
ExecGetScanType(ScanState *scanstate)
{
TupleTableSlot *slot = scanstate->ss_ScanTupleSlot;
return slot->tts_tupleDescriptor;
}
/* ----------------
* ExecAssignScanType
* ----------------
*/
void
ExecAssignScanType(ScanState *scanstate, TupleDesc tupDesc)
{
TupleTableSlot *slot = scanstate->ss_ScanTupleSlot;
ExecSetSlotDescriptor(slot, tupDesc);
}
/* ----------------
* ExecAssignScanTypeFromOuterPlan
* ----------------
*/
void
ExecAssignScanTypeFromOuterPlan(ScanState *scanstate)
{
PlanState *outerPlan;
TupleDesc tupDesc;
outerPlan = outerPlanState(scanstate);
tupDesc = ExecGetResultType(outerPlan);
ExecAssignScanType(scanstate, tupDesc);
}
/* ----------------------------------------------------------------
* Scan node support
* ----------------------------------------------------------------
*/
/* ----------------------------------------------------------------
* ExecRelationIsTargetRelation
*
* Detect whether a relation (identified by rangetable index)
* is one of the target relations of the query.
* ----------------------------------------------------------------
*/
bool
ExecRelationIsTargetRelation(EState *estate, Index scanrelid)
{
ResultRelInfo *resultRelInfos;
int i;
resultRelInfos = estate->es_result_relations;
for (i = 0; i < estate->es_num_result_relations; i++)
{
if (resultRelInfos[i].ri_RangeTableIndex == scanrelid)
return true;
}
return false;
}
/* ----------------------------------------------------------------
* ExecOpenScanRelation
*
* Open the heap relation to be scanned by a base-level scan plan node.
* This should be called during the node's ExecInit routine.
*
* By default, this acquires AccessShareLock on the relation. However,
* if the relation was already locked by InitPlan, we don't need to acquire
* any additional lock. This saves trips to the shared lock manager.
* ----------------------------------------------------------------
*/
Relation
ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
{
Relation rel;
Oid reloid;
LOCKMODE lockmode;
/*
* Determine the lock type we need. First, scan to see if target relation
* is a result relation. If not, check if it's a FOR UPDATE/FOR SHARE
* relation. In either of those cases, we got the lock already.
*/
lockmode = AccessShareLock;
if (ExecRelationIsTargetRelation(estate, scanrelid))
lockmode = NoLock;
else
{
ListCell *l;
foreach(l, estate->es_rowMarks)
{
ExecRowMark *erm = lfirst(l);
if (erm->rti == scanrelid)
{
lockmode = NoLock;
break;
}
}
}
/* Open the relation and acquire lock as needed */
reloid = getrelid(scanrelid, estate->es_range_table);
rel = heap_open(reloid, lockmode);
/*
* Complain if we're attempting a scan of an unscannable relation, except
* when the query won't actually be run. This is a slightly klugy place
* to do this, perhaps, but there is no better place.
*/
if ((eflags & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA)) == 0 &&
!RelationIsScannable(rel))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("materialized view \"%s\" has not been populated",
RelationGetRelationName(rel)),
errhint("Use the REFRESH MATERIALIZED VIEW command.")));
return rel;
}
/* ----------------------------------------------------------------
* ExecCloseScanRelation
*
* Close the heap relation scanned by a base-level scan plan node.
* This should be called during the node's ExecEnd routine.
*
* Currently, we do not release the lock acquired by ExecOpenScanRelation.
* This lock should be held till end of transaction. (There is a faction
* that considers this too much locking, however.)
*
* If we did want to release the lock, we'd have to repeat the logic in
* ExecOpenScanRelation in order to figure out what to release.
* ----------------------------------------------------------------
*/
void
ExecCloseScanRelation(Relation scanrel)
{
heap_close(scanrel, NoLock);
}
/* ----------------------------------------------------------------
* ExecInsertIndexTuples support
* ----------------------------------------------------------------
*/
/* ----------------------------------------------------------------
* ExecOpenIndices
*
* Find the indices associated with a result relation, open them,
* and save information about them in the result ResultRelInfo.
*
* At entry, caller has already opened and locked
* resultRelInfo->ri_RelationDesc.
* ----------------------------------------------------------------
*/
void
ExecOpenIndices(ResultRelInfo *resultRelInfo)
{
Relation resultRelation = resultRelInfo->ri_RelationDesc;
List *indexoidlist;
ListCell *l;
int len,
i;
RelationPtr relationDescs;
IndexInfo **indexInfoArray;
resultRelInfo->ri_NumIndices = 0;
/* fast path if no indexes */
if (!RelationGetForm(resultRelation)->relhasindex)
return;
/*
* Get cached list of index OIDs
*/
indexoidlist = RelationGetIndexList(resultRelation);
len = list_length(indexoidlist);
if (len == 0)
return;
/*
* allocate space for result arrays
*/
relationDescs = (RelationPtr) palloc(len * sizeof(Relation));
indexInfoArray = (IndexInfo **) palloc(len * sizeof(IndexInfo *));
resultRelInfo->ri_NumIndices = len;
resultRelInfo->ri_IndexRelationDescs = relationDescs;
resultRelInfo->ri_IndexRelationInfo = indexInfoArray;
/*
* For each index, open the index relation and save pg_index info. We
* acquire RowExclusiveLock, signifying we will update the index.
*
* Note: we do this even if the index is not IndexIsReady; it's not worth
* the trouble to optimize for the case where it isn't.
*/
i = 0;
foreach(l, indexoidlist)
{
Oid indexOid = lfirst_oid(l);
Relation indexDesc;
IndexInfo *ii;
indexDesc = index_open(indexOid, RowExclusiveLock);
/* extract index key information from the index's pg_index info */
ii = BuildIndexInfo(indexDesc);
relationDescs[i] = indexDesc;
indexInfoArray[i] = ii;
i++;
}
list_free(indexoidlist);
}
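/*
 * The expected calling pattern, sketched after what ExecInsert and
 * ExecUpdate in nodeModifyTable.c do:
 *
 *		ExecOpenIndices(resultRelInfo);
 *		... then, for each new heap tuple:
 *		recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
 *											   estate);
 *		... and at executor shutdown:
 *		ExecCloseIndices(resultRelInfo);
 */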
/* ----------------------------------------------------------------
* ExecCloseIndices
*
* Close the index relations stored in resultRelInfo
* ----------------------------------------------------------------
*/
void
ExecCloseIndices(ResultRelInfo *resultRelInfo)
{
int i;
int numIndices;
RelationPtr indexDescs;
numIndices = resultRelInfo->ri_NumIndices;
indexDescs = resultRelInfo->ri_IndexRelationDescs;
for (i = 0; i < numIndices; i++)
{
if (indexDescs[i] == NULL)
continue; /* shouldn't happen? */
/* Drop lock acquired by ExecOpenIndices */
index_close(indexDescs[i], RowExclusiveLock);
}
/*
* XXX should free indexInfo array here too? Currently we assume that
* such stuff will be cleaned up automatically in FreeExecutorState.
*/
}
/* ----------------------------------------------------------------
* ExecInsertIndexTuples
*
* This routine takes care of inserting index tuples
* into all the relations indexing the result relation
* when a heap tuple is inserted into the result relation.
* Much of this code should be moved into the genam
* stuff as it only exists here because the genam stuff
* doesn't provide the functionality needed by the
* executor.. -cim 9/27/89
*
* This returns a list of index OIDs for any unique or exclusion
* constraints that are deferred and that had
* potential (unconfirmed) conflicts.
*
* CAUTION: this must not be called for a HOT update.
* We can't defend against that here for lack of info.
* Should we change the API to make it safer?
* ----------------------------------------------------------------
*/
List *
ExecInsertIndexTuples(TupleTableSlot *slot,
ItemPointer tupleid,
EState *estate)
{
List *result = NIL;
ResultRelInfo *resultRelInfo;
int i;
int numIndices;
RelationPtr relationDescs;
Relation heapRelation;
IndexInfo **indexInfoArray;
ExprContext *econtext;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
/*
* Get information from the result relation info structure.
*/
resultRelInfo = estate->es_result_relation_info;
numIndices = resultRelInfo->ri_NumIndices;
relationDescs = resultRelInfo->ri_IndexRelationDescs;
indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
heapRelation = resultRelInfo->ri_RelationDesc;
/*
* We will use the EState's per-tuple context for evaluating predicates
* and index expressions (creating it if it's not already there).
*/
econtext = GetPerTupleExprContext(estate);
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/*
* for each index, form and insert the index tuple
*/
for (i = 0; i < numIndices; i++)
{
Relation indexRelation = relationDescs[i];
IndexInfo *indexInfo;
IndexUniqueCheck checkUnique;
bool satisfiesConstraint;
if (indexRelation == NULL)
continue;
indexInfo = indexInfoArray[i];
/* If the index is marked as read-only, ignore it */
if (!indexInfo->ii_ReadyForInserts)
continue;
/* Check for partial index */
if (indexInfo->ii_Predicate != NIL)
{
List *predicate;
/*
* If predicate state not set up yet, create it (in the estate's
* per-query context)
*/
predicate = indexInfo->ii_PredicateState;
if (predicate == NIL)
{
predicate = (List *)
ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
estate);
indexInfo->ii_PredicateState = predicate;
}
/* Skip this index-update if the predicate isn't satisfied */
if (!ExecQual(predicate, econtext, false))
continue;
}
/*
* FormIndexDatum fills in its values and isnull parameters with the
* appropriate values for the column(s) of the index.
*/
FormIndexDatum(indexInfo,
slot,
estate,
values,
isnull);
/*
* The index AM does the actual insertion, plus uniqueness checking.
*
* For an immediate-mode unique index, we just tell the index AM to
* throw error if not unique.
*
* For a deferrable unique index, we tell the index AM to just detect
* possible non-uniqueness, and we add the index OID to the result
* list if further checking is needed.
*/
if (!indexRelation->rd_index->indisunique)
checkUnique = UNIQUE_CHECK_NO;
else if (indexRelation->rd_index->indimmediate)
checkUnique = UNIQUE_CHECK_YES;
else
checkUnique = UNIQUE_CHECK_PARTIAL;
satisfiesConstraint =
index_insert(indexRelation, /* index relation */
values, /* array of index Datums */
isnull, /* null flags */
tupleid, /* tid of heap tuple */
heapRelation, /* heap relation */
checkUnique); /* type of uniqueness check to do */
/*
* If the index has an associated exclusion constraint, check that.
* This is simpler than the process for uniqueness checks since we
* always insert first and then check. If the constraint is deferred,
* we check now anyway, but don't throw error on violation; instead
* we'll queue a recheck event.
*
* An index for an exclusion constraint can't also be UNIQUE (not an
* essential property, we just don't allow it in the grammar), so no
* need to preserve the prior state of satisfiesConstraint.
*/
if (indexInfo->ii_ExclusionOps != NULL)
{
bool errorOK = !indexRelation->rd_index->indimmediate;
satisfiesConstraint =
check_exclusion_constraint(heapRelation,
indexRelation, indexInfo,
tupleid, values, isnull,
estate, false, errorOK);
}
if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
indexInfo->ii_ExclusionOps != NULL) &&
!satisfiesConstraint)
{
/*
* The tuple potentially violates the uniqueness or exclusion
* constraint, so make a note of the index so that we can re-check
* it later.
*/
result = lappend_oid(result, RelationGetRelid(indexRelation));
}
}
return result;
}
/*
* Check for violation of an exclusion constraint
*
* heap: the table containing the new tuple
* index: the index supporting the exclusion constraint
* indexInfo: info about the index, including the exclusion properties
* tupleid: heap TID of the new tuple we have just inserted
* values, isnull: the *index* column values computed for the new tuple
* estate: an EState we can do evaluation in
* newIndex: if true, we are trying to build a new index (this affects
* only the wording of error messages)
* errorOK: if true, don't throw error for violation
*
* Returns true if OK, false if actual or potential violation
*
* When errorOK is true, we report violation without waiting to see if any
* concurrent transaction has committed or not; so the violation is only
* potential, and the caller must recheck sometime later. This behavior
* is convenient for deferred exclusion checks; we need not bother queuing
* a deferred event if there is definitely no conflict at insertion time.
*
* When errorOK is false, we'll throw error on violation, so a false result
* is impossible.
*/
bool
check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
ItemPointer tupleid, Datum *values, bool *isnull,
EState *estate, bool newIndex, bool errorOK)
{
Oid *constr_procs = indexInfo->ii_ExclusionProcs;
uint16 *constr_strats = indexInfo->ii_ExclusionStrats;
Oid *index_collations = index->rd_indcollation;
int index_natts = index->rd_index->indnatts;
IndexScanDesc index_scan;
HeapTuple tup;
ScanKeyData scankeys[INDEX_MAX_KEYS];
SnapshotData DirtySnapshot;
int i;
bool conflict;
bool found_self;
ExprContext *econtext;
TupleTableSlot *existing_slot;
TupleTableSlot *save_scantuple;
/*
* If any of the input values are NULL, the constraint check is assumed to
* pass (i.e., we assume the operators are strict).
*/
for (i = 0; i < index_natts; i++)
{
if (isnull[i])
return true;
}
/*
* Search the tuples that are in the index for any violations, including
* tuples that aren't visible yet.
*/
InitDirtySnapshot(DirtySnapshot);
for (i = 0; i < index_natts; i++)
{
ScanKeyEntryInitialize(&scankeys[i],
0,
i + 1,
constr_strats[i],
InvalidOid,
index_collations[i],
constr_procs[i],
values[i]);
}
/*
* Need a TupleTableSlot to put existing tuples in.
*
* To use FormIndexDatum, we have to make the econtext's scantuple point
* to this slot. Be sure to save and restore caller's value for
* scantuple.
*/
existing_slot = MakeSingleTupleTableSlot(RelationGetDescr(heap));
econtext = GetPerTupleExprContext(estate);
save_scantuple = econtext->ecxt_scantuple;
econtext->ecxt_scantuple = existing_slot;
/*
* May have to restart scan from this point if a potential conflict is
* found.
*/
retry:
conflict = false;
found_self = false;
index_scan = index_beginscan(heap, index, &DirtySnapshot, index_natts, 0);
index_rescan(index_scan, scankeys, index_natts, NULL, 0);
while ((tup = index_getnext(index_scan,
ForwardScanDirection)) != NULL)
{
TransactionId xwait;
Datum existing_values[INDEX_MAX_KEYS];
bool existing_isnull[INDEX_MAX_KEYS];
char *error_new;
char *error_existing;
/*
* Ignore the entry for the tuple we're trying to check.
*/
if (ItemPointerEquals(tupleid, &tup->t_self))
{
if (found_self) /* should not happen */
elog(ERROR, "found self tuple multiple times in index \"%s\"",
RelationGetRelationName(index));
found_self = true;
continue;
}
/*
* Extract the index column values and isnull flags from the existing
* tuple.
*/
ExecStoreTuple(tup, existing_slot, InvalidBuffer, false);
FormIndexDatum(indexInfo, existing_slot, estate,
existing_values, existing_isnull);
/* If lossy indexscan, must recheck the condition */
if (index_scan->xs_recheck)
{
if (!index_recheck_constraint(index,
constr_procs,
existing_values,
existing_isnull,
values))
continue; /* tuple doesn't actually match, so no
* conflict */
}
/*
2010-02-26 03:01:40 +01:00
* At this point we have either a conflict or a potential conflict. If
* we're not supposed to raise error, just return the fact of the
* potential conflict without waiting to see if it's real.
*/
if (errorOK)
{
conflict = true;
break;
}
/*
* If an in-progress transaction is affecting the visibility of this
2010-02-26 03:01:40 +01:00
* tuple, we need to wait for it to complete and then recheck. For
* simplicity we do rechecking by just restarting the whole scan ---
* this case probably doesn't happen often enough to be worth trying
* harder, and anyway we don't want to hold any index internal locks
* while waiting.
*/
xwait = TransactionIdIsValid(DirtySnapshot.xmin) ?
DirtySnapshot.xmin : DirtySnapshot.xmax;
if (TransactionIdIsValid(xwait))
{
index_endscan(index_scan);
XactLockTableWait(xwait);
goto retry;
}
/*
* We have a definite conflict. Report it.
*/
error_new = BuildIndexValueDescription(index, values, isnull);
error_existing = BuildIndexValueDescription(index, existing_values,
existing_isnull);
if (newIndex)
ereport(ERROR,
(errcode(ERRCODE_EXCLUSION_VIOLATION),
errmsg("could not create exclusion constraint \"%s\"",
RelationGetRelationName(index)),
errdetail("Key %s conflicts with key %s.",
error_new, error_existing),
errtableconstraint(heap,
RelationGetRelationName(index))));
else
ereport(ERROR,
(errcode(ERRCODE_EXCLUSION_VIOLATION),
errmsg("conflicting key value violates exclusion constraint \"%s\"",
RelationGetRelationName(index)),
errdetail("Key %s conflicts with existing key %s.",
error_new, error_existing),
errtableconstraint(heap,
RelationGetRelationName(index))));
}
index_endscan(index_scan);
/*
* Ordinarily, at this point the search should have found the originally
* inserted tuple, unless we exited the loop early because of conflict.
* However, it is possible to define exclusion constraints for which that
* wouldn't be true --- for instance, if the operator is <>. So we no
* longer complain if found_self is still false.
*/
econtext->ecxt_scantuple = save_scantuple;
ExecDropSingleTupleTableSlot(existing_slot);
return !conflict;
}
/*
* Check existing tuple's index values to see if it really matches the
* exclusion condition against the new_values. Returns true if conflict.
*/
static bool
index_recheck_constraint(Relation index, Oid *constr_procs,
Datum *existing_values, bool *existing_isnull,
Datum *new_values)
{
int index_natts = index->rd_index->indnatts;
int i;
for (i = 0; i < index_natts; i++)
{
/* Assume the exclusion operators are strict */
if (existing_isnull[i])
return false;
if (!DatumGetBool(OidFunctionCall2Coll(constr_procs[i],
index->rd_indcollation[i],
existing_values[i],
new_values[i])))
return false;
}
return true;
}
/*
* UpdateChangedParamSet
* Add changed parameters to a plan node's chgParam set
*/
void
UpdateChangedParamSet(PlanState *node, Bitmapset *newchg)
{
Bitmapset *parmset;
/*
* The plan node only depends on params listed in its allParam set. Don't
* include anything else into its chgParam set.
*/
parmset = bms_intersect(node->plan->allParam, newchg);
/*
* Keep node->chgParam == NULL if there's not actually any members; this
* allows the simplest possible tests in executor node files.
*/
if (!bms_is_empty(parmset))
node->chgParam = bms_join(node->chgParam, parmset);
else
bms_free(parmset);
}
/*
* Register a shutdown callback in an ExprContext.
*
* Shutdown callbacks will be called (in reverse order of registration)
* when the ExprContext is deleted or rescanned. This provides a hook
* for functions called in the context to do any cleanup needed --- it's
* particularly useful for functions returning sets. Note that the
* callback will *not* be called in the event that execution is aborted
* by an error.
*/
void
RegisterExprContextCallback(ExprContext *econtext,
ExprContextCallbackFunction function,
Datum arg)
{
ExprContext_CB *ecxt_callback;
/* Save the info in appropriate memory context */
ecxt_callback = (ExprContext_CB *)
MemoryContextAlloc(econtext->ecxt_per_query_memory,
sizeof(ExprContext_CB));
ecxt_callback->function = function;
ecxt_callback->arg = arg;
/* link to front of list for appropriate execution order */
ecxt_callback->next = econtext->ecxt_callbacks;
econtext->ecxt_callbacks = ecxt_callback;
}
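/*
 * Example (a sketch; "shutdown_my_scan" is a made-up name): a
 * set-returning function that keeps a file open across calls can ensure
 * it is closed when the ExprContext is shut down or rescanned:
 *
 *		static void
 *		shutdown_my_scan(Datum arg)
 *		{
 *			FreeFile((FILE *) DatumGetPointer(arg));
 *		}
 *
 *		RegisterExprContextCallback(econtext, shutdown_my_scan,
 *									PointerGetDatum(fp));
 *
 * Here fp is assumed to have come from AllocateFile.
 */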
/*
* Deregister a shutdown callback in an ExprContext.
*
* Any list entries matching the function and arg will be removed.
* This can be used if it's no longer necessary to call the callback.
*/
void
UnregisterExprContextCallback(ExprContext *econtext,
ExprContextCallbackFunction function,
Datum arg)
{
ExprContext_CB **prev_callback;
ExprContext_CB *ecxt_callback;
prev_callback = &econtext->ecxt_callbacks;
while ((ecxt_callback = *prev_callback) != NULL)
{
if (ecxt_callback->function == function && ecxt_callback->arg == arg)
{
*prev_callback = ecxt_callback->next;
pfree(ecxt_callback);
}
else
prev_callback = &ecxt_callback->next;
}
}
/*
* Call all the shutdown callbacks registered in an ExprContext.
*
* The callback list is emptied (important in case this is only a rescan
* reset, and not deletion of the ExprContext).
*
* If isCommit is false, just clean the callback list but don't call 'em.
* (See comment for FreeExprContext.)
*/
static void
ShutdownExprContext(ExprContext *econtext, bool isCommit)
{
ExprContext_CB *ecxt_callback;
MemoryContext oldcontext;
/* Fast path in normal case where there's nothing to do. */
if (econtext->ecxt_callbacks == NULL)
return;
/*
* Call the callbacks in econtext's per-tuple context. This ensures that
* any memory they might leak will get cleaned up.
*/
oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
/*
* Call each callback function in reverse registration order.
*/
while ((ecxt_callback = econtext->ecxt_callbacks) != NULL)
{
econtext->ecxt_callbacks = ecxt_callback->next;
if (isCommit)
(*ecxt_callback->function) (ecxt_callback->arg);
pfree(ecxt_callback);
}
MemoryContextSwitchTo(oldcontext);
}