postgresql/src/backend/executor/nodeNestloop.c

411 lines
11 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* nodeNestloop.c
* routines to support nest-loop joins
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/executor/nodeNestloop.c
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* ExecNestLoop - process a nestloop join of two plans
* ExecInitNestLoop - initialize the join
* ExecEndNestLoop - shut down the join
*/
#include "postgres.h"
1996-11-08 07:02:30 +01:00
#include "executor/execdebug.h"
#include "executor/nodeNestloop.h"
#include "miscadmin.h"
#include "utils/memutils.h"
/* ----------------------------------------------------------------
* ExecNestLoop(node)
*
* old comments
* Returns the tuple joined from inner and outer tuples which
* satisfies the qualification clause.
*
* It scans the inner relation to join with current outer tuple.
*
* If none is found, next tuple from the outer relation is retrieved
* and the inner relation is scanned from the beginning again to join
* with the outer tuple.
*
* NULL is returned if all the remaining outer tuples are tried and
* all fail to join with the inner tuples.
*
* NULL is also returned if there is no tuple from inner relation.
*
* Conditions:
* -- outerTuple contains current tuple from outer relation and
* the right son(inner relation) maintains "cursor" at the tuple
* returned previously.
* This is achieved by maintaining a scan position on the outer
* relation.
*
* Initial States:
* -- the outer child and the inner child
* are prepared to return the first tuple.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecNestLoop(PlanState *pstate)
{
NestLoopState *node = castNode(NestLoopState, pstate);
NestLoop *nl;
PlanState *innerPlan;
PlanState *outerPlan;
TupleTableSlot *outerTupleSlot;
TupleTableSlot *innerTupleSlot;
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
ExprState *joinqual;
ExprState *otherqual;
ExprContext *econtext;
ListCell *lc;
CHECK_FOR_INTERRUPTS();
/*
* get information from the node
*/
ENL1_printf("getting info from node");
nl = (NestLoop *) node->js.ps.plan;
joinqual = node->js.joinqual;
otherqual = node->js.ps.qual;
outerPlan = outerPlanState(node);
innerPlan = innerPlanState(node);
econtext = node->js.ps.ps_ExprContext;
/*
* Reset per-tuple memory context to free any expression evaluation
* storage allocated in the previous tuple cycle.
*/
ResetExprContext(econtext);
/*
* Ok, everything is setup for the join so now loop until we return a
* qualifying join tuple.
*/
ENL1_printf("entering main loop");
for (;;)
{
/*
* If we don't have an outer tuple, get the next one and reset the
* inner scan.
*/
if (node->nl_NeedNewOuter)
{
ENL1_printf("getting new outer tuple");
outerTupleSlot = ExecProcNode(outerPlan);
/*
2005-10-15 04:49:52 +02:00
* if there are no more outer tuples, then the join is complete..
*/
if (TupIsNull(outerTupleSlot))
{
ENL1_printf("no outer tuple, ending join");
return NULL;
}
ENL1_printf("saving new outer tuple information");
econtext->ecxt_outertuple = outerTupleSlot;
node->nl_NeedNewOuter = false;
node->nl_MatchedOuter = false;
/*
2011-04-10 17:42:00 +02:00
* fetch the values of any outer Vars that must be passed to the
* inner scan, and store them in the appropriate PARAM_EXEC slots.
*/
foreach(lc, nl->nestParams)
{
NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
int paramno = nlp->paramno;
ParamExecData *prm;
prm = &(econtext->ecxt_param_exec_vals[paramno]);
/* Param value should be an OUTER_VAR var */
Assert(IsA(nlp->paramval, Var));
Assert(nlp->paramval->varno == OUTER_VAR);
Assert(nlp->paramval->varattno > 0);
prm->value = slot_getattr(outerTupleSlot,
nlp->paramval->varattno,
&(prm->isnull));
/* Flag parameter value as changed */
innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
paramno);
}
/*
* now rescan the inner plan
*/
ENL1_printf("rescanning inner plan");
ExecReScan(innerPlan);
}
/*
* we have an outerTuple, try to get the next inner tuple.
*/
ENL1_printf("getting new inner tuple");
innerTupleSlot = ExecProcNode(innerPlan);
econtext->ecxt_innertuple = innerTupleSlot;
if (TupIsNull(innerTupleSlot))
{
ENL1_printf("no inner tuple, need new outer tuple");
node->nl_NeedNewOuter = true;
if (!node->nl_MatchedOuter &&
(node->js.jointype == JOIN_LEFT ||
node->js.jointype == JOIN_ANTI))
{
/*
2005-10-15 04:49:52 +02:00
* We are doing an outer join and there were no join matches
* for this outer tuple. Generate a fake join tuple with
* nulls for the inner tuple, and return it if it passes the
* non-join quals.
*/
econtext->ecxt_innertuple = node->nl_NullInnerTupleSlot;
ENL1_printf("testing qualification for outer-join tuple");
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
if (otherqual == NULL || ExecQual(otherqual, econtext))
{
/*
2005-10-15 04:49:52 +02:00
* qualification was satisfied so we project and return
* the slot containing the result tuple using
* ExecProject().
*/
ENL1_printf("qualification succeeded, projecting tuple");
return ExecProject(node->js.ps.ps_ProjInfo);
}
else
InstrCountFiltered2(node, 1);
}
2001-03-22 05:01:46 +01:00
/*
* Otherwise just return to top of loop for a new outer tuple.
*/
continue;
}
/*
2005-10-15 04:49:52 +02:00
* at this point we have a new pair of inner and outer tuples so we
* test the inner and outer tuples to see if they satisfy the node's
* qualification.
*
* Only the joinquals determine MatchedOuter status, but all quals
* must pass to actually return the tuple.
*/
ENL1_printf("testing qualification");
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
if (ExecQual(joinqual, econtext))
{
node->nl_MatchedOuter = true;
/* In an antijoin, we never return a matched tuple */
if (node->js.jointype == JOIN_ANTI)
{
node->nl_NeedNewOuter = true;
continue; /* return to top of loop */
}
/*
* If we only need to join to the first matching inner tuple, then
* consider returning this one, but after that continue with next
* outer tuple.
*/
if (node->js.single_match)
node->nl_NeedNewOuter = true;
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
if (otherqual == NULL || ExecQual(otherqual, econtext))
{
/*
* qualification was satisfied so we project and return the
* slot containing the result tuple using ExecProject().
*/
ENL1_printf("qualification succeeded, projecting tuple");
return ExecProject(node->js.ps.ps_ProjInfo);
}
else
InstrCountFiltered2(node, 1);
}
else
InstrCountFiltered1(node, 1);
/*
* Tuple fails qual, so free per-tuple memory and try again.
*/
ResetExprContext(econtext);
ENL1_printf("qualification failed, looping");
}
}
/* ----------------------------------------------------------------
* ExecInitNestLoop
* ----------------------------------------------------------------
*/
NestLoopState *
ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
{
NestLoopState *nlstate;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
NL1_printf("ExecInitNestLoop: %s\n",
"initializing node");
/*
* create state structure
*/
nlstate = makeNode(NestLoopState);
nlstate->js.ps.plan = (Plan *) node;
nlstate->js.ps.state = estate;
nlstate->js.ps.ExecProcNode = ExecNestLoop;
/*
* Miscellaneous initialization
*
* create expression context for node
*/
ExecAssignExprContext(estate, &nlstate->js.ps);
/*
* initialize child nodes
*
* If we have no parameters to pass into the inner rel from the outer,
* tell the inner child that cheap rescans would be good. If we do have
2011-04-10 17:42:00 +02:00
* such parameters, then there is no point in REWIND support at all in the
* inner child, because it will always be rescanned with fresh parameter
* values.
*/
outerPlanState(nlstate) = ExecInitNode(outerPlan(node), estate, eflags);
if (node->nestParams == NIL)
eflags |= EXEC_FLAG_REWIND;
else
eflags &= ~EXEC_FLAG_REWIND;
innerPlanState(nlstate) = ExecInitNode(innerPlan(node), estate, eflags);
/*
* Initialize result slot, type and projection.
*/
Don't require return slots for nodes without projection. In a lot of nodes the return slot is not required. That can either be because the node doesn't do any projection (say an Append node), or because the node does perform projections but the projection is optimized away because the projection would yield an identical row. Slots aren't that small, especially for wide rows, so it's worthwhile to avoid creating them. It's not possible to just skip creating the slot - it's currently used to determine the tuple descriptor returned by ExecGetResultType(). So separate the determination of the result type from the slot creation. The work previously done internally ExecInitResultTupleSlotTL() can now also be done separately with ExecInitResultTypeTL() and ExecInitResultSlot(). That way nodes that aren't guaranteed to need a result slot, can use ExecInitResultTypeTL() to determine the result type of the node, and ExecAssignScanProjectionInfo() (via ExecConditionalAssignProjectionInfo()) determines that a result slot is needed, it is created with ExecInitResultSlot(). Besides the advantage of avoiding to create slots that then are unused, this is necessary preparation for later patches around tuple table slot abstraction. In particular separating the return descriptor and slot is a prerequisite to allow JITing of tuple deforming with knowledge of the underlying tuple format, and to avoid unnecessarily creating JITed tuple deforming for virtual slots. This commit removes a redundant argument from ExecInitResultTupleSlotTL(). While this commit touches a lot of the relevant lines anyway, it'd normally still not worthwhile to cause breakage, except that aforementioned later commits will touch *all* ExecInitResultTupleSlotTL() callers anyway (but fits worse thematically). Author: Andres Freund Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de
2018-11-10 02:19:39 +01:00
ExecInitResultTupleSlotTL(&nlstate->js.ps);
ExecAssignProjectionInfo(&nlstate->js.ps, NULL);
/*
* initialize child expressions
*/
nlstate->js.ps.qual =
ExecInitQual(node->join.plan.qual, (PlanState *) nlstate);
nlstate->js.jointype = node->join.jointype;
nlstate->js.joinqual =
ExecInitQual(node->join.joinqual, (PlanState *) nlstate);
/*
* detect whether we need only consider the first matching inner tuple
*/
nlstate->js.single_match = (node->join.inner_unique ||
node->join.jointype == JOIN_SEMI);
/* set up null tuples for outer joins, if needed */
switch (node->join.jointype)
{
case JOIN_INNER:
case JOIN_SEMI:
break;
case JOIN_LEFT:
case JOIN_ANTI:
nlstate->nl_NullInnerTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(nlstate)));
break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) node->join.jointype);
}
/*
* finally, wipe the current outer tuple clean.
*/
nlstate->nl_NeedNewOuter = true;
nlstate->nl_MatchedOuter = false;
NL1_printf("ExecInitNestLoop: %s\n",
"node initialized");
return nlstate;
}
/* ----------------------------------------------------------------
* ExecEndNestLoop
*
* closes down scans and frees allocated storage
* ----------------------------------------------------------------
*/
void
ExecEndNestLoop(NestLoopState *node)
{
NL1_printf("ExecEndNestLoop: %s\n",
"ending node processing");
/*
* Free the exprcontext
*/
ExecFreeExprContext(&node->js.ps);
/*
* clean out the tuple table
*/
ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
/*
* close down subplans
*/
ExecEndNode(outerPlanState(node));
ExecEndNode(innerPlanState(node));
NL1_printf("ExecEndNestLoop: %s\n",
"node processing ended");
}
/* ----------------------------------------------------------------
* ExecReScanNestLoop
* ----------------------------------------------------------------
*/
void
ExecReScanNestLoop(NestLoopState *node)
{
2003-08-04 02:43:34 +02:00
PlanState *outerPlan = outerPlanState(node);
/*
* If outerPlan->chgParam is not null then plan will be automatically
* re-scanned by first ExecProcNode.
*/
if (outerPlan->chgParam == NULL)
ExecReScan(outerPlan);
/*
* innerPlan is re-scanned for each new outer tuple and MUST NOT be
* re-scanned from here or you'll get troubles from inner index scans when
* outer Vars are used as run-time keys...
*/
node->nl_NeedNewOuter = true;
node->nl_MatchedOuter = false;
}