postgresql/src/backend/executor/nodeNestloop.c

412 lines
11 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* nodeNestloop.c
* routines to support nest-loop joins
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/executor/nodeNestloop.c
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* ExecNestLoop - process a nestloop join of two plans
* ExecInitNestLoop - initialize the join
* ExecEndNestLoop - shut down the join
*/
#include "postgres.h"
1996-11-08 07:02:30 +01:00
#include "executor/execdebug.h"
#include "executor/nodeNestloop.h"
#include "miscadmin.h"
#include "utils/memutils.h"
/* ----------------------------------------------------------------
* ExecNestLoop(node)
*
* old comments
* Returns the tuple joined from inner and outer tuples which
* satisfies the qualification clause.
*
* It scans the inner relation to join with current outer tuple.
*
* If none is found, next tuple from the outer relation is retrieved
* and the inner relation is scanned from the beginning again to join
* with the outer tuple.
*
* NULL is returned if all the remaining outer tuples are tried and
* all fail to join with the inner tuples.
*
* NULL is also returned if there is no tuple from inner relation.
*
* Conditions:
* -- outerTuple contains current tuple from outer relation and
* the right son(inner relation) maintains "cursor" at the tuple
* returned previously.
* This is achieved by maintaining a scan position on the outer
* relation.
*
* Initial States:
* -- the outer child and the inner child
* are prepared to return the first tuple.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecNestLoop(PlanState *pstate)
{
NestLoopState *node = castNode(NestLoopState, pstate);
NestLoop *nl;
PlanState *innerPlan;
PlanState *outerPlan;
TupleTableSlot *outerTupleSlot;
TupleTableSlot *innerTupleSlot;
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
ExprState *joinqual;
ExprState *otherqual;
ExprContext *econtext;
ListCell *lc;
CHECK_FOR_INTERRUPTS();
/*
* get information from the node
*/
ENL1_printf("getting info from node");
nl = (NestLoop *) node->js.ps.plan;
joinqual = node->js.joinqual;
otherqual = node->js.ps.qual;
outerPlan = outerPlanState(node);
innerPlan = innerPlanState(node);
econtext = node->js.ps.ps_ExprContext;
/*
* Reset per-tuple memory context to free any expression evaluation
* storage allocated in the previous tuple cycle.
*/
ResetExprContext(econtext);
/*
* Ok, everything is setup for the join so now loop until we return a
* qualifying join tuple.
*/
ENL1_printf("entering main loop");
for (;;)
{
/*
* If we don't have an outer tuple, get the next one and reset the
* inner scan.
*/
if (node->nl_NeedNewOuter)
{
ENL1_printf("getting new outer tuple");
outerTupleSlot = ExecProcNode(outerPlan);
/*
2005-10-15 04:49:52 +02:00
* if there are no more outer tuples, then the join is complete..
*/
if (TupIsNull(outerTupleSlot))
{
ENL1_printf("no outer tuple, ending join");
return NULL;
}
ENL1_printf("saving new outer tuple information");
econtext->ecxt_outertuple = outerTupleSlot;
node->nl_NeedNewOuter = false;
node->nl_MatchedOuter = false;
/*
2011-04-10 17:42:00 +02:00
* fetch the values of any outer Vars that must be passed to the
* inner scan, and store them in the appropriate PARAM_EXEC slots.
*/
foreach(lc, nl->nestParams)
{
NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
int paramno = nlp->paramno;
ParamExecData *prm;
prm = &(econtext->ecxt_param_exec_vals[paramno]);
/* Param value should be an OUTER_VAR var */
Assert(IsA(nlp->paramval, Var));
Assert(nlp->paramval->varno == OUTER_VAR);
Assert(nlp->paramval->varattno > 0);
prm->value = slot_getattr(outerTupleSlot,
nlp->paramval->varattno,
&(prm->isnull));
/* Flag parameter value as changed */
innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
paramno);
}
/*
* now rescan the inner plan
*/
ENL1_printf("rescanning inner plan");
ExecReScan(innerPlan);
}
/*
* we have an outerTuple, try to get the next inner tuple.
*/
ENL1_printf("getting new inner tuple");
innerTupleSlot = ExecProcNode(innerPlan);
econtext->ecxt_innertuple = innerTupleSlot;
if (TupIsNull(innerTupleSlot))
{
ENL1_printf("no inner tuple, need new outer tuple");
node->nl_NeedNewOuter = true;
if (!node->nl_MatchedOuter &&
(node->js.jointype == JOIN_LEFT ||
node->js.jointype == JOIN_ANTI))
{
/*
2005-10-15 04:49:52 +02:00
* We are doing an outer join and there were no join matches
* for this outer tuple. Generate a fake join tuple with
* nulls for the inner tuple, and return it if it passes the
* non-join quals.
*/
econtext->ecxt_innertuple = node->nl_NullInnerTupleSlot;
ENL1_printf("testing qualification for outer-join tuple");
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
if (otherqual == NULL || ExecQual(otherqual, econtext))
{
/*
2005-10-15 04:49:52 +02:00
* qualification was satisfied so we project and return
* the slot containing the result tuple using
* ExecProject().
*/
ENL1_printf("qualification succeeded, projecting tuple");
return ExecProject(node->js.ps.ps_ProjInfo);
}
else
InstrCountFiltered2(node, 1);
}
2001-03-22 05:01:46 +01:00
/*
* Otherwise just return to top of loop for a new outer tuple.
*/
continue;
}
/*
2005-10-15 04:49:52 +02:00
* at this point we have a new pair of inner and outer tuples so we
* test the inner and outer tuples to see if they satisfy the node's
* qualification.
*
* Only the joinquals determine MatchedOuter status, but all quals
* must pass to actually return the tuple.
*/
ENL1_printf("testing qualification");
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
if (ExecQual(joinqual, econtext))
{
node->nl_MatchedOuter = true;
/* In an antijoin, we never return a matched tuple */
if (node->js.jointype == JOIN_ANTI)
{
node->nl_NeedNewOuter = true;
continue; /* return to top of loop */
}
/*
* If we only need to join to the first matching inner tuple, then
* consider returning this one, but after that continue with next
* outer tuple.
*/
if (node->js.single_match)
node->nl_NeedNewOuter = true;
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
if (otherqual == NULL || ExecQual(otherqual, econtext))
{
/*
* qualification was satisfied so we project and return the
* slot containing the result tuple using ExecProject().
*/
ENL1_printf("qualification succeeded, projecting tuple");
return ExecProject(node->js.ps.ps_ProjInfo);
}
else
InstrCountFiltered2(node, 1);
}
else
InstrCountFiltered1(node, 1);
/*
* Tuple fails qual, so free per-tuple memory and try again.
*/
ResetExprContext(econtext);
ENL1_printf("qualification failed, looping");
}
}
/* ----------------------------------------------------------------
* ExecInitNestLoop
* ----------------------------------------------------------------
*/
NestLoopState *
ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
{
NestLoopState *nlstate;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
NL1_printf("ExecInitNestLoop: %s\n",
"initializing node");
/*
* create state structure
*/
nlstate = makeNode(NestLoopState);
nlstate->js.ps.plan = (Plan *) node;
nlstate->js.ps.state = estate;
nlstate->js.ps.ExecProcNode = ExecNestLoop;
/*
* Miscellaneous initialization
*
* create expression context for node
*/
ExecAssignExprContext(estate, &nlstate->js.ps);
/*
* initialize child nodes
*
* If we have no parameters to pass into the inner rel from the outer,
* tell the inner child that cheap rescans would be good. If we do have
2011-04-10 17:42:00 +02:00
* such parameters, then there is no point in REWIND support at all in the
* inner child, because it will always be rescanned with fresh parameter
* values.
*/
outerPlanState(nlstate) = ExecInitNode(outerPlan(node), estate, eflags);
if (node->nestParams == NIL)
eflags |= EXEC_FLAG_REWIND;
else
eflags &= ~EXEC_FLAG_REWIND;
innerPlanState(nlstate) = ExecInitNode(innerPlan(node), estate, eflags);
/*
* Initialize result slot, type and projection.
*/
Introduce notion of different types of slots (without implementing them). Upcoming work intends to allow pluggable ways to introduce new ways of storing table data. Accessing those table access methods from the executor requires TupleTableSlots to be carry tuples in the native format of such storage methods; otherwise there'll be a significant conversion overhead. Different access methods will require different data to store tuples efficiently (just like virtual, minimal, heap already require fields in TupleTableSlot). To allow that without requiring additional pointer indirections, we want to have different structs (embedding TupleTableSlot) for different types of slots. Thus different types of slots are needed, which requires adapting creators of slots. The slot that most efficiently can represent a type of tuple in an executor node will often depend on the type of slot a child node uses. Therefore we need to track the type of slot is returned by nodes, so parent slots can create slots based on that. Relatedly, JIT compilation of tuple deforming needs to know which type of slot a certain expression refers to, so it can create an appropriate deforming function for the type of tuple in the slot. But not all nodes will only return one type of slot, e.g. an append node will potentially return different types of slots for each of its subplans. Therefore add function that allows to query the type of a node's result slot, and whether it'll always be the same type (whether it's fixed). This can be queried using ExecGetResultSlotOps(). The scan, result, inner, outer type of slots are automatically inferred from ExecInitScanTupleSlot(), ExecInitResultSlot(), left/right subtrees respectively. If that's not correct for a node, that can be overwritten using new fields in PlanState. This commit does not introduce the actually abstracted implementation of different kind of TupleTableSlots, that will be left for a followup commit. The different types of slots introduced will, for now, still use the same backing implementation. While this already partially invalidates the big comment in tuptable.h, it seems to make more sense to update it later, when the different TupleTableSlot implementations actually exist. Author: Ashutosh Bapat and Andres Freund, with changes by Amit Khandekar Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de
2018-11-16 07:00:30 +01:00
ExecInitResultTupleSlotTL(&nlstate->js.ps, &TTSOpsVirtual);
ExecAssignProjectionInfo(&nlstate->js.ps, NULL);
/*
* initialize child expressions
*/
nlstate->js.ps.qual =
ExecInitQual(node->join.plan.qual, (PlanState *) nlstate);
nlstate->js.jointype = node->join.jointype;
nlstate->js.joinqual =
ExecInitQual(node->join.joinqual, (PlanState *) nlstate);
/*
* detect whether we need only consider the first matching inner tuple
*/
nlstate->js.single_match = (node->join.inner_unique ||
node->join.jointype == JOIN_SEMI);
/* set up null tuples for outer joins, if needed */
switch (node->join.jointype)
{
case JOIN_INNER:
case JOIN_SEMI:
break;
case JOIN_LEFT:
case JOIN_ANTI:
nlstate->nl_NullInnerTupleSlot =
ExecInitNullTupleSlot(estate,
Introduce notion of different types of slots (without implementing them). Upcoming work intends to allow pluggable ways to introduce new ways of storing table data. Accessing those table access methods from the executor requires TupleTableSlots to be carry tuples in the native format of such storage methods; otherwise there'll be a significant conversion overhead. Different access methods will require different data to store tuples efficiently (just like virtual, minimal, heap already require fields in TupleTableSlot). To allow that without requiring additional pointer indirections, we want to have different structs (embedding TupleTableSlot) for different types of slots. Thus different types of slots are needed, which requires adapting creators of slots. The slot that most efficiently can represent a type of tuple in an executor node will often depend on the type of slot a child node uses. Therefore we need to track the type of slot is returned by nodes, so parent slots can create slots based on that. Relatedly, JIT compilation of tuple deforming needs to know which type of slot a certain expression refers to, so it can create an appropriate deforming function for the type of tuple in the slot. But not all nodes will only return one type of slot, e.g. an append node will potentially return different types of slots for each of its subplans. Therefore add function that allows to query the type of a node's result slot, and whether it'll always be the same type (whether it's fixed). This can be queried using ExecGetResultSlotOps(). The scan, result, inner, outer type of slots are automatically inferred from ExecInitScanTupleSlot(), ExecInitResultSlot(), left/right subtrees respectively. If that's not correct for a node, that can be overwritten using new fields in PlanState. This commit does not introduce the actually abstracted implementation of different kind of TupleTableSlots, that will be left for a followup commit. The different types of slots introduced will, for now, still use the same backing implementation. While this already partially invalidates the big comment in tuptable.h, it seems to make more sense to update it later, when the different TupleTableSlot implementations actually exist. Author: Ashutosh Bapat and Andres Freund, with changes by Amit Khandekar Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de
2018-11-16 07:00:30 +01:00
ExecGetResultType(innerPlanState(nlstate)),
&TTSOpsVirtual);
break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) node->join.jointype);
}
/*
* finally, wipe the current outer tuple clean.
*/
nlstate->nl_NeedNewOuter = true;
nlstate->nl_MatchedOuter = false;
NL1_printf("ExecInitNestLoop: %s\n",
"node initialized");
return nlstate;
}
/* ----------------------------------------------------------------
* ExecEndNestLoop
*
* closes down scans and frees allocated storage
* ----------------------------------------------------------------
*/
void
ExecEndNestLoop(NestLoopState *node)
{
NL1_printf("ExecEndNestLoop: %s\n",
"ending node processing");
/*
* Free the exprcontext
*/
ExecFreeExprContext(&node->js.ps);
/*
* clean out the tuple table
*/
ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
/*
* close down subplans
*/
ExecEndNode(outerPlanState(node));
ExecEndNode(innerPlanState(node));
NL1_printf("ExecEndNestLoop: %s\n",
"node processing ended");
}
/* ----------------------------------------------------------------
* ExecReScanNestLoop
* ----------------------------------------------------------------
*/
void
ExecReScanNestLoop(NestLoopState *node)
{
2003-08-04 02:43:34 +02:00
PlanState *outerPlan = outerPlanState(node);
/*
* If outerPlan->chgParam is not null then plan will be automatically
* re-scanned by first ExecProcNode.
*/
if (outerPlan->chgParam == NULL)
ExecReScan(outerPlan);
/*
* innerPlan is re-scanned for each new outer tuple and MUST NOT be
* re-scanned from here or you'll get troubles from inner index scans when
* outer Vars are used as run-time keys...
*/
node->nl_NeedNewOuter = true;
node->nl_MatchedOuter = false;
}