postgresql/src/backend/executor/nodeGroup.c

368 lines
8.9 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* nodeGroup.c
* Routines to handle group nodes (used for queries with GROUP BY clause).
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* DESCRIPTION
* The Group node is designed for handling queries with a GROUP BY clause.
* Its outer plan must deliver tuples that are sorted in the order
* specified by the grouping columns (ie. tuples from the same group are
* consecutive). That way, we just have to compare adjacent tuples to
* locate group boundaries.
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.48 2002/11/06 00:00:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "catalog/pg_operator.h"
#include "executor/executor.h"
#include "executor/nodeGroup.h"
#include "parser/parse_oper.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
/*
 * ExecGroup -
 *
 *		Return one tuple for each group of matching input tuples.
 *
 * Tuples are pulled from the outer plan until one fails to match the saved
 * first tuple of the current group on the grouping columns, or until the
 * input runs out.  The projection of the group's first tuple is returned.
 * A non-matching tuple, if any, is copied and saved in the node state as
 * the first tuple of the next group.
 *
 * Returns NULL when the input is empty, and on every call after the last
 * group has been returned.
 */
TupleTableSlot *
ExecGroup(Group *node)
{
	GroupState *grpstate;
	ExprContext *econtext;
	TupleDesc	tupdesc;
	HeapTuple	outerTuple = NULL;
	HeapTuple	firsttuple;
	TupleTableSlot *outerslot;
	ProjectionInfo *projInfo;
	TupleTableSlot *resultSlot;

	/*
	 * get state info from node
	 */
	grpstate = node->grpstate;
	if (grpstate->grp_done)
		return NULL;
	econtext = grpstate->csstate.cstate.cs_ExprContext;
	tupdesc = ExecGetScanType(&grpstate->csstate);

	/*
	 * We need not call ResetExprContext here because execTuplesMatch will
	 * reset the per-tuple memory context once per input tuple.
	 */

	/*
	 * If we don't already have the first tuple of the group, fetch it.
	 * No tuple is saved only on the first call after ExecInitGroup or
	 * ExecReScanGroup; afterwards each call leaves the next group's first
	 * tuple behind in grp_firstTuple.
	 */
	firsttuple = grpstate->grp_firstTuple;
	if (firsttuple == NULL)
	{
		outerslot = ExecProcNode(outerPlan(node), (Plan *) node);
		if (TupIsNull(outerslot))
		{
			/* empty input: there are no groups to return */
			grpstate->grp_done = TRUE;
			return NULL;
		}
		/* save our own copy of the tuple in the node state */
		grpstate->grp_firstTuple = firsttuple =
			heap_copytuple(outerslot->val);
	}

	/*
	 * Scan over all tuples that belong to this group
	 */
	for (;;)
	{
		outerslot = ExecProcNode(outerPlan(node), (Plan *) node);
		if (TupIsNull(outerslot))
		{
			/* input exhausted: the current group is the last one */
			grpstate->grp_done = TRUE;
			outerTuple = NULL;
			break;
		}
		outerTuple = outerslot->val;

		/*
		 * Compare with first tuple and see if this tuple is of the same
		 * group.  On a mismatch, outerTuple is the first tuple of the
		 * next group.
		 */
		if (!execTuplesMatch(firsttuple, outerTuple,
							 tupdesc,
							 node->numCols, node->grpColIdx,
							 grpstate->eqfunctions,
							 econtext->ecxt_per_tuple_memory))
			break;
	}

	/*
	 * form a projection tuple based on the (copied) first tuple of the
	 * group, and store it in the result tuple slot.
	 *
	 * NOTE(review): the final "false" presumably tells the slot not to
	 * free the tuple, since this node frees it itself -- confirm against
	 * ExecStoreTuple.
	 */
	ExecStoreTuple(firsttuple,
				   grpstate->csstate.css_ScanTupleSlot,
				   InvalidBuffer,
				   false);
	econtext->ecxt_scantuple = grpstate->csstate.css_ScanTupleSlot;
	projInfo = grpstate->csstate.cstate.cs_ProjInfo;
	resultSlot = ExecProject(projInfo, NULL);

	/*
	 * save first tuple of next group, if we are not done yet.  When we are
	 * done, grp_firstTuple is left in place and released by ExecEndGroup
	 * or ExecReScanGroup.
	 */
	if (!grpstate->grp_done)
	{
		heap_freetuple(firsttuple);
		grpstate->grp_firstTuple = heap_copytuple(outerTuple);
	}

	return resultSlot;
}
/* -----------------
 * ExecInitGroup
 *
 *	Builds the run-time state (GroupState) for a Group node produced by
 *	the planner and initializes its outer subtree.
 *
 *	Always returns TRUE.
 * -----------------
 */
bool
ExecInitGroup(Group *node, EState *estate, Plan *parent)
{
	GroupState *state;

	/* attach the executor state to the plan node */
	node->plan.state = estate;

	/* build the node's private state, with no group in progress yet */
	state = makeNode(GroupState);
	state->grp_firstTuple = NULL;
	state->grp_done = FALSE;
	state->grp_useFirstTuple = FALSE;
	node->grpstate = state;

	/* expression context used for comparisons and projection */
	ExecAssignExprContext(estate, &state->csstate.cstate);

	/*
	 * This node uses two slots, a scan slot and a result slot; the count is
	 * also reported by ExecCountSlotsGroup.
	 */
#define GROUP_NSLOTS 2

	ExecInitScanTupleSlot(estate, &state->csstate);
	ExecInitResultTupleSlot(estate, &state->csstate.cstate);

	/* bring up the outer subtree */
	ExecInitNode(outerPlan(node), estate, (Plan *) node);

	/* the scan tuple type is taken from the outer plan */
	ExecAssignScanTypeFromOuterPlan((Plan *) node, &state->csstate);

	/*
	 * Set up the result tuple type and the projection info; ExecGroup
	 * projects the first tuple of each group through cs_ProjInfo.
	 */
	ExecAssignResultTypeFromTL((Plan *) node, &state->csstate.cstate);
	ExecAssignProjectionInfo((Plan *) node, &state->csstate.cstate);

	/*
	 * Look up the equality functions for the grouping columns once, so
	 * that ExecGroup's per-tuple comparison loop does not have to.
	 */
	state->eqfunctions =
		execTuplesMatchPrepare(ExecGetScanType(&state->csstate),
							   node->numCols,
							   node->grpColIdx);

	return TRUE;
}
/*
 * ExecCountSlotsGroup
 *
 *	Returns the slot count of the outer subtree plus the GROUP_NSLOTS
 *	slots used by this node itself.
 */
int
ExecCountSlotsGroup(Group *node)
{
	int			subtreeSlots = ExecCountSlotsNode(outerPlan(node));

	return subtreeSlots + GROUP_NSLOTS;
}
/* ------------------------
 * ExecEndGroup(node)
 *
 *	Shuts down the Group node: frees its projection info and expression
 *	context, ends the outer subtree, clears the scan tuple slot, and
 *	frees the saved first tuple of the pending group, if any.
 * -----------------------
 */
void
ExecEndGroup(Group *node)
{
	GroupState *state = node->grpstate;

	ExecFreeProjectionInfo(&state->csstate.cstate);
	ExecFreeExprContext(&state->csstate.cstate);

	/* shut down the outer subtree */
	ExecEndNode(outerPlan(node), (Plan *) node);

	/* clean up tuple table */
	ExecClearTuple(state->csstate.css_ScanTupleSlot);

	/* drop the tuple saved for the next (or last) group */
	if (state->grp_firstTuple)
	{
		heap_freetuple(state->grp_firstTuple);
		state->grp_firstTuple = NULL;
	}
}
/*
 * ExecReScanGroup
 *
 *	Resets the Group node so that the next ExecGroup call starts over:
 *	clears the done flag, discards any saved first tuple, and rescans the
 *	outer subtree when it has no pending parameter changes.
 */
void
ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent)
{
	GroupState *state = node->grpstate;
	Plan	   *child = ((Plan *) node)->lefttree;

	state->grp_done = FALSE;
	state->grp_useFirstTuple = FALSE;

	/* forget the group in progress, if any */
	if (state->grp_firstTuple)
	{
		heap_freetuple(state->grp_firstTuple);
		state->grp_firstTuple = NULL;
	}

	/*
	 * Rescan the child here only when its chgParam is NULL.
	 * NOTE(review): presumably a child with chgParam set gets rescanned on
	 * its own when it is next executed -- confirm.
	 */
	if (child != NULL && child->chgParam == NULL)
		ExecReScan(child, exprCtxt, (Plan *) node);
}
/*****************************************************************************
* Code shared with nodeUnique.c and nodeAgg.c
*****************************************************************************/
/*
 * execTuplesMatch
 *		Return true if two tuples match in all the indicated fields.
 *		This is used to detect group boundaries in nodeGroup and nodeAgg,
 *		and to decide whether two tuples are distinct or not in nodeUnique.
 *
 * tuple1, tuple2: the tuples to compare
 * tupdesc: tuple descriptor applying to both tuples
 * numCols: the number of attributes to be examined
 * matchColIdx: array of attribute column numbers
 * eqfunctions: array of fmgr lookup info for the equality functions to use;
 *		eqfunctions[i] is applied to column matchColIdx[i]
 * evalContext: short-term memory context for executing the functions
 *
 * Two NULL fields are treated as equal; a NULL and a non-NULL field are
 * treated as unequal without calling the equality function.  If numCols
 * is zero, no field is examined and the result is true.
 *
 * NB: evalContext is reset each time!
 */
bool
execTuplesMatch(HeapTuple tuple1,
				HeapTuple tuple2,
				TupleDesc tupdesc,
				int numCols,
				AttrNumber *matchColIdx,
				FmgrInfo *eqfunctions,
				MemoryContext evalContext)
{
	MemoryContext oldContext;
	bool		result;
	int			i;

	/* Reset and switch into the temp context. */
	MemoryContextReset(evalContext);
	oldContext = MemoryContextSwitchTo(evalContext);

	/*
	 * We cannot report a match without checking all the fields, but we
	 * can report a non-match as soon as we find unequal fields.  So,
	 * start comparing at the last field (least significant sort key).
	 * That's the most likely to be different if we are dealing with
	 * sorted input.
	 */
	result = true;

	for (i = numCols; --i >= 0;)
	{
		AttrNumber	att = matchColIdx[i];
		Datum		attr1,
					attr2;
		bool		isNull1,
					isNull2;

		attr1 = heap_getattr(tuple1,
							 att,
							 tupdesc,
							 &isNull1);

		attr2 = heap_getattr(tuple2,
							 att,
							 tupdesc,
							 &isNull2);

		if (isNull1 != isNull2)
		{
			result = false;		/* one null and one not; they aren't equal */
			break;
		}

		if (isNull1)
			continue;			/* both are null, treat as equal */

		/* Apply the type-specific equality function */
		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
										attr1, attr2)))
		{
			result = false;		/* they aren't equal */
			break;
		}
	}

	/* switch back to the caller's context before returning */
	MemoryContextSwitchTo(oldContext);

	return result;
}
1998-07-16 03:49:19 +02:00
/*
* execTuplesMatchPrepare
* Look up the equality functions needed for execTuplesMatch.
* The result is a palloc'd array.
*/
FmgrInfo *
execTuplesMatchPrepare(TupleDesc tupdesc,
int numCols,
AttrNumber *matchColIdx)
1998-07-16 03:49:19 +02:00
{
FmgrInfo *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
int i;
1998-07-16 03:49:19 +02:00
for (i = 0; i < numCols; i++)
{
AttrNumber att = matchColIdx[i];
Oid typid = tupdesc->attrs[att - 1]->atttypid;
Clean up two rather nasty bugs in operator selection code. 1. If there is exactly one pg_operator entry of the right name and oprkind, oper() and related routines would return that entry whether its input type had anything to do with the request or not. This is just premature optimization: we shouldn't return the single candidate until after we verify that it really is a valid candidate, ie, is at least coercion-compatible with the given types. 2. oper() and related routines only promise a coercion-compatible result. Unfortunately, there were quite a few callers that assumed the returned operator is binary-compatible with the given datatype; they would proceed to call it without making any datatype coercions. These callers include sorting, grouping, aggregation, and VACUUM ANALYZE. In general I think it is appropriate for these callers to require an exact or binary-compatible match, so I've added a new routine compatible_oper() that only succeeds if it can find an operator that doesn't require any run-time conversions. Callers now call oper() or compatible_oper() depending on whether they are prepared to deal with type conversion or not. The upshot of these bugs is revealed by the following silliness in PL/Tcl's selftest: it creates an operator @< on int4, and then tries to use it to sort a char(N) column. The system would let it do that :-( (and evidently has done so since 6.3 :-( :-(). The result in this case was just a silly sort order, but the reverse combination would've provoked coredump from trying to dereference integers. With this fix you get more reasonable behavior: pltcl_test=# select * from T_pkey1 order by key1, key2 using @<; ERROR: Unable to identify an operator '@<' for types 'bpchar' and 'bpchar' You will have to retype this query using an explicit cast
2001-02-16 04:16:58 +01:00
Oid eq_function;
eq_function = compatible_oper_funcid(makeList1(makeString("=")),
typid, typid, true);
Clean up two rather nasty bugs in operator selection code. 1. If there is exactly one pg_operator entry of the right name and oprkind, oper() and related routines would return that entry whether its input type had anything to do with the request or not. This is just premature optimization: we shouldn't return the single candidate until after we verify that it really is a valid candidate, ie, is at least coercion-compatible with the given types. 2. oper() and related routines only promise a coercion-compatible result. Unfortunately, there were quite a few callers that assumed the returned operator is binary-compatible with the given datatype; they would proceed to call it without making any datatype coercions. These callers include sorting, grouping, aggregation, and VACUUM ANALYZE. In general I think it is appropriate for these callers to require an exact or binary-compatible match, so I've added a new routine compatible_oper() that only succeeds if it can find an operator that doesn't require any run-time conversions. Callers now call oper() or compatible_oper() depending on whether they are prepared to deal with type conversion or not. The upshot of these bugs is revealed by the following silliness in PL/Tcl's selftest: it creates an operator @< on int4, and then tries to use it to sort a char(N) column. The system would let it do that :-( (and evidently has done so since 6.3 :-( :-(). The result in this case was just a silly sort order, but the reverse combination would've provoked coredump from trying to dereference integers. With this fix you get more reasonable behavior: pltcl_test=# select * from T_pkey1 order by key1, key2 using @<; ERROR: Unable to identify an operator '@<' for types 'bpchar' and 'bpchar' You will have to retype this query using an explicit cast
2001-02-16 04:16:58 +01:00
if (!OidIsValid(eq_function))
elog(ERROR, "Unable to identify an equality operator for type %s",
format_type_be(typid));
Clean up two rather nasty bugs in operator selection code. 1. If there is exactly one pg_operator entry of the right name and oprkind, oper() and related routines would return that entry whether its input type had anything to do with the request or not. This is just premature optimization: we shouldn't return the single candidate until after we verify that it really is a valid candidate, ie, is at least coercion-compatible with the given types. 2. oper() and related routines only promise a coercion-compatible result. Unfortunately, there were quite a few callers that assumed the returned operator is binary-compatible with the given datatype; they would proceed to call it without making any datatype coercions. These callers include sorting, grouping, aggregation, and VACUUM ANALYZE. In general I think it is appropriate for these callers to require an exact or binary-compatible match, so I've added a new routine compatible_oper() that only succeeds if it can find an operator that doesn't require any run-time conversions. Callers now call oper() or compatible_oper() depending on whether they are prepared to deal with type conversion or not. The upshot of these bugs is revealed by the following silliness in PL/Tcl's selftest: it creates an operator @< on int4, and then tries to use it to sort a char(N) column. The system would let it do that :-( (and evidently has done so since 6.3 :-( :-(). The result in this case was just a silly sort order, but the reverse combination would've provoked coredump from trying to dereference integers. With this fix you get more reasonable behavior: pltcl_test=# select * from T_pkey1 order by key1, key2 using @<; ERROR: Unable to identify an operator '@<' for types 'bpchar' and 'bpchar' You will have to retype this query using an explicit cast
2001-02-16 04:16:58 +01:00
fmgr_info(eq_function, &eqfunctions[i]);
}
return eqfunctions;
1998-07-16 03:49:19 +02:00
}