Save a few cycles in advance_transition_function().

Keep a pre-initialized FunctionCallInfoData in AggStatePerAggData, and
re-use that at each row instead of doing InitFunctionCallInfoData each
time.  This saves only half a dozen assignments and maybe some stack
manipulation, and yet that seems to be good for a percent or two of the
overall query run time for simple aggregates such as count(*).  The cost
is that the FunctionCallInfoData (which is about a kilobyte, on 64-bit
machines) stays allocated for the duration of the query instead of being
short-lived stack data.  But we're already paying an equivalent space cost
for each regular FuncExpr or OpExpr node, so I don't feel bad about paying
it for aggregate functions.  The code seems a little cleaner this way too,
since the number of things passed to advance_transition_function decreases.
This commit is contained in:
Tom Lane 2014-01-08 13:58:15 -05:00
parent d59ff6c110
commit e6336b8b57
1 changed file with 38 additions and 27 deletions

View File

@ -235,6 +235,14 @@ typedef struct AggStatePerAggData
*/
Tuplesortstate *sortstate; /* sort object, if DISTINCT or ORDER BY */
/*
* This field is a pre-initialized FunctionCallInfo struct used for
* calling this aggregate's transfn. We save a few cycles per row by not
* re-initializing the unchanging fields; which isn't much, but it seems
* worth the extra space consumption.
*/
FunctionCallInfoData transfn_fcinfo;
} AggStatePerAggData;
/*
@ -290,8 +298,7 @@ static void initialize_aggregates(AggState *aggstate,
AggStatePerGroup pergroup);
static void advance_transition_function(AggState *aggstate,
AggStatePerAgg peraggstate,
AggStatePerGroup pergroupstate,
FunctionCallInfoData *fcinfo);
AggStatePerGroup pergroupstate);
static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
static void process_ordered_aggregate_single(AggState *aggstate,
AggStatePerAgg peraggstate,
@ -399,21 +406,20 @@ initialize_aggregates(AggState *aggstate,
* Given new input value(s), advance the transition function of an aggregate.
*
* The new values (and null flags) have been preloaded into argument positions
* 1 and up in fcinfo, so that we needn't copy them again to pass to the
* transition function. No other fields of fcinfo are assumed valid.
* 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again
* to pass to the transition function. We also expect that the static fields
* of the fcinfo are already initialized; that was done by ExecInitAgg().
*
* It doesn't matter which memory context this is called in.
*/
static void
advance_transition_function(AggState *aggstate,
AggStatePerAgg peraggstate,
AggStatePerGroup pergroupstate,
FunctionCallInfoData *fcinfo)
AggStatePerGroup pergroupstate)
{
int numTransInputs = peraggstate->numTransInputs;
FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
MemoryContext oldContext;
Datum newVal;
int i;
if (peraggstate->transfn.fn_strict)
{
@ -421,6 +427,9 @@ advance_transition_function(AggState *aggstate,
* For a strict transfn, nothing happens when there's a NULL input; we
* just keep the prior transValue.
*/
int numTransInputs = peraggstate->numTransInputs;
int i;
for (i = 1; i <= numTransInputs; i++)
{
if (fcinfo->argnull[i])
@ -467,12 +476,9 @@ advance_transition_function(AggState *aggstate,
/*
* OK to call the transition function
*/
InitFunctionCallInfoData(*fcinfo, &(peraggstate->transfn),
numTransInputs + 1,
peraggstate->aggCollation,
(void *) aggstate, NULL);
fcinfo->arg[0] = pergroupstate->transValue;
fcinfo->argnull[0] = pergroupstate->transValueIsNull;
fcinfo->isnull = false; /* just in case transfn doesn't set it */
newVal = FunctionCallInvoke(fcinfo);
@ -574,19 +580,18 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
else
{
/* We can apply the transition function immediately */
FunctionCallInfoData fcinfo;
FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
/* Load values into fcinfo */
/* Start from 1, since the 0th arg will be the transition value */
Assert(slot->tts_nvalid >= numTransInputs);
for (i = 0; i < numTransInputs; i++)
{
fcinfo.arg[i + 1] = slot->tts_values[i];
fcinfo.argnull[i + 1] = slot->tts_isnull[i];
fcinfo->arg[i + 1] = slot->tts_values[i];
fcinfo->argnull[i + 1] = slot->tts_isnull[i];
}
advance_transition_function(aggstate, peraggstate, pergroupstate,
&fcinfo);
advance_transition_function(aggstate, peraggstate, pergroupstate);
}
}
}
@ -622,17 +627,17 @@ process_ordered_aggregate_single(AggState *aggstate,
MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
MemoryContext oldContext;
bool isDistinct = (peraggstate->numDistinctCols > 0);
FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
Datum *newVal;
bool *isNull;
FunctionCallInfoData fcinfo;
Assert(peraggstate->numDistinctCols < 2);
tuplesort_performsort(peraggstate->sortstate);
/* Load the column into argument 1 (arg 0 will be transition value) */
newVal = fcinfo.arg + 1;
isNull = fcinfo.argnull + 1;
newVal = fcinfo->arg + 1;
isNull = fcinfo->argnull + 1;
/*
* Note: if input type is pass-by-ref, the datums returned by the sort are
@ -668,8 +673,7 @@ process_ordered_aggregate_single(AggState *aggstate,
}
else
{
advance_transition_function(aggstate, peraggstate, pergroupstate,
&fcinfo);
advance_transition_function(aggstate, peraggstate, pergroupstate);
/* forget the old value, if any */
if (!oldIsNull && !peraggstate->inputtypeByVal)
pfree(DatumGetPointer(oldVal));
@ -704,7 +708,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
AggStatePerGroup pergroupstate)
{
MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
FunctionCallInfoData fcinfo;
FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
TupleTableSlot *slot1 = peraggstate->evalslot;
TupleTableSlot *slot2 = peraggstate->uniqslot;
int numTransInputs = peraggstate->numTransInputs;
@ -739,12 +743,11 @@ process_ordered_aggregate_multi(AggState *aggstate,
/* Start from 1, since the 0th arg will be the transition value */
for (i = 0; i < numTransInputs; i++)
{
fcinfo.arg[i + 1] = slot1->tts_values[i];
fcinfo.argnull[i + 1] = slot1->tts_isnull[i];
fcinfo->arg[i + 1] = slot1->tts_values[i];
fcinfo->argnull[i + 1] = slot1->tts_isnull[i];
}
advance_transition_function(aggstate, peraggstate, pergroupstate,
&fcinfo);
advance_transition_function(aggstate, peraggstate, pergroupstate);
if (numDistinctCols > 0)
{
@ -1799,6 +1802,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
&transfnexpr,
&finalfnexpr);
/* set up infrastructure for calling the transfn and finalfn */
fmgr_info(transfn_oid, &peraggstate->transfn);
fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
@ -1810,6 +1814,13 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
peraggstate->aggCollation = aggref->inputcollid;
InitFunctionCallInfoData(peraggstate->transfn_fcinfo,
&peraggstate->transfn,
peraggstate->numTransInputs + 1,
peraggstate->aggCollation,
(void *) aggstate, NULL);
/* get info about relevant datatypes */
get_typlenbyval(aggref->aggtype,
&peraggstate->resulttypeLen,
&peraggstate->resulttypeByVal);