Cache if PathTarget and RestrictInfos contain volatile functions

Here we aim to reduce duplicate work done by contain_volatile_functions()
by caching whether PathTargets and RestrictInfos contain any volatile
functions the first time contain_volatile_functions() is called for them.
Any future calls for these nodes just use the cached value rather than
going to the trouble of recursively checking the sub-node all over again.
Thanks to Tom Lane for the idea.

Any locations in the code which make changes to a PathTarget or
RestrictInfo which could change the outcome of the volatility check must
change the cached value back to VOLATILITY_UNKNOWN again.
contain_volatile_functions() is the only code in charge of setting the
cache value to either VOLATILITY_VOLATILE or VOLATILITY_NOVOLATILE.

Some existing code does benefit from this additional caching, however,
this change is mainly aimed at an upcoming patch that must check for
volatility during the join search.  Repeated volatility checks in that
case can become very expensive when the join search contains more than a
few relations.

Author: David Rowley
Discussion: https://postgr.es/m/3795226.1614059027@sss.pgh.pa.us
This commit is contained in:
David Rowley 2021-03-29 14:47:05 +13:00
parent b64654d6c4
commit f58b230ed0
8 changed files with 134 additions and 20 deletions

View File

@ -2309,6 +2309,7 @@ _copyRestrictInfo(const RestrictInfo *from)
COPY_SCALAR_FIELD(can_join);
COPY_SCALAR_FIELD(pseudoconstant);
COPY_SCALAR_FIELD(leakproof);
COPY_SCALAR_FIELD(has_volatile);
COPY_SCALAR_FIELD(security_level);
COPY_BITMAPSET_FIELD(clause_relids);
COPY_BITMAPSET_FIELD(required_relids);

View File

@ -2471,6 +2471,7 @@ _outPathTarget(StringInfo str, const PathTarget *node)
WRITE_FLOAT_FIELD(cost.startup, "%.2f");
WRITE_FLOAT_FIELD(cost.per_tuple, "%.2f");
WRITE_INT_FIELD(width);
WRITE_ENUM_FIELD(has_volatile_expr, VolatileFunctionStatus);
}
static void
@ -2495,6 +2496,7 @@ _outRestrictInfo(StringInfo str, const RestrictInfo *node)
WRITE_BOOL_FIELD(can_join);
WRITE_BOOL_FIELD(pseudoconstant);
WRITE_BOOL_FIELD(leakproof);
WRITE_ENUM_FIELD(has_volatile, VolatileFunctionStatus);
WRITE_UINT_FIELD(security_level);
WRITE_BITMAPSET_FIELD(clause_relids);
WRITE_BITMAPSET_FIELD(required_relids);

View File

@ -134,7 +134,8 @@ static void check_output_expressions(Query *subquery,
static void compare_tlist_datatypes(List *tlist, List *colTypes,
pushdown_safety_info *safetyInfo);
static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
static bool qual_is_pushdown_safe(Query *subquery, Index rti,
RestrictInfo *rinfo,
pushdown_safety_info *safetyInfo);
static void subquery_push_qual(Query *subquery,
RangeTblEntry *rte, Index rti, Node *qual);
@ -2177,11 +2178,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
foreach(l, rel->baserestrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
Node *clause = (Node *) rinfo->clause;
if (!rinfo->pseudoconstant &&
qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo))
{
Node *clause = (Node *) rinfo->clause;
/* Push it down */
subquery_push_qual(subquery, rte, rti, clause);
}
@ -3390,37 +3392,39 @@ targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
}
/*
* qual_is_pushdown_safe - is a particular qual safe to push down?
* qual_is_pushdown_safe - is a particular rinfo safe to push down?
*
* qual is a restriction clause applying to the given subquery (whose RTE
* rinfo is a restriction clause applying to the given subquery (whose RTE
* has index rti in the parent query).
*
* Conditions checked here:
*
* 1. The qual must not contain any SubPlans (mainly because I'm not sure
* it will work correctly: SubLinks will already have been transformed into
* SubPlans in the qual, but not in the subquery). Note that SubLinks that
* transform to initplans are safe, and will be accepted here because what
* we'll see in the qual is just a Param referencing the initplan output.
* 1. rinfo's clause must not contain any SubPlans (mainly because it's
* unclear that it will work correctly: SubLinks will already have been
* transformed into SubPlans in the qual, but not in the subquery). Note that
* SubLinks that transform to initplans are safe, and will be accepted here
* because what we'll see in the qual is just a Param referencing the initplan
* output.
*
* 2. If unsafeVolatile is set, the qual must not contain any volatile
* 2. If unsafeVolatile is set, rinfo's clause must not contain any volatile
* functions.
*
* 3. If unsafeLeaky is set, the qual must not contain any leaky functions
* that are passed Var nodes, and therefore might reveal values from the
* subquery as side effects.
* 3. If unsafeLeaky is set, rinfo's clause must not contain any leaky
* functions that are passed Var nodes, and therefore might reveal values from
* the subquery as side effects.
*
* 4. The qual must not refer to the whole-row output of the subquery
* 4. rinfo's clause must not refer to the whole-row output of the subquery
* (since there is no easy way to name that within the subquery itself).
*
* 5. The qual must not refer to any subquery output columns that were
* 5. rinfo's clause must not refer to any subquery output columns that were
* found to be unsafe to reference by subquery_is_pushdown_safe().
*/
static bool
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo,
pushdown_safety_info *safetyInfo)
{
bool safe = true;
Node *qual = (Node *) rinfo->clause;
List *vars;
ListCell *vl;
@ -3430,7 +3434,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
/* Refuse volatile quals if we found they'd be unsafe (point 2) */
if (safetyInfo->unsafeVolatile &&
contain_volatile_functions(qual))
contain_volatile_functions((Node *) rinfo))
return false;
/* Refuse leaky quals if told to (point 3) */

View File

@ -2657,7 +2657,7 @@ check_mergejoinable(RestrictInfo *restrictinfo)
leftarg = linitial(((OpExpr *) clause)->args);
if (op_mergejoinable(opno, exprType(leftarg)) &&
!contain_volatile_functions((Node *) clause))
!contain_volatile_functions((Node *) restrictinfo))
restrictinfo->mergeopfamilies = get_mergejoin_opfamilies(opno);
/*
@ -2694,6 +2694,6 @@ check_hashjoinable(RestrictInfo *restrictinfo)
leftarg = linitial(((OpExpr *) clause)->args);
if (op_hashjoinable(opno, exprType(leftarg)) &&
!contain_volatile_functions((Node *) clause))
!contain_volatile_functions((Node *) restrictinfo))
restrictinfo->hashjoinoperator = opno;
}

View File

@ -432,6 +432,16 @@ contain_mutable_functions_walker(Node *node, void *context)
* subsequent planning need not consider volatility within those, since
* the executor won't change its evaluation rules for a SubPlan based on
* volatility.
*
* For some node types, for example, RestrictInfo and PathTarget, we cache
* whether we found any volatile functions or not and reuse that value in any
* future checks for that node. All of the logic for determining if the
* cached value should be set to VOLATILITY_NOVOLATILE or VOLATILITY_VOLATILE
* belongs in this function. Any code which makes changes to these nodes
* which could change the outcome this function must set the cached value back
* to VOLATILITY_UNKNOWN. That allows this function to redetermine the
* correct value during the next call, should we need to redetermine if the
* node contains any volatile functions again in the future.
*/
bool
contain_volatile_functions(Node *clause)
@ -461,6 +471,63 @@ contain_volatile_functions_walker(Node *node, void *context)
return true;
}
if (IsA(node, RestrictInfo))
{
RestrictInfo *rinfo = (RestrictInfo *) node;
/*
* For RestrictInfo, check if we've checked the volatility of it
* before. If so, we can just use the cached value and not bother
* checking it again. Otherwise, check it and cache if whether we
* found any volatile functions.
*/
if (rinfo->has_volatile == VOLATILITY_NOVOLATILE)
return false;
else if (rinfo->has_volatile == VOLATILITY_VOLATILE)
return true;
else
{
bool hasvolatile;
hasvolatile = contain_volatile_functions_walker((Node *) rinfo->clause,
context);
if (hasvolatile)
rinfo->has_volatile = VOLATILITY_VOLATILE;
else
rinfo->has_volatile = VOLATILITY_NOVOLATILE;
return hasvolatile;
}
}
if (IsA(node, PathTarget))
{
PathTarget *target = (PathTarget *) node;
/*
* We also do caching for PathTarget the same as we do above for
* RestrictInfos.
*/
if (target->has_volatile_expr == VOLATILITY_NOVOLATILE)
return false;
else if (target->has_volatile_expr == VOLATILITY_VOLATILE)
return true;
else
{
bool hasvolatile;
hasvolatile = contain_volatile_functions_walker((Node *) target->exprs,
context);
if (hasvolatile)
target->has_volatile_expr = VOLATILITY_VOLATILE;
else
target->has_volatile_expr = VOLATILITY_NOVOLATILE;
return hasvolatile;
}
}
/*
* See notes in contain_mutable_functions_walker about why we treat
* MinMaxExpr, XmlExpr, and CoerceToDomain as immutable, while

View File

@ -137,6 +137,13 @@ make_restrictinfo_internal(PlannerInfo *root,
else
restrictinfo->leakproof = false; /* really, "don't know" */
/*
* Mark volatility as unknown. The contain_volatile_functions function
* will determine if there are any volatile functions when called for the
* first time with this RestrictInfo.
*/
restrictinfo->has_volatile = VOLATILITY_UNKNOWN;
/*
* If it's a binary opclause, set up left/right relids info. In any case
* set up the total clause relids info.

View File

@ -623,6 +623,13 @@ make_pathtarget_from_tlist(List *tlist)
i++;
}
/*
* Mark volatility as unknown. The contain_volatile_functions function
* will determine if there are any volatile functions when called for the
* first time with this PathTarget.
*/
target->has_volatile_expr = VOLATILITY_UNKNOWN;
return target;
}
@ -724,6 +731,16 @@ add_column_to_pathtarget(PathTarget *target, Expr *expr, Index sortgroupref)
target->sortgrouprefs = (Index *) palloc0(nexprs * sizeof(Index));
target->sortgrouprefs[nexprs - 1] = sortgroupref;
}
/*
* Reset has_volatile_expr to UNKNOWN. We just leave it up to
* contain_volatile_functions to set this properly again. Technically we
* could save some effort here and just check the new Expr, but it seems
* better to keep the logic for setting this flag in one location rather
* than duplicating the logic here.
*/
if (target->has_volatile_expr == VOLATILITY_NOVOLATILE)
target->has_volatile_expr = VOLATILITY_UNKNOWN;
}
/*

View File

@ -1055,6 +1055,17 @@ typedef struct PathKey
bool pk_nulls_first; /* do NULLs come before normal values? */
} PathKey;
/*
* VolatileFunctionStatus -- allows nodes to cache their
* contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
* determined.
*/
typedef enum VolatileFunctionStatus
{
VOLATILITY_UNKNOWN = 0,
VOLATILITY_VOLATILE,
VOLATILITY_NOVOLATILE
} VolatileFunctionStatus;
/*
* PathTarget
@ -1086,6 +1097,8 @@ typedef struct PathTarget
Index *sortgrouprefs; /* corresponding sort/group refnos, or 0 */
QualCost cost; /* cost of evaluating the expressions */
int width; /* estimated avg width of result tuples */
VolatileFunctionStatus has_volatile_expr; /* indicates if exprs contain
* any volatile functions. */
} PathTarget;
/* Convenience macro to get a sort/group refno from a PathTarget */
@ -2016,6 +2029,9 @@ typedef struct RestrictInfo
bool leakproof; /* true if known to contain no leaked Vars */
VolatileFunctionStatus has_volatile; /* to indicate if clause contains
* any volatile functions. */
Index security_level; /* see comment above */
/* The set of relids (varnos) actually referenced in the clause: */