Add notion of a "transform function" that can simplify function calls.

Initially, we use this only to eliminate calls to the varchar()
function in cases where the length is not being reduced and, therefore,
the function call is equivalent to a RelabelType operation.  The most
significant effect of this is that we can avoid a table rewrite when
changing a varchar(X) column to a varchar(Y) column, where Y > X.

Noah Misch, reviewed by me and Alexey Klyukin
This commit is contained in:
Robert Haas 2011-06-21 22:15:24 -04:00
parent 771a9f69f7
commit 8f9fe6edce
11 changed files with 2401 additions and 2280 deletions

View File

@ -4337,6 +4337,13 @@
or zero if the function does not have a variadic parameter</entry>
</row>
<row>
<entry><structfield>protransform</structfield></entry>
<entry><type>regproc</type></entry>
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
<entry>Calls to this function can be simplified by this other function</entry>
</row>
<row>
<entry><structfield>proisagg</structfield></entry>
<entry><type>bool</type></entry>

View File

@ -304,6 +304,7 @@ ProcedureCreate(const char *procedureName,
values[Anum_pg_proc_procost - 1] = Float4GetDatum(procost);
values[Anum_pg_proc_prorows - 1] = Float4GetDatum(prorows);
values[Anum_pg_proc_provariadic - 1] = ObjectIdGetDatum(variadicType);
values[Anum_pg_proc_protransform - 1] = ObjectIdGetDatum(InvalidOid);
values[Anum_pg_proc_proisagg - 1] = BoolGetDatum(isAgg);
values[Anum_pg_proc_proiswindow - 1] = BoolGetDatum(isWindowFunc);
values[Anum_pg_proc_prosecdef - 1] = BoolGetDatum(security_definer);

View File

@ -56,6 +56,7 @@
#include "nodes/nodeFuncs.h"
#include "nodes/parsenodes.h"
#include "optimizer/clauses.h"
#include "optimizer/planner.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
@ -3495,7 +3496,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
{
NewColumnValue *ex = lfirst(l);
ex->exprstate = ExecPrepareExpr((Expr *) ex->expr, estate);
/* expr already planned */
ex->exprstate = ExecInitExpr((Expr *) ex->expr, NULL);
}
notnull_attrs = NIL;
@ -4398,7 +4400,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
newval->attnum = attribute.attnum;
newval->expr = defval;
newval->expr = expression_planner(defval);
tab->newvals = lappend(tab->newvals, newval);
tab->rewrite = true;
@ -6707,6 +6709,9 @@ ATPrepAlterColumnType(List **wqueue,
/* Fix collations after all else */
assign_expr_collations(pstate, transform);
/* Plan the expr now so we can accurately assess the need to rewrite. */
transform = (Node *) expression_planner((Expr *) transform);
/*
* Add a work queue item to make ATRewriteTable update the column
* contents.

View File

@ -106,9 +106,9 @@ static List *simplify_and_arguments(List *args,
eval_const_expressions_context *context,
bool *haveNull, bool *forceFalse);
static Node *simplify_boolean_equality(Oid opno, List *args);
static Expr *simplify_function(Oid funcid,
Oid result_type, int32 result_typmod,
Oid result_collid, Oid input_collid, List **args,
static Expr *simplify_function(Expr *oldexpr, Oid funcid,
Oid result_type, int32 result_typmod, Oid result_collid,
Oid input_collid, List **args,
bool has_named_args,
bool allow_inline,
eval_const_expressions_context *context);
@ -2223,7 +2223,8 @@ eval_const_expressions_mutator(Node *node,
* FuncExpr, but not when the node is recognizably a length coercion;
* we want to preserve the typmod in the eventual Const if so.
*/
simple = simplify_function(expr->funcid,
simple = simplify_function((Expr *) expr,
expr->funcid,
expr->funcresulttype, exprTypmod(node),
expr->funccollid,
expr->inputcollid,
@ -2275,7 +2276,8 @@ eval_const_expressions_mutator(Node *node,
* Code for op/func reduction is pretty bulky, so split it out as a
* separate function.
*/
simple = simplify_function(expr->opfuncid,
simple = simplify_function((Expr *) expr,
expr->opfuncid,
expr->opresulttype, -1,
expr->opcollid,
expr->inputcollid,
@ -2372,7 +2374,8 @@ eval_const_expressions_mutator(Node *node,
* Code for op/func reduction is pretty bulky, so split it out as
* a separate function.
*/
simple = simplify_function(expr->opfuncid,
simple = simplify_function((Expr *) expr,
expr->opfuncid,
expr->opresulttype, -1,
expr->opcollid,
expr->inputcollid,
@ -2561,7 +2564,8 @@ eval_const_expressions_mutator(Node *node,
getTypeOutputInfo(exprType((Node *) arg), &outfunc, &outtypisvarlena);
getTypeInputInfo(expr->resulttype, &infunc, &intypioparam);
simple = simplify_function(outfunc,
simple = simplify_function(NULL,
outfunc,
CSTRINGOID, -1,
InvalidOid,
InvalidOid,
@ -2581,7 +2585,8 @@ eval_const_expressions_mutator(Node *node,
Int32GetDatum(-1),
false, true));
simple = simplify_function(infunc,
simple = simplify_function(NULL,
infunc,
expr->resulttype, -1,
expr->resultcollid,
InvalidOid,
@ -3417,11 +3422,15 @@ simplify_boolean_equality(Oid opno, List *args)
* Subroutine for eval_const_expressions: try to simplify a function call
* (which might originally have been an operator; we don't care)
*
* Inputs are the function OID, actual result type OID (which is needed for
* polymorphic functions), result typmod, result collation,
* the input collation to use for the function,
* the pre-simplified argument list, and some flags;
* also the context data for eval_const_expressions.
* Inputs are the original expression (can be NULL), function OID, actual
* result type OID (which is needed for polymorphic functions), result typmod,
* result collation, the input collation to use for the function, the
* pre-simplified argument list, and some flags; also the context data for
* eval_const_expressions. In common cases, several of the arguments could be
* derived from the original expression. Sending them separately avoids
* duplicating NodeTag-specific knowledge, and it's necessary for CoerceViaIO.
* A NULL original expression disables use of transform functions while
* retaining all other behaviors.
*
* Returns a simplified expression if successful, or NULL if cannot
* simplify the function call.
@ -3433,22 +3442,24 @@ simplify_boolean_equality(Oid opno, List *args)
* pass-by-reference, and it may get modified even if simplification fails.
*/
static Expr *
simplify_function(Oid funcid, Oid result_type, int32 result_typmod,
Oid result_collid, Oid input_collid, List **args,
simplify_function(Expr *oldexpr, Oid funcid,
Oid result_type, int32 result_typmod, Oid result_collid,
Oid input_collid, List **args,
bool has_named_args,
bool allow_inline,
eval_const_expressions_context *context)
{
HeapTuple func_tuple;
Expr *newexpr;
Oid transform;
/*
* We have two strategies for simplification: either execute the function
* to deliver a constant result, or expand in-line the body of the
* function definition (which only works for simple SQL-language
* functions, but that is a common case). In either case we need access
* to the function's pg_proc tuple, so fetch it just once to use in both
* attempts.
* We have three strategies for simplification: execute the function to
* deliver a constant result, use a transform function to generate a
* substitute node tree, or expand in-line the body of the function
* definition (which only works for simple SQL-language functions, but
* that is a common case). Each needs access to the function's pg_proc
* tuple, so fetch it just once.
*/
func_tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
if (!HeapTupleIsValid(func_tuple))
@ -3468,6 +3479,40 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod,
result_collid, input_collid, *args,
func_tuple, context);
/*
* Some function calls can be simplified at plan time based on properties
* specific to the function. For example, "varchar(s::varchar(4), 8,
* true)" simplifies to "s::varchar(4)", and "int4mul(n, 1)" could
* simplify to "n". To define such function-specific optimizations, write
* a "transform function" and store its OID in the pg_proc.protransform of
* the primary function. Give each transform function the signature
* "protransform(internal) RETURNS internal". The argument, internally an
* Expr *, is the node representing a call to the primary function. If
* the transform function's study of that node proves that a simplified
* Expr substitutes for all possible concrete calls represented thereby,
* return that simplified Expr. Otherwise, return the NULL pointer.
*
* Currently, the specific Expr nodetag can be FuncExpr, OpExpr or
* DistinctExpr. This list may change in the future. The function should
* check the nodetag and return the NULL pointer for unexpected inputs.
*
* We make no guarantee that PostgreSQL will never call the primary
* function in cases that the transform function would simplify. Ensure
* rigorous equivalence between the simplified expression and an actual
* call to the primary function.
*
* Currently, this facility is undocumented and not exposed to users at
* the SQL level. Core length coercion casts use it to avoid calls
* guaranteed to return their input unchanged. This in turn allows ALTER
* TABLE ALTER TYPE to avoid rewriting tables for some typmod changes. In
* the future, this facility may find other applications, like simplifying
* x*0, x*1, and x+0.
*/
transform = ((Form_pg_proc) GETSTRUCT(func_tuple))->protransform;
if (!newexpr && OidIsValid(transform) && oldexpr)
newexpr = (Expr *) DatumGetPointer(OidFunctionCall1(transform,
PointerGetDatum(oldexpr)));
if (!newexpr && allow_inline)
newexpr = inline_function(funcid, result_type, result_collid,
input_collid, *args,

View File

@ -2278,3 +2278,25 @@ transformFrameOffset(ParseState *pstate, int frameOptions, Node *clause)
return node;
}
/*
 * relabel_to_typmod
 *		Wrap "expr" in a single RelabelType node that keeps the expression's
 *		type and collation but carries the given typmod.
 *
 * Any RelabelType nodes already sitting on top of the expression become
 * superfluous once the new one is added, so they are peeled off first.  This
 * preserves the invariant of no redundant RelabelTypes.
 *
 * Returns the newly built RelabelType node, cast to Node *.
 */
Node *
relabel_to_typmod(Node *expr, int32 typmod)
{
	/* Capture type and collation from the expression as it was passed in. */
	Oid			result_type = exprType(expr);
	Oid			result_coll = exprCollation(expr);
	Node	   *inner;

	/* Descend through the stack of existing RelabelType nodes, if any. */
	for (inner = expr; IsA(inner, RelabelType);)
		inner = (Node *) ((RelabelType *) inner)->arg;

	/* Put exactly one RelabelType, with the requested typmod, on top. */
	return (Node *) makeRelabelType((Expr *) inner,
									result_type,
									typmod,
									result_coll,
									COERCE_DONTCARE);
}

View File

@ -18,6 +18,8 @@
#include "access/hash.h"
#include "access/tuptoaster.h"
#include "libpq/pqformat.h"
#include "nodes/nodeFuncs.h"
#include "parser/parse_clause.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "mb/pg_wchar.h"
@ -548,6 +550,38 @@ varcharsend(PG_FUNCTION_ARGS)
}
/*
 * varchar_transform()
 *		Transform function for our length coercion function.
 *
 * The argument is the node representing a call to the length coercion
 * function.  We flatten calls that leave the new maximum length >= the
 * previous maximum length (or that impose no maximum at all) by returning the
 * source expression relabeled with the new typmod.  We ignore the isExplicit
 * argument, which only affects truncation.
 *
 * Returns the NULL pointer when no simplification is possible: the node is
 * not a FuncExpr, the typmod argument is not a non-null Const, or the source
 * expression's maximum length is unknown or larger than the new maximum.
 */
Datum
varchar_transform(PG_FUNCTION_ARGS)
{
	FuncExpr   *expr = (FuncExpr *) PG_GETARG_POINTER(0);
	Node	   *typmod;
	Node	   *ret = NULL;

	/* Decline anything other than a plain function call node. */
	if (!IsA(expr, FuncExpr))
		PG_RETURN_POINTER(ret);

	Assert(list_length(expr->args) == 3);

	typmod = lsecond(expr->args);

	/*
	 * The typmod must be a known, non-null constant.  A null Const's
	 * constvalue is not meaningful, so basing the decision on it could flatten
	 * a call whose typmod argument is actually NULL.
	 */
	if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
	{
		Node	   *source = linitial(expr->args);
		int32		new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
		int32		old_max = exprTypmod(source) - VARHDRSZ;
		int32		new_max = new_typmod - VARHDRSZ;

		/*
		 * A negative new_max means no length limit is being imposed.
		 * Otherwise the source must have a known limit (old_max >= 0) that
		 * does not exceed the new one.
		 */
		if (new_max < 0 || (old_max >= 0 && old_max <= new_max))
			ret = relabel_to_typmod(source, new_typmod);
	}

	PG_RETURN_POINTER(ret);
}
/*
* Converts a VARCHAR type to the specified size.
*

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201105231
#define CATALOG_VERSION_NO 201106211
#endif

View File

@ -134,7 +134,7 @@ DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t
DESCR("");
DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 20 0 f f f f f 3 _null_ _null_ ));
DESCR("");
DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 25 0 t f f f f 3 _null_ _null_ ));
DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
DESCR("");
DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
DESCR("");

File diff suppressed because it is too large Load Diff

View File

@ -44,4 +44,6 @@ extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist,
extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList);
extern Node *relabel_to_typmod(Node *expr, int32 typmod);
#endif /* PARSE_CLAUSE_H */

View File

@ -684,6 +684,7 @@ extern Datum varcharrecv(PG_FUNCTION_ARGS);
extern Datum varcharsend(PG_FUNCTION_ARGS);
extern Datum varchartypmodin(PG_FUNCTION_ARGS);
extern Datum varchartypmodout(PG_FUNCTION_ARGS);
extern Datum varchar_transform(PG_FUNCTION_ARGS);
extern Datum varchar(PG_FUNCTION_ARGS);
/* varlena.c */