postgresql/src/backend/parser/parse_expr.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

3957 lines
108 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* parse_expr.c
* handle expressions in parser
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/parser/parse_expr.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
2003-06-27 19:07:03 +02:00
#include "commands/dbcommands.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#include "parser/analyze.h"
#include "parser/parse_agg.h"
#include "parser/parse_clause.h"
1999-07-16 07:00:38 +02:00
#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
#include "parser/parse_expr.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
2000-06-15 05:33:12 +02:00
#include "parser/parse_type.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/timestamp.h"
#include "utils/xml.h"
Make operator precedence follow the SQL standard more closely. While the SQL standard is pretty vague on the overall topic of operator precedence (because it never presents a unified BNF for all expressions), it does seem reasonable to conclude from the spec for <boolean value expression> that OR has the lowest precedence, then AND, then NOT, then IS tests, then the six standard comparison operators, then everything else (since any non-boolean operator in a WHERE clause would need to be an argument of one of these). We were only sort of on board with that: most notably, while "<" ">" and "=" had properly low precedence, "<=" ">=" and "<>" were treated as generic operators and so had significantly higher precedence. And "IS" tests were even higher precedence than those, which is very clearly wrong per spec. Another problem was that "foo NOT SOMETHING bar" constructs, such as "x NOT LIKE y", were treated inconsistently because of a bison implementation artifact: they had the documented precedence with respect to operators to their right, but behaved like NOT (i.e., very low priority) with respect to operators to their left. Fixing the precedence issues is just a small matter of rearranging the precedence declarations in gram.y, except for the NOT problem, which requires adding an additional lookahead case in base_yylex() so that we can attach a different token precedence to NOT LIKE and allied two-word operators. The bulk of this patch is not the bug fix per se, but adding logic to parse_expr.c to allow giving warnings if an expression has changed meaning because of these precedence changes. These warnings are off by default and are enabled by the new GUC operator_precedence_warning. It's believed that very few applications will be affected by these changes, but it was agreed that a warning mechanism is essential to help debug any that are.
2015-03-11 18:22:52 +01:00
/* GUC parameters */
bool Transform_null_equals = false;
Make operator precedence follow the SQL standard more closely. While the SQL standard is pretty vague on the overall topic of operator precedence (because it never presents a unified BNF for all expressions), it does seem reasonable to conclude from the spec for <boolean value expression> that OR has the lowest precedence, then AND, then NOT, then IS tests, then the six standard comparison operators, then everything else (since any non-boolean operator in a WHERE clause would need to be an argument of one of these). We were only sort of on board with that: most notably, while "<" ">" and "=" had properly low precedence, "<=" ">=" and "<>" were treated as generic operators and so had significantly higher precedence. And "IS" tests were even higher precedence than those, which is very clearly wrong per spec. Another problem was that "foo NOT SOMETHING bar" constructs, such as "x NOT LIKE y", were treated inconsistently because of a bison implementation artifact: they had the documented precedence with respect to operators to their right, but behaved like NOT (i.e., very low priority) with respect to operators to their left. Fixing the precedence issues is just a small matter of rearranging the precedence declarations in gram.y, except for the NOT problem, which requires adding an additional lookahead case in base_yylex() so that we can attach a different token precedence to NOT LIKE and allied two-word operators. The bulk of this patch is not the bug fix per se, but adding logic to parse_expr.c to allow giving warnings if an expression has changed meaning because of these precedence changes. These warnings are off by default and are enabled by the new GUC operator_precedence_warning. It's believed that very few applications will be affected by these changes, but it was agreed that a warning mechanism is essential to help debug any that are.
2015-03-11 18:22:52 +01:00
static Node *transformExprRecurse(ParseState *pstate, Node *expr);
static Node *transformParamRef(ParseState *pstate, ParamRef *pref);
static Node *transformAExprOp(ParseState *pstate, A_Expr *a);
static Node *transformAExprOpAny(ParseState *pstate, A_Expr *a);
static Node *transformAExprOpAll(ParseState *pstate, A_Expr *a);
static Node *transformAExprDistinct(ParseState *pstate, A_Expr *a);
static Node *transformAExprNullIf(ParseState *pstate, A_Expr *a);
static Node *transformAExprIn(ParseState *pstate, A_Expr *a);
static Node *transformAExprBetween(ParseState *pstate, A_Expr *a);
static Node *transformBoolExpr(ParseState *pstate, BoolExpr *a);
static Node *transformFuncCall(ParseState *pstate, FuncCall *fn);
static Node *transformMultiAssignRef(ParseState *pstate, MultiAssignRef *maref);
static Node *transformCaseExpr(ParseState *pstate, CaseExpr *c);
static Node *transformSubLink(ParseState *pstate, SubLink *sublink);
static Node *transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
Oid array_type, Oid element_type, int32 typmod);
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
static Node *transformRowExpr(ParseState *pstate, RowExpr *r, bool allowDefault);
static Node *transformCoalesceExpr(ParseState *pstate, CoalesceExpr *c);
static Node *transformMinMaxExpr(ParseState *pstate, MinMaxExpr *m);
static Node *transformSQLValueFunction(ParseState *pstate,
SQLValueFunction *svf);
static Node *transformXmlExpr(ParseState *pstate, XmlExpr *x);
static Node *transformXmlSerialize(ParseState *pstate, XmlSerialize *xs);
static Node *transformBooleanTest(ParseState *pstate, BooleanTest *b);
static Node *transformCurrentOfExpr(ParseState *pstate, CurrentOfExpr *cexpr);
static Node *transformColumnRef(ParseState *pstate, ColumnRef *cref);
static Node *transformWholeRowRef(ParseState *pstate,
ParseNamespaceItem *nsitem,
int sublevels_up, int location);
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
static Node *transformIndirection(ParseState *pstate, A_Indirection *ind);
static Node *transformTypeCast(ParseState *pstate, TypeCast *tc);
static Node *transformCollateClause(ParseState *pstate, CollateClause *c);
static Node *transformJsonObjectConstructor(ParseState *pstate,
JsonObjectConstructor *ctor);
static Node *transformJsonArrayConstructor(ParseState *pstate,
JsonArrayConstructor *ctor);
static Node *transformJsonArrayQueryConstructor(ParseState *pstate,
JsonArrayQueryConstructor *ctor);
static Node *transformJsonObjectAgg(ParseState *pstate, JsonObjectAgg *agg);
static Node *transformJsonArrayAgg(ParseState *pstate, JsonArrayAgg *agg);
static Node *transformJsonIsPredicate(ParseState *pstate, JsonIsPredicate *pred);
static Node *make_row_comparison_op(ParseState *pstate, List *opname,
List *largs, List *rargs, int location);
static Node *make_row_distinct_op(ParseState *pstate, List *opname,
RowExpr *lrow, RowExpr *rrow, int location);
static Expr *make_distinct_op(ParseState *pstate, List *opname,
Node *ltree, Node *rtree, int location);
static Node *make_nulltest_from_distinct(ParseState *pstate,
A_Expr *distincta, Node *arg);
/*
* transformExpr -
* Analyze and transform expressions. Type checking and type casting is
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
* done here. This processing converts the raw grammar output into
* expression trees with fully determined semantics.
*/
Node *
transformExpr(ParseState *pstate, Node *expr, ParseExprKind exprKind)
{
Node *result;
ParseExprKind sv_expr_kind;
/* Save and restore identity of expression type we're parsing */
Assert(exprKind != EXPR_KIND_NONE);
sv_expr_kind = pstate->p_expr_kind;
pstate->p_expr_kind = exprKind;
result = transformExprRecurse(pstate, expr);
pstate->p_expr_kind = sv_expr_kind;
return result;
}
static Node *
transformExprRecurse(ParseState *pstate, Node *expr)
{
Node *result;
if (expr == NULL)
return NULL;
/* Guard against stack overflow due to overly complex expressions */
check_stack_depth();
switch (nodeTag(expr))
{
case T_ColumnRef:
result = transformColumnRef(pstate, (ColumnRef *) expr);
break;
case T_ParamRef:
result = transformParamRef(pstate, (ParamRef *) expr);
break;
case T_A_Const:
result = (Node *) make_const(pstate, (A_Const *) expr);
break;
case T_A_Indirection:
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
result = transformIndirection(pstate, (A_Indirection *) expr);
break;
case T_A_ArrayExpr:
result = transformArrayExpr(pstate, (A_ArrayExpr *) expr,
InvalidOid, InvalidOid, -1);
break;
case T_TypeCast:
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
result = transformTypeCast(pstate, (TypeCast *) expr);
break;
case T_CollateClause:
result = transformCollateClause(pstate, (CollateClause *) expr);
break;
case T_A_Expr:
{
A_Expr *a = (A_Expr *) expr;
switch (a->kind)
{
case AEXPR_OP:
result = transformAExprOp(pstate, a);
break;
case AEXPR_OP_ANY:
result = transformAExprOpAny(pstate, a);
break;
case AEXPR_OP_ALL:
result = transformAExprOpAll(pstate, a);
break;
case AEXPR_DISTINCT:
case AEXPR_NOT_DISTINCT:
result = transformAExprDistinct(pstate, a);
break;
case AEXPR_NULLIF:
result = transformAExprNullIf(pstate, a);
break;
case AEXPR_IN:
result = transformAExprIn(pstate, a);
break;
case AEXPR_LIKE:
case AEXPR_ILIKE:
case AEXPR_SIMILAR:
/* we can transform these just like AEXPR_OP */
result = transformAExprOp(pstate, a);
break;
case AEXPR_BETWEEN:
case AEXPR_NOT_BETWEEN:
case AEXPR_BETWEEN_SYM:
case AEXPR_NOT_BETWEEN_SYM:
result = transformAExprBetween(pstate, a);
break;
default:
elog(ERROR, "unrecognized A_Expr kind: %d", a->kind);
result = NULL; /* keep compiler quiet */
break;
}
break;
}
case T_BoolExpr:
result = transformBoolExpr(pstate, (BoolExpr *) expr);
break;
case T_FuncCall:
result = transformFuncCall(pstate, (FuncCall *) expr);
break;
case T_MultiAssignRef:
result = transformMultiAssignRef(pstate, (MultiAssignRef *) expr);
break;
Support GROUPING SETS, CUBE and ROLLUP. This SQL standard functionality allows to aggregate data by different GROUP BY clauses at once. Each grouping set returns rows with columns grouped by in other sets set to NULL. This could previously be achieved by doing each grouping as a separate query, conjoined by UNION ALLs. Besides being considerably more concise, grouping sets will in many cases be faster, requiring only one scan over the underlying data. The current implementation of grouping sets only supports using sorting for input. Individual sets that share a sort order are computed in one pass. If there are sets that don't share a sort order, additional sort & aggregation steps are performed. These additional passes are sourced by the previous sort step; thus avoiding repeated scans of the source data. The code is structured in a way that adding support for purely using hash aggregation or a mix of hashing and sorting is possible. Sorting was chosen to be supported first, as it is the most generic method of implementation. Instead of, as in an earlier versions of the patch, representing the chain of sort and aggregation steps as full blown planner and executor nodes, all but the first sort are performed inside the aggregation node itself. This avoids the need to do some unusual gymnastics to handle having to return aggregated and non-aggregated tuples from underlying nodes, as well as having to shut down underlying nodes early to limit memory usage. The optimizer still builds Sort/Agg node to describe each phase, but they're not part of the plan tree, but instead additional data for the aggregation node. They're a convenient and preexisting way to describe aggregation and sorting. The first (and possibly only) sort step is still performed as a separate execution step. That retains similarity with existing group by plans, makes rescans fairly simple, avoids very deep plans (leading to slow explains) and easily allows to avoid the sorting step if the underlying data is sorted by other means. A somewhat ugly side of this patch is having to deal with a grammar ambiguity between the new CUBE keyword and the cube extension/functions named cube (and rollup). To avoid breaking existing deployments of the cube extension it has not been renamed, neither has cube been made a reserved keyword. Instead precedence hacking is used to make GROUP BY cube(..) refer to the CUBE grouping sets feature, and not the function cube(). To actually group by a function cube(), unlikely as that might be, the function name has to be quoted. Needs a catversion bump because stored rules may change. Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
case T_GroupingFunc:
result = transformGroupingFunc(pstate, (GroupingFunc *) expr);
break;
case T_NamedArgExpr:
{
NamedArgExpr *na = (NamedArgExpr *) expr;
na->arg = (Expr *) transformExprRecurse(pstate, (Node *) na->arg);
result = expr;
break;
}
case T_SubLink:
result = transformSubLink(pstate, (SubLink *) expr);
break;
1998-12-04 16:34:49 +01:00
case T_CaseExpr:
result = transformCaseExpr(pstate, (CaseExpr *) expr);
break;
1998-12-04 16:34:49 +01:00
case T_RowExpr:
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
result = transformRowExpr(pstate, (RowExpr *) expr, false);
break;
case T_CoalesceExpr:
result = transformCoalesceExpr(pstate, (CoalesceExpr *) expr);
break;
case T_MinMaxExpr:
result = transformMinMaxExpr(pstate, (MinMaxExpr *) expr);
break;
case T_SQLValueFunction:
result = transformSQLValueFunction(pstate,
(SQLValueFunction *) expr);
break;
case T_XmlExpr:
result = transformXmlExpr(pstate, (XmlExpr *) expr);
break;
case T_XmlSerialize:
result = transformXmlSerialize(pstate, (XmlSerialize *) expr);
break;
case T_NullTest:
{
NullTest *n = (NullTest *) expr;
n->arg = (Expr *) transformExprRecurse(pstate, (Node *) n->arg);
/* the argument can be any type, so don't coerce it */
n->argisrow = type_is_rowtype(exprType((Node *) n->arg));
result = expr;
break;
}
case T_BooleanTest:
result = transformBooleanTest(pstate, (BooleanTest *) expr);
break;
case T_CurrentOfExpr:
result = transformCurrentOfExpr(pstate, (CurrentOfExpr *) expr);
break;
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
/*
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
* In all places where DEFAULT is legal, the caller should have
* processed it rather than passing it to transformExpr().
*/
case T_SetToDefault:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("DEFAULT is not allowed in this context"),
parser_errposition(pstate,
((SetToDefault *) expr)->location)));
break;
/*
* CaseTestExpr doesn't require any processing; it is only
* injected into parse trees in a fully-formed state.
*
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
* Ordinarily we should not see a Var here, but it is convenient
* for transformJoinUsingClause() to create untransformed operator
* trees containing already-transformed Vars. The best
* alternative would be to deconstruct and reconstruct column
* references, which seems expensively pointless. So allow it.
*/
case T_CaseTestExpr:
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
case T_Var:
1998-03-26 22:08:10 +01:00
{
result = (Node *) expr;
break;
}
case T_JsonObjectConstructor:
result = transformJsonObjectConstructor(pstate, (JsonObjectConstructor *) expr);
break;
case T_JsonArrayConstructor:
result = transformJsonArrayConstructor(pstate, (JsonArrayConstructor *) expr);
break;
case T_JsonArrayQueryConstructor:
result = transformJsonArrayQueryConstructor(pstate, (JsonArrayQueryConstructor *) expr);
break;
case T_JsonObjectAgg:
result = transformJsonObjectAgg(pstate, (JsonObjectAgg *) expr);
break;
case T_JsonArrayAgg:
result = transformJsonArrayAgg(pstate, (JsonArrayAgg *) expr);
break;
case T_JsonIsPredicate:
result = transformJsonIsPredicate(pstate, (JsonIsPredicate *) expr);
break;
default:
/* should not reach here */
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr));
result = NULL; /* keep compiler quiet */
break;
}
return result;
}
/*
* helper routine for delivering "column does not exist" error message
*
* (Usually we don't have to work this hard, but the general case of field
* selection from an arbitrary node needs it.)
*/
static void
unknown_attribute(ParseState *pstate, Node *relref, const char *attname,
int location)
{
RangeTblEntry *rte;
if (IsA(relref, Var) &&
((Var *) relref)->varattno == InvalidAttrNumber)
{
/* Reference the RTE by alias not by actual table name */
rte = GetRTEByRangeTablePosn(pstate,
((Var *) relref)->varno,
((Var *) relref)->varlevelsup);
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column %s.%s does not exist",
rte->eref->aliasname, attname),
parser_errposition(pstate, location)));
}
else
{
/* Have to do it by reference to the type of the expression */
Oid relTypeId = exprType(relref);
if (ISCOMPLEX(relTypeId))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" not found in data type %s",
attname, format_type_be(relTypeId)),
parser_errposition(pstate, location)));
else if (relTypeId == RECORDOID)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("could not identify column \"%s\" in record data type",
attname),
parser_errposition(pstate, location)));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("column notation .%s applied to type %s, "
"which is not a composite type",
attname, format_type_be(relTypeId)),
parser_errposition(pstate, location)));
}
}
static Node *
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
transformIndirection(ParseState *pstate, A_Indirection *ind)
{
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
Node *last_srf = pstate->p_last_srf;
Node *result = transformExprRecurse(pstate, ind->arg);
List *subscripts = NIL;
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
int location = exprLocation(result);
ListCell *i;
/*
* We have to split any field-selection operations apart from
* subscripting. Adjacent A_Indices nodes have to be treated as a single
* multidimensional subscript operation.
*/
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
foreach(i, ind->indirection)
{
Node *n = lfirst(i);
if (IsA(n, A_Indices))
subscripts = lappend(subscripts, n);
else if (IsA(n, A_Star))
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("row expansion via \"*\" is not supported here"),
parser_errposition(pstate, location)));
}
else
{
Node *newresult;
Assert(IsA(n, String));
/* process subscripts before this field selection */
if (subscripts)
result = (Node *) transformContainerSubscripts(pstate,
result,
exprType(result),
exprTypmod(result),
subscripts,
Support subscripting of arbitrary types, not only arrays. This patch generalizes the subscripting infrastructure so that any data type can be subscripted, if it provides a handler function to define what that means. Traditional variable-length (varlena) arrays all use array_subscript_handler(), while the existing fixed-length types that support subscripting use raw_array_subscript_handler(). It's expected that other types that want to use subscripting notation will define their own handlers. (This patch provides no such new features, though; it only lays the foundation for them.) To do this, move the parser's semantic processing of subscripts (including coercion to whatever data type is required) into a method callback supplied by the handler. On the execution side, replace the ExecEvalSubscriptingRef* layer of functions with direct calls to callback-supplied execution routines. (Thus, essentially no new run-time overhead should be caused by this patch. Indeed, there is room to remove some overhead by supplying specialized execution routines. This patch does a little bit in that line, but more could be done.) Additional work is required here and there to remove formerly hard-wired assumptions about the result type, collation, etc of a SubscriptingRef expression node; and to remove assumptions that the subscript values must be integers. One useful side-effect of this is that we now have a less squishy mechanism for identifying whether a data type is a "true" array: instead of wiring in weird rules about typlen, we can look to see if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this to be bulletproof, we have to forbid user-defined types from using that handler directly; but there seems no good reason for them to do so. This patch also removes assumptions that the number of subscripts is limited to MAXDIM (6), or indeed has any hard-wired limit. That limit still applies to types handled by array_subscript_handler or raw_array_subscript_handler, but to discourage other dependencies on this constant, I've moved it from c.h to utils/array.h. Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov, Peter Eisentraut, Pavel Stehule Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
false);
subscripts = NIL;
newresult = ParseFuncOrColumn(pstate,
list_make1(n),
list_make1(result),
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
last_srf,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
NULL,
false,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
location);
if (newresult == NULL)
unknown_attribute(pstate, result, strVal(n), location);
result = newresult;
}
}
/* process trailing subscripts, if any */
if (subscripts)
result = (Node *) transformContainerSubscripts(pstate,
result,
exprType(result),
exprTypmod(result),
subscripts,
Support subscripting of arbitrary types, not only arrays. This patch generalizes the subscripting infrastructure so that any data type can be subscripted, if it provides a handler function to define what that means. Traditional variable-length (varlena) arrays all use array_subscript_handler(), while the existing fixed-length types that support subscripting use raw_array_subscript_handler(). It's expected that other types that want to use subscripting notation will define their own handlers. (This patch provides no such new features, though; it only lays the foundation for them.) To do this, move the parser's semantic processing of subscripts (including coercion to whatever data type is required) into a method callback supplied by the handler. On the execution side, replace the ExecEvalSubscriptingRef* layer of functions with direct calls to callback-supplied execution routines. (Thus, essentially no new run-time overhead should be caused by this patch. Indeed, there is room to remove some overhead by supplying specialized execution routines. This patch does a little bit in that line, but more could be done.) Additional work is required here and there to remove formerly hard-wired assumptions about the result type, collation, etc of a SubscriptingRef expression node; and to remove assumptions that the subscript values must be integers. One useful side-effect of this is that we now have a less squishy mechanism for identifying whether a data type is a "true" array: instead of wiring in weird rules about typlen, we can look to see if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this to be bulletproof, we have to forbid user-defined types from using that handler directly; but there seems no good reason for them to do so. This patch also removes assumptions that the number of subscripts is limited to MAXDIM (6), or indeed has any hard-wired limit. That limit still applies to types handled by array_subscript_handler or raw_array_subscript_handler, but to discourage other dependencies on this constant, I've moved it from c.h to utils/array.h. Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov, Peter Eisentraut, Pavel Stehule Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
false);
return result;
}
/*
* Transform a ColumnRef.
*
* If you find yourself changing this code, see also ExpandColumnRefStar.
*/
static Node *
transformColumnRef(ParseState *pstate, ColumnRef *cref)
{
Node *node = NULL;
char *nspname = NULL;
char *relname = NULL;
char *colname = NULL;
ParseNamespaceItem *nsitem;
int levels_up;
enum
{
CRERR_NO_COLUMN,
CRERR_NO_RTE,
CRERR_WRONG_DB,
CRERR_TOO_MANY
} crerr = CRERR_NO_COLUMN;
const char *err;
/*
* Check to see if the column reference is in an invalid place within the
* query. We allow column references in most places, except in default
* expressions and partition bound expressions.
*/
err = NULL;
switch (pstate->p_expr_kind)
{
case EXPR_KIND_NONE:
Assert(false); /* can't happen */
break;
case EXPR_KIND_OTHER:
case EXPR_KIND_JOIN_ON:
case EXPR_KIND_JOIN_USING:
case EXPR_KIND_FROM_SUBSELECT:
case EXPR_KIND_FROM_FUNCTION:
case EXPR_KIND_WHERE:
case EXPR_KIND_POLICY:
case EXPR_KIND_HAVING:
case EXPR_KIND_FILTER:
case EXPR_KIND_WINDOW_PARTITION:
case EXPR_KIND_WINDOW_ORDER:
case EXPR_KIND_WINDOW_FRAME_RANGE:
case EXPR_KIND_WINDOW_FRAME_ROWS:
case EXPR_KIND_WINDOW_FRAME_GROUPS:
case EXPR_KIND_SELECT_TARGET:
case EXPR_KIND_INSERT_TARGET:
case EXPR_KIND_UPDATE_SOURCE:
case EXPR_KIND_UPDATE_TARGET:
Add support for MERGE SQL command MERGE performs actions that modify rows in the target table using a source table or query. MERGE provides a single SQL statement that can conditionally INSERT/UPDATE/DELETE rows -- a task that would otherwise require multiple PL statements. For example, MERGE INTO target AS t USING source AS s ON t.tid = s.sid WHEN MATCHED AND t.balance > s.delta THEN UPDATE SET balance = t.balance - s.delta WHEN MATCHED THEN DELETE WHEN NOT MATCHED AND s.delta > 0 THEN INSERT VALUES (s.sid, s.delta) WHEN NOT MATCHED THEN DO NOTHING; MERGE works with regular tables, partitioned tables and inheritance hierarchies, including column and row security enforcement, as well as support for row and statement triggers and transition tables therein. MERGE is optimized for OLTP and is parameterizable, though also useful for large scale ETL/ELT. MERGE is not intended to be used in preference to existing single SQL commands for INSERT, UPDATE or DELETE since there is some overhead. MERGE can be used from PL/pgSQL. MERGE does not support targetting updatable views or foreign tables, and RETURNING clauses are not allowed either. These limitations are likely fixable with sufficient effort. Rewrite rules are also not supported, but it's not clear that we'd want to support them. Author: Pavan Deolasee <pavan.deolasee@gmail.com> Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Author: Amit Langote <amitlangote09@gmail.com> Author: Simon Riggs <simon.riggs@enterprisedb.com> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Reviewed-by: Andres Freund <andres@anarazel.de> (earlier versions) Reviewed-by: Peter Geoghegan <pg@bowt.ie> (earlier versions) Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier versions) Reviewed-by: Japin Li <japinli@hotmail.com> Reviewed-by: Justin Pryzby <pryzby@telsasoft.com> Reviewed-by: Tomas Vondra <tomas.vondra@enterprisedb.com> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Discussion: https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com Discussion: https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com Discussion: https://postgr.es/m/20201231134736.GA25392@alvherre.pgsql
2022-03-28 16:45:58 +02:00
case EXPR_KIND_MERGE_WHEN:
case EXPR_KIND_GROUP_BY:
case EXPR_KIND_ORDER_BY:
case EXPR_KIND_DISTINCT_ON:
case EXPR_KIND_LIMIT:
case EXPR_KIND_OFFSET:
case EXPR_KIND_RETURNING:
case EXPR_KIND_VALUES:
case EXPR_KIND_VALUES_SINGLE:
case EXPR_KIND_CHECK_CONSTRAINT:
case EXPR_KIND_DOMAIN_CHECK:
case EXPR_KIND_FUNCTION_DEFAULT:
case EXPR_KIND_INDEX_EXPRESSION:
case EXPR_KIND_INDEX_PREDICATE:
Extended statistics on expressions Allow defining extended statistics on expressions, not just just on simple column references. With this commit, expressions are supported by all existing extended statistics kinds, improving the same types of estimates. A simple example may look like this: CREATE TABLE t (a int); CREATE STATISTICS s ON mod(a,10), mod(a,20) FROM t; ANALYZE t; The collected statistics are useful e.g. to estimate queries with those expressions in WHERE or GROUP BY clauses: SELECT * FROM t WHERE mod(a,10) = 0 AND mod(a,20) = 0; SELECT 1 FROM t GROUP BY mod(a,10), mod(a,20); This introduces new internal statistics kind 'e' (expressions) which is built automatically when the statistics object definition includes any expressions. This represents single-expression statistics, as if there was an expression index (but without the index maintenance overhead). The statistics is stored in pg_statistics_ext_data as an array of composite types, which is possible thanks to 79f6a942bd. CREATE STATISTICS allows building statistics on a single expression, in which case in which case it's not possible to specify statistics kinds. A new system view pg_stats_ext_exprs can be used to display expression statistics, similarly to pg_stats and pg_stats_ext views. ALTER TABLE ... ALTER COLUMN ... TYPE now treats indexes the same way it treats indexes, i.e. it drops and recreates the statistics. This means all statistics are reset, and we no longer try to preserve at least the functional dependencies. This should not be a major issue in practice, as the functional dependencies actually rely on per-column statistics, which were always reset anyway. Author: Tomas Vondra Reviewed-by: Justin Pryzby, Dean Rasheed, Zhihong Yu Discussion: https://postgr.es/m/ad7891d2-e90c-b446-9fe2-7419143847d7%40enterprisedb.com
2021-03-26 23:22:01 +01:00
case EXPR_KIND_STATS_EXPRESSION:
case EXPR_KIND_ALTER_COL_TRANSFORM:
case EXPR_KIND_EXECUTE_PARAMETER:
case EXPR_KIND_TRIGGER_WHEN:
case EXPR_KIND_PARTITION_EXPRESSION:
case EXPR_KIND_CALL_ARGUMENT:
case EXPR_KIND_COPY_WHERE:
case EXPR_KIND_GENERATED_COLUMN:
case EXPR_KIND_CYCLE_MARK:
/* okay */
break;
case EXPR_KIND_COLUMN_DEFAULT:
err = _("cannot use column reference in DEFAULT expression");
break;
case EXPR_KIND_PARTITION_BOUND:
err = _("cannot use column reference in partition bound expression");
break;
/*
* There is intentionally no default: case here, so that the
* compiler will warn if we add a new ParseExprKind without
* extending this switch. If we do see an unrecognized value at
* runtime, the behavior will be the same as for EXPR_KIND_OTHER,
* which is sane anyway.
*/
}
if (err)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg_internal("%s", err),
parser_errposition(pstate, cref->location)));
/*
* Give the PreParseColumnRefHook, if any, first shot. If it returns
* non-null then that's all, folks.
*/
if (pstate->p_pre_columnref_hook != NULL)
{
node = pstate->p_pre_columnref_hook(pstate, cref);
if (node != NULL)
return node;
}
/*----------
* The allowed syntaxes are:
*
* A First try to resolve as unqualified column name;
* if no luck, try to resolve as unqualified table name (A.*).
* A.B A is an unqualified table name; B is either a
* column or function name (trying column name first).
* A.B.C schema A, table B, col or func name C.
* A.B.C.D catalog A, schema B, table C, col or func D.
* A.* A is an unqualified table name; means whole-row value.
* A.B.* whole-row value of table B in schema A.
* A.B.C.* whole-row value of table C in schema B in catalog A.
*
* We do not need to cope with bare "*"; that will only be accepted by
* the grammar at the top level of a SELECT list, and transformTargetList
* will take care of it before it ever gets here. Also, "A.*" etc will
* be expanded by transformTargetList if they appear at SELECT top level,
* so here we are only going to see them as function or operator inputs.
*
* Currently, if a catalog name is given then it must equal the current
* database name; we check it here and then discard it.
*----------
*/
switch (list_length(cref->fields))
{
case 1:
{
Node *field1 = (Node *) linitial(cref->fields);
colname = strVal(field1);
/* Try to identify as an unqualified column */
node = colNameToVar(pstate, colname, false, cref->location);
2002-09-04 22:31:48 +02:00
if (node == NULL)
2002-09-04 22:31:48 +02:00
{
/*
* Not known as a column of any range-table entry.
*
* Try to find the name as a relation. Note that only
* relations already entered into the rangetable will be
* recognized.
2002-09-04 22:31:48 +02:00
*
* This is a hack for backwards compatibility with
* PostQUEL-inspired syntax. The preferred form now is
* "rel.*".
2002-09-04 22:31:48 +02:00
*/
nsitem = refnameNamespaceItem(pstate, NULL, colname,
cref->location,
&levels_up);
if (nsitem)
node = transformWholeRowRef(pstate, nsitem, levels_up,
cref->location);
2002-09-04 22:31:48 +02:00
}
break;
}
case 2:
{
Node *field1 = (Node *) linitial(cref->fields);
Node *field2 = (Node *) lsecond(cref->fields);
relname = strVal(field1);
/* Locate the referenced nsitem */
nsitem = refnameNamespaceItem(pstate, nspname, relname,
cref->location,
&levels_up);
if (nsitem == NULL)
{
crerr = CRERR_NO_RTE;
break;
}
/* Whole-row reference? */
if (IsA(field2, A_Star))
{
node = transformWholeRowRef(pstate, nsitem, levels_up,
cref->location);
2002-09-04 22:31:48 +02:00
break;
}
colname = strVal(field2);
/* Try to identify as a column of the nsitem */
node = scanNSItemForColumn(pstate, nsitem, levels_up, colname,
cref->location);
if (node == NULL)
{
/* Try it as a function call on the whole row */
node = transformWholeRowRef(pstate, nsitem, levels_up,
cref->location);
node = ParseFuncOrColumn(pstate,
list_make1(makeString(colname)),
list_make1(node),
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
pstate->p_last_srf,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
NULL,
false,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
cref->location);
2002-09-04 22:31:48 +02:00
}
break;
}
case 3:
{
Node *field1 = (Node *) linitial(cref->fields);
Node *field2 = (Node *) lsecond(cref->fields);
Node *field3 = (Node *) lthird(cref->fields);
nspname = strVal(field1);
relname = strVal(field2);
/* Locate the referenced nsitem */
nsitem = refnameNamespaceItem(pstate, nspname, relname,
cref->location,
&levels_up);
if (nsitem == NULL)
{
crerr = CRERR_NO_RTE;
break;
}
/* Whole-row reference? */
if (IsA(field3, A_Star))
2002-09-04 22:31:48 +02:00
{
node = transformWholeRowRef(pstate, nsitem, levels_up,
cref->location);
2002-09-04 22:31:48 +02:00
break;
}
colname = strVal(field3);
/* Try to identify as a column of the nsitem */
node = scanNSItemForColumn(pstate, nsitem, levels_up, colname,
cref->location);
if (node == NULL)
{
/* Try it as a function call on the whole row */
node = transformWholeRowRef(pstate, nsitem, levels_up,
cref->location);
node = ParseFuncOrColumn(pstate,
list_make1(makeString(colname)),
list_make1(node),
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
pstate->p_last_srf,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
NULL,
false,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
cref->location);
2002-09-04 22:31:48 +02:00
}
break;
}
case 4:
{
Node *field1 = (Node *) linitial(cref->fields);
Node *field2 = (Node *) lsecond(cref->fields);
Node *field3 = (Node *) lthird(cref->fields);
Node *field4 = (Node *) lfourth(cref->fields);
char *catname;
catname = strVal(field1);
nspname = strVal(field2);
relname = strVal(field3);
2002-09-04 22:31:48 +02:00
/*
* We check the catalog name and then ignore it.
2002-09-04 22:31:48 +02:00
*/
if (strcmp(catname, get_database_name(MyDatabaseId)) != 0)
{
crerr = CRERR_WRONG_DB;
break;
}
/* Locate the referenced nsitem */
nsitem = refnameNamespaceItem(pstate, nspname, relname,
cref->location,
&levels_up);
if (nsitem == NULL)
{
crerr = CRERR_NO_RTE;
break;
}
2002-09-04 22:31:48 +02:00
/* Whole-row reference? */
if (IsA(field4, A_Star))
2002-09-04 22:31:48 +02:00
{
node = transformWholeRowRef(pstate, nsitem, levels_up,
cref->location);
2002-09-04 22:31:48 +02:00
break;
}
colname = strVal(field4);
/* Try to identify as a column of the nsitem */
node = scanNSItemForColumn(pstate, nsitem, levels_up, colname,
cref->location);
if (node == NULL)
{
/* Try it as a function call on the whole row */
node = transformWholeRowRef(pstate, nsitem, levels_up,
cref->location);
node = ParseFuncOrColumn(pstate,
list_make1(makeString(colname)),
list_make1(node),
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
pstate->p_last_srf,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
NULL,
false,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
cref->location);
2002-09-04 22:31:48 +02:00
}
break;
}
default:
crerr = CRERR_TOO_MANY; /* too many dotted names */
break;
}
/*
* Now give the PostParseColumnRefHook, if any, a chance. We pass the
* translation-so-far so that it can throw an error if it wishes in the
* case that it has a conflicting interpretation of the ColumnRef. (If it
* just translates anyway, we'll throw an error, because we can't undo
* whatever effects the preceding steps may have had on the pstate.) If it
* returns NULL, use the standard translation, or throw a suitable error
* if there is none.
*/
if (pstate->p_post_columnref_hook != NULL)
{
Node *hookresult;
hookresult = pstate->p_post_columnref_hook(pstate, cref, node);
if (node == NULL)
node = hookresult;
else if (hookresult != NULL)
ereport(ERROR,
(errcode(ERRCODE_AMBIGUOUS_COLUMN),
errmsg("column reference \"%s\" is ambiguous",
NameListToString(cref->fields)),
parser_errposition(pstate, cref->location)));
}
/*
* Throw error if no translation found.
*/
if (node == NULL)
{
switch (crerr)
{
case CRERR_NO_COLUMN:
errorMissingColumn(pstate, relname, colname, cref->location);
break;
case CRERR_NO_RTE:
errorMissingRTE(pstate, makeRangeVar(nspname, relname,
cref->location));
break;
case CRERR_WRONG_DB:
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cross-database references are not implemented: %s",
NameListToString(cref->fields)),
parser_errposition(pstate, cref->location)));
break;
case CRERR_TOO_MANY:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("improper qualified name (too many dotted names): %s",
NameListToString(cref->fields)),
parser_errposition(pstate, cref->location)));
break;
}
}
return node;
}
static Node *
transformParamRef(ParseState *pstate, ParamRef *pref)
{
Node *result;
/*
* The core parser knows nothing about Params. If a hook is supplied,
* call it. If not, or if the hook returns NULL, throw a generic error.
*/
if (pstate->p_paramref_hook != NULL)
result = pstate->p_paramref_hook(pstate, pref);
else
result = NULL;
if (result == NULL)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_PARAMETER),
errmsg("there is no parameter $%d", pref->number),
parser_errposition(pstate, pref->location)));
return result;
}
/* Test whether an a_expr is a plain NULL constant or not */
static bool
exprIsNullConstant(Node *arg)
{
if (arg && IsA(arg, A_Const))
{
A_Const *con = (A_Const *) arg;
if (con->isnull)
return true;
}
return false;
}
static Node *
transformAExprOp(ParseState *pstate, A_Expr *a)
{
Node *lexpr = a->lexpr;
Node *rexpr = a->rexpr;
Node *result;
/*
* Special-case "foo = NULL" and "NULL = foo" for compatibility with
* standards-broken products (like Microsoft's). Turn these into IS NULL
* exprs. (If either side is a CaseTestExpr, then the expression was
* generated internally from a CASE-WHEN expression, and
* transform_null_equals does not apply.)
*/
if (Transform_null_equals &&
list_length(a->name) == 1 &&
strcmp(strVal(linitial(a->name)), "=") == 0 &&
(exprIsNullConstant(lexpr) || exprIsNullConstant(rexpr)) &&
(!IsA(lexpr, CaseTestExpr) && !IsA(rexpr, CaseTestExpr)))
{
NullTest *n = makeNode(NullTest);
n->nulltesttype = IS_NULL;
n->location = a->location;
if (exprIsNullConstant(lexpr))
n->arg = (Expr *) rexpr;
else
n->arg = (Expr *) lexpr;
result = transformExprRecurse(pstate, (Node *) n);
}
else if (lexpr && IsA(lexpr, RowExpr) &&
rexpr && IsA(rexpr, SubLink) &&
((SubLink *) rexpr)->subLinkType == EXPR_SUBLINK)
{
/*
* Convert "row op subselect" into a ROWCOMPARE sublink. Formerly the
* grammar did this, but now that a row construct is allowed anywhere
* in expressions, it's easier to do it here.
*/
SubLink *s = (SubLink *) rexpr;
s->subLinkType = ROWCOMPARE_SUBLINK;
s->testexpr = lexpr;
s->operName = a->name;
s->location = a->location;
result = transformExprRecurse(pstate, (Node *) s);
}
else if (lexpr && IsA(lexpr, RowExpr) &&
rexpr && IsA(rexpr, RowExpr))
{
/* ROW() op ROW() is handled specially */
lexpr = transformExprRecurse(pstate, lexpr);
rexpr = transformExprRecurse(pstate, rexpr);
result = make_row_comparison_op(pstate,
a->name,
2017-02-21 17:33:07 +01:00
castNode(RowExpr, lexpr)->args,
castNode(RowExpr, rexpr)->args,
a->location);
}
else
{
/* Ordinary scalar operator */
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
Node *last_srf = pstate->p_last_srf;
lexpr = transformExprRecurse(pstate, lexpr);
rexpr = transformExprRecurse(pstate, rexpr);
result = (Node *) make_op(pstate,
a->name,
lexpr,
rexpr,
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
last_srf,
a->location);
}
return result;
}
static Node *
transformAExprOpAny(ParseState *pstate, A_Expr *a)
{
Node *lexpr = transformExprRecurse(pstate, a->lexpr);
Node *rexpr = transformExprRecurse(pstate, a->rexpr);
return (Node *) make_scalar_array_op(pstate,
a->name,
true,
lexpr,
rexpr,
a->location);
}
static Node *
transformAExprOpAll(ParseState *pstate, A_Expr *a)
{
Node *lexpr = transformExprRecurse(pstate, a->lexpr);
Node *rexpr = transformExprRecurse(pstate, a->rexpr);
return (Node *) make_scalar_array_op(pstate,
a->name,
false,
lexpr,
rexpr,
a->location);
}
static Node *
transformAExprDistinct(ParseState *pstate, A_Expr *a)
{
Make operator precedence follow the SQL standard more closely. While the SQL standard is pretty vague on the overall topic of operator precedence (because it never presents a unified BNF for all expressions), it does seem reasonable to conclude from the spec for <boolean value expression> that OR has the lowest precedence, then AND, then NOT, then IS tests, then the six standard comparison operators, then everything else (since any non-boolean operator in a WHERE clause would need to be an argument of one of these). We were only sort of on board with that: most notably, while "<" ">" and "=" had properly low precedence, "<=" ">=" and "<>" were treated as generic operators and so had significantly higher precedence. And "IS" tests were even higher precedence than those, which is very clearly wrong per spec. Another problem was that "foo NOT SOMETHING bar" constructs, such as "x NOT LIKE y", were treated inconsistently because of a bison implementation artifact: they had the documented precedence with respect to operators to their right, but behaved like NOT (i.e., very low priority) with respect to operators to their left. Fixing the precedence issues is just a small matter of rearranging the precedence declarations in gram.y, except for the NOT problem, which requires adding an additional lookahead case in base_yylex() so that we can attach a different token precedence to NOT LIKE and allied two-word operators. The bulk of this patch is not the bug fix per se, but adding logic to parse_expr.c to allow giving warnings if an expression has changed meaning because of these precedence changes. These warnings are off by default and are enabled by the new GUC operator_precedence_warning. It's believed that very few applications will be affected by these changes, but it was agreed that a warning mechanism is essential to help debug any that are.
2015-03-11 18:22:52 +01:00
Node *lexpr = a->lexpr;
Node *rexpr = a->rexpr;
Node *result;
Make operator precedence follow the SQL standard more closely. While the SQL standard is pretty vague on the overall topic of operator precedence (because it never presents a unified BNF for all expressions), it does seem reasonable to conclude from the spec for <boolean value expression> that OR has the lowest precedence, then AND, then NOT, then IS tests, then the six standard comparison operators, then everything else (since any non-boolean operator in a WHERE clause would need to be an argument of one of these). We were only sort of on board with that: most notably, while "<" ">" and "=" had properly low precedence, "<=" ">=" and "<>" were treated as generic operators and so had significantly higher precedence. And "IS" tests were even higher precedence than those, which is very clearly wrong per spec. Another problem was that "foo NOT SOMETHING bar" constructs, such as "x NOT LIKE y", were treated inconsistently because of a bison implementation artifact: they had the documented precedence with respect to operators to their right, but behaved like NOT (i.e., very low priority) with respect to operators to their left. Fixing the precedence issues is just a small matter of rearranging the precedence declarations in gram.y, except for the NOT problem, which requires adding an additional lookahead case in base_yylex() so that we can attach a different token precedence to NOT LIKE and allied two-word operators. The bulk of this patch is not the bug fix per se, but adding logic to parse_expr.c to allow giving warnings if an expression has changed meaning because of these precedence changes. These warnings are off by default and are enabled by the new GUC operator_precedence_warning. It's believed that very few applications will be affected by these changes, but it was agreed that a warning mechanism is essential to help debug any that are.
2015-03-11 18:22:52 +01:00
/*
* If either input is an undecorated NULL literal, transform to a NullTest
* on the other input. That's simpler to process than a full DistinctExpr,
* and it avoids needing to require that the datatype have an = operator.
*/
if (exprIsNullConstant(rexpr))
return make_nulltest_from_distinct(pstate, a, lexpr);
if (exprIsNullConstant(lexpr))
return make_nulltest_from_distinct(pstate, a, rexpr);
Make operator precedence follow the SQL standard more closely. While the SQL standard is pretty vague on the overall topic of operator precedence (because it never presents a unified BNF for all expressions), it does seem reasonable to conclude from the spec for <boolean value expression> that OR has the lowest precedence, then AND, then NOT, then IS tests, then the six standard comparison operators, then everything else (since any non-boolean operator in a WHERE clause would need to be an argument of one of these). We were only sort of on board with that: most notably, while "<" ">" and "=" had properly low precedence, "<=" ">=" and "<>" were treated as generic operators and so had significantly higher precedence. And "IS" tests were even higher precedence than those, which is very clearly wrong per spec. Another problem was that "foo NOT SOMETHING bar" constructs, such as "x NOT LIKE y", were treated inconsistently because of a bison implementation artifact: they had the documented precedence with respect to operators to their right, but behaved like NOT (i.e., very low priority) with respect to operators to their left. Fixing the precedence issues is just a small matter of rearranging the precedence declarations in gram.y, except for the NOT problem, which requires adding an additional lookahead case in base_yylex() so that we can attach a different token precedence to NOT LIKE and allied two-word operators. The bulk of this patch is not the bug fix per se, but adding logic to parse_expr.c to allow giving warnings if an expression has changed meaning because of these precedence changes. These warnings are off by default and are enabled by the new GUC operator_precedence_warning. It's believed that very few applications will be affected by these changes, but it was agreed that a warning mechanism is essential to help debug any that are.
2015-03-11 18:22:52 +01:00
lexpr = transformExprRecurse(pstate, lexpr);
rexpr = transformExprRecurse(pstate, rexpr);
if (lexpr && IsA(lexpr, RowExpr) &&
rexpr && IsA(rexpr, RowExpr))
{
/* ROW() op ROW() is handled specially */
result = make_row_distinct_op(pstate, a->name,
(RowExpr *) lexpr,
(RowExpr *) rexpr,
a->location);
}
else
{
/* Ordinary scalar operator */
result = (Node *) make_distinct_op(pstate,
a->name,
lexpr,
rexpr,
a->location);
}
/*
* If it's NOT DISTINCT, we first build a DistinctExpr and then stick a
* NOT on top.
*/
if (a->kind == AEXPR_NOT_DISTINCT)
result = (Node *) makeBoolExpr(NOT_EXPR,
list_make1(result),
a->location);
return result;
}
static Node *
transformAExprNullIf(ParseState *pstate, A_Expr *a)
{
Node *lexpr = transformExprRecurse(pstate, a->lexpr);
Node *rexpr = transformExprRecurse(pstate, a->rexpr);
OpExpr *result;
result = (OpExpr *) make_op(pstate,
a->name,
lexpr,
rexpr,
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
pstate->p_last_srf,
a->location);
/*
* The comparison operator itself should yield boolean ...
*/
if (result->opresulttype != BOOLOID)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("NULLIF requires = operator to yield boolean"),
parser_errposition(pstate, a->location)));
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
if (result->opretset)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
/* translator: %s is name of a SQL construct, eg NULLIF */
errmsg("%s must not return a set", "NULLIF"),
parser_errposition(pstate, a->location)));
/*
* ... but the NullIfExpr will yield the first operand's type.
*/
result->opresulttype = exprType((Node *) linitial(result->args));
/*
* We rely on NullIfExpr and OpExpr being the same struct
*/
NodeSetTag(result, T_NullIfExpr);
return (Node *) result;
}
static Node *
transformAExprIn(ParseState *pstate, A_Expr *a)
{
Node *result = NULL;
Node *lexpr;
List *rexprs;
List *rvars;
List *rnonvars;
bool useOr;
ListCell *l;
/*
* If the operator is <>, combine with AND not OR.
*/
if (strcmp(strVal(linitial(a->name)), "<>") == 0)
useOr = false;
else
useOr = true;
/*
* We try to generate a ScalarArrayOpExpr from IN/NOT IN, but this is only
* possible if there is a suitable array type available. If not, we fall
* back to a boolean condition tree with multiple copies of the lefthand
* expression. Also, any IN-list items that contain Vars are handled as
* separate boolean conditions, because that gives the planner more scope
* for optimization on such clauses.
*
* First step: transform all the inputs, and detect whether any contain
* Vars.
*/
lexpr = transformExprRecurse(pstate, a->lexpr);
rexprs = rvars = rnonvars = NIL;
foreach(l, (List *) a->rexpr)
{
Node *rexpr = transformExprRecurse(pstate, lfirst(l));
rexprs = lappend(rexprs, rexpr);
if (contain_vars_of_level(rexpr, 0))
rvars = lappend(rvars, rexpr);
else
rnonvars = lappend(rnonvars, rexpr);
}
/*
* ScalarArrayOpExpr is only going to be useful if there's more than one
* non-Var righthand item.
*/
if (list_length(rnonvars) > 1)
{
List *allexprs;
Oid scalar_type;
Oid array_type;
/*
* Try to select a common type for the array elements. Note that
* since the LHS' type is first in the list, it will be preferred when
* there is doubt (eg, when all the RHS items are unknown literals).
*
* Note: use list_concat here not lcons, to avoid damaging rnonvars.
*/
allexprs = list_concat(list_make1(lexpr), rnonvars);
scalar_type = select_common_type(pstate, allexprs, NULL, NULL);
Fix failure to validate the result of select_common_type(). Although select_common_type() has a failure-return convention, an apparent successful return just provides a type OID that *might* work as a common supertype; we've not validated that the required casts actually exist. In the mainstream use-cases that doesn't matter, because we'll proceed to invoke coerce_to_common_type() on each input, which will fail appropriately if the proposed common type doesn't actually work. However, a few callers didn't read the (nonexistent) fine print, and thought that if they got back a nonzero OID then the coercions were sure to work. This affects in particular the recently-added "anycompatible" polymorphic types; we might think that a function/operator using such types matches cases it really doesn't. A likely end result of that is unexpected "ambiguous operator" errors, as for example in bug #17387 from James Inform. Another, much older, case is that the parser might try to transform an "x IN (list)" construct to a ScalarArrayOpExpr even when the list elements don't actually have a common supertype. It doesn't seem desirable to add more checking to select_common_type itself, as that'd just slow down the mainstream use-cases. Instead, write a separate function verify_common_type that performs the missing checks, and add a call to that where necessary. Likewise add verify_common_type_from_oids to go with select_common_type_from_oids. Back-patch to v13 where the "anycompatible" types came in. (The symptom complained of in bug #17387 doesn't appear till v14, but that's just because we didn't get around to converting || to use anycompatible till then.) In principle the "x IN (list)" fix could go back all the way, but I'm not currently convinced that it makes much difference in real-world cases, so I won't bother for now. Discussion: https://postgr.es/m/17387-5dfe54b988444963@postgresql.org
2022-01-29 17:41:12 +01:00
/* We have to verify that the selected type actually works */
if (OidIsValid(scalar_type) &&
!verify_common_type(scalar_type, allexprs))
scalar_type = InvalidOid;
/*
* Do we have an array type to use? Aside from the case where there
* isn't one, we don't risk using ScalarArrayOpExpr when the common
* type is RECORD, because the RowExpr comparison logic below can cope
* with some cases of non-identical row types.
*/
if (OidIsValid(scalar_type) && scalar_type != RECORDOID)
array_type = get_array_type(scalar_type);
else
array_type = InvalidOid;
if (array_type != InvalidOid)
{
/*
* OK: coerce all the right-hand non-Var inputs to the common type
* and build an ArrayExpr for them.
*/
List *aexprs;
ArrayExpr *newa;
aexprs = NIL;
foreach(l, rnonvars)
{
Node *rexpr = (Node *) lfirst(l);
rexpr = coerce_to_common_type(pstate, rexpr,
scalar_type,
"IN");
aexprs = lappend(aexprs, rexpr);
}
newa = makeNode(ArrayExpr);
newa->array_typeid = array_type;
/* array_collid will be set by parse_collate.c */
newa->element_typeid = scalar_type;
newa->elements = aexprs;
newa->multidims = false;
newa->location = -1;
result = (Node *) make_scalar_array_op(pstate,
a->name,
useOr,
lexpr,
(Node *) newa,
a->location);
/* Consider only the Vars (if any) in the loop below */
rexprs = rvars;
}
}
/*
* Must do it the hard way, ie, with a boolean expression tree.
*/
foreach(l, rexprs)
{
Node *rexpr = (Node *) lfirst(l);
Node *cmp;
if (IsA(lexpr, RowExpr) &&
IsA(rexpr, RowExpr))
{
/* ROW() op ROW() is handled specially */
cmp = make_row_comparison_op(pstate,
a->name,
copyObject(((RowExpr *) lexpr)->args),
((RowExpr *) rexpr)->args,
a->location);
}
else
{
/* Ordinary scalar operator */
cmp = (Node *) make_op(pstate,
a->name,
copyObject(lexpr),
rexpr,
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
pstate->p_last_srf,
a->location);
}
cmp = coerce_to_boolean(pstate, cmp, "IN");
if (result == NULL)
result = cmp;
else
result = (Node *) makeBoolExpr(useOr ? OR_EXPR : AND_EXPR,
list_make2(result, cmp),
a->location);
}
return result;
}
static Node *
transformAExprBetween(ParseState *pstate, A_Expr *a)
{
Node *aexpr;
Node *bexpr;
Node *cexpr;
Node *result;
Node *sub1;
Node *sub2;
List *args;
/* Deconstruct A_Expr into three subexprs */
aexpr = a->lexpr;
2017-02-21 17:33:07 +01:00
args = castNode(List, a->rexpr);
Assert(list_length(args) == 2);
bexpr = (Node *) linitial(args);
cexpr = (Node *) lsecond(args);
/*
* Build the equivalent comparison expression. Make copies of
* multiply-referenced subexpressions for safety. (XXX this is really
* wrong since it results in multiple runtime evaluations of what may be
* volatile expressions ...)
*
* Ideally we would not use hard-wired operators here but instead use
* opclasses. However, mixed data types and other issues make this
* difficult:
* http://archives.postgresql.org/pgsql-hackers/2008-08/msg01142.php
*/
switch (a->kind)
{
case AEXPR_BETWEEN:
args = list_make2(makeSimpleA_Expr(AEXPR_OP, ">=",
aexpr, bexpr,
a->location),
makeSimpleA_Expr(AEXPR_OP, "<=",
copyObject(aexpr), cexpr,
a->location));
result = (Node *) makeBoolExpr(AND_EXPR, args, a->location);
break;
case AEXPR_NOT_BETWEEN:
args = list_make2(makeSimpleA_Expr(AEXPR_OP, "<",
aexpr, bexpr,
a->location),
makeSimpleA_Expr(AEXPR_OP, ">",
copyObject(aexpr), cexpr,
a->location));
result = (Node *) makeBoolExpr(OR_EXPR, args, a->location);
break;
case AEXPR_BETWEEN_SYM:
args = list_make2(makeSimpleA_Expr(AEXPR_OP, ">=",
aexpr, bexpr,
a->location),
makeSimpleA_Expr(AEXPR_OP, "<=",
copyObject(aexpr), cexpr,
a->location));
sub1 = (Node *) makeBoolExpr(AND_EXPR, args, a->location);
args = list_make2(makeSimpleA_Expr(AEXPR_OP, ">=",
copyObject(aexpr), copyObject(cexpr),
a->location),
makeSimpleA_Expr(AEXPR_OP, "<=",
copyObject(aexpr), copyObject(bexpr),
a->location));
sub2 = (Node *) makeBoolExpr(AND_EXPR, args, a->location);
args = list_make2(sub1, sub2);
result = (Node *) makeBoolExpr(OR_EXPR, args, a->location);
break;
case AEXPR_NOT_BETWEEN_SYM:
args = list_make2(makeSimpleA_Expr(AEXPR_OP, "<",
aexpr, bexpr,
a->location),
makeSimpleA_Expr(AEXPR_OP, ">",
copyObject(aexpr), cexpr,
a->location));
sub1 = (Node *) makeBoolExpr(OR_EXPR, args, a->location);
args = list_make2(makeSimpleA_Expr(AEXPR_OP, "<",
copyObject(aexpr), copyObject(cexpr),
a->location),
makeSimpleA_Expr(AEXPR_OP, ">",
copyObject(aexpr), copyObject(bexpr),
a->location));
sub2 = (Node *) makeBoolExpr(OR_EXPR, args, a->location);
args = list_make2(sub1, sub2);
result = (Node *) makeBoolExpr(AND_EXPR, args, a->location);
break;
default:
elog(ERROR, "unrecognized A_Expr kind: %d", a->kind);
result = NULL; /* keep compiler quiet */
break;
}
return transformExprRecurse(pstate, result);
}
static Node *
transformBoolExpr(ParseState *pstate, BoolExpr *a)
{
List *args = NIL;
const char *opname;
ListCell *lc;
switch (a->boolop)
{
case AND_EXPR:
opname = "AND";
break;
case OR_EXPR:
opname = "OR";
break;
case NOT_EXPR:
opname = "NOT";
break;
default:
elog(ERROR, "unrecognized boolop: %d", (int) a->boolop);
opname = NULL; /* keep compiler quiet */
break;
}
foreach(lc, a->args)
{
Node *arg = (Node *) lfirst(lc);
arg = transformExprRecurse(pstate, arg);
arg = coerce_to_boolean(pstate, arg, opname);
args = lappend(args, arg);
}
return (Node *) makeBoolExpr(a->boolop, args, a->location);
}
static Node *
transformFuncCall(ParseState *pstate, FuncCall *fn)
{
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
Node *last_srf = pstate->p_last_srf;
List *targs;
ListCell *args;
/* Transform the list of arguments ... */
targs = NIL;
foreach(args, fn->args)
{
targs = lappend(targs, transformExprRecurse(pstate,
(Node *) lfirst(args)));
}
/*
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
* When WITHIN GROUP is used, we treat its ORDER BY expressions as
* additional arguments to the function, for purposes of function lookup
* and argument type coercion. So, transform each such expression and add
* them to the targs list. We don't explicitly mark where each argument
* came from, but ParseFuncOrColumn can tell what's what by reference to
* list_length(fn->agg_order).
*/
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
if (fn->agg_within_group)
{
Assert(fn->agg_order != NIL);
foreach(args, fn->agg_order)
{
SortBy *arg = (SortBy *) lfirst(args);
targs = lappend(targs, transformExpr(pstate, arg->node,
EXPR_KIND_ORDER_BY));
}
}
/* ... and hand off to ParseFuncOrColumn */
return ParseFuncOrColumn(pstate,
fn->funcname,
targs,
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
last_srf,
Support ordered-set (WITHIN GROUP) aggregates. This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
fn,
false,
fn->location);
}
static Node *
transformMultiAssignRef(ParseState *pstate, MultiAssignRef *maref)
{
SubLink *sublink;
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
RowExpr *rexpr;
Query *qtree;
TargetEntry *tle;
/* We should only see this in first-stage processing of UPDATE tlists */
Assert(pstate->p_expr_kind == EXPR_KIND_UPDATE_SOURCE);
/* We only need to transform the source if this is the first column */
if (maref->colno == 1)
{
/*
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
* For now, we only allow EXPR SubLinks and RowExprs as the source of
* an UPDATE multiassignment. This is sufficient to cover interesting
* cases; at worst, someone would have to write (SELECT * FROM expr)
* to expand a composite-returning expression of another form.
*/
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
if (IsA(maref->source, SubLink) &&
((SubLink *) maref->source)->subLinkType == EXPR_SUBLINK)
{
/* Relabel it as a MULTIEXPR_SUBLINK */
sublink = (SubLink *) maref->source;
sublink->subLinkType = MULTIEXPR_SUBLINK;
/* And transform it */
sublink = (SubLink *) transformExprRecurse(pstate,
(Node *) sublink);
qtree = castNode(Query, sublink->subselect);
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
/* Check subquery returns required number of columns */
if (count_nonjunk_tlist_entries(qtree->targetList) != maref->ncolumns)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("number of columns does not match number of values"),
parser_errposition(pstate, sublink->location)));
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
/*
* Build a resjunk tlist item containing the MULTIEXPR SubLink,
* and add it to pstate->p_multiassign_exprs, whence it will later
* get appended to the completed targetlist. We needn't worry
* about selecting a resno for it; transformUpdateStmt will do
* that.
*/
tle = makeTargetEntry((Expr *) sublink, 0, NULL, true);
pstate->p_multiassign_exprs = lappend(pstate->p_multiassign_exprs,
tle);
/*
* Assign a unique-within-this-targetlist ID to the MULTIEXPR
* SubLink. We can just use its position in the
* p_multiassign_exprs list.
*/
sublink->subLinkId = list_length(pstate->p_multiassign_exprs);
}
else if (IsA(maref->source, RowExpr))
{
/* Transform the RowExpr, allowing SetToDefault items */
rexpr = (RowExpr *) transformRowExpr(pstate,
(RowExpr *) maref->source,
true);
/* Check it returns required number of columns */
if (list_length(rexpr->args) != maref->ncolumns)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("number of columns does not match number of values"),
parser_errposition(pstate, rexpr->location)));
/*
* Temporarily append it to p_multiassign_exprs, so we can get it
* back when we come back here for additional columns.
*/
tle = makeTargetEntry((Expr *) rexpr, 0, NULL, true);
pstate->p_multiassign_exprs = lappend(pstate->p_multiassign_exprs,
tle);
}
else
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("source for a multiple-column UPDATE item must be a sub-SELECT or ROW() expression"),
parser_errposition(pstate, exprLocation(maref->source))));
}
else
{
/*
* Second or later column in a multiassignment. Re-fetch the
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
* transformed SubLink or RowExpr, which we assume is still the last
* entry in p_multiassign_exprs.
*/
Assert(pstate->p_multiassign_exprs != NIL);
tle = (TargetEntry *) llast(pstate->p_multiassign_exprs);
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
}
/*
* Emit the appropriate output expression for the current column
*/
if (IsA(tle->expr, SubLink))
{
Param *param;
sublink = (SubLink *) tle->expr;
Assert(sublink->subLinkType == MULTIEXPR_SUBLINK);
qtree = castNode(Query, sublink->subselect);
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
/* Build a Param representing the current subquery output column */
tle = (TargetEntry *) list_nth(qtree->targetList, maref->colno - 1);
Assert(!tle->resjunk);
param = makeNode(Param);
param->paramkind = PARAM_MULTIEXPR;
param->paramid = (sublink->subLinkId << 16) | maref->colno;
param->paramtype = exprType((Node *) tle->expr);
param->paramtypmod = exprTypmod((Node *) tle->expr);
param->paramcollid = exprCollation((Node *) tle->expr);
param->location = exprLocation((Node *) tle->expr);
return (Node *) param;
}
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
if (IsA(tle->expr, RowExpr))
{
Node *result;
rexpr = (RowExpr *) tle->expr;
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
/* Just extract and return the next element of the RowExpr */
result = (Node *) list_nth(rexpr->args, maref->colno - 1);
/*
* If we're at the last column, delete the RowExpr from
* p_multiassign_exprs; we don't need it anymore, and don't want it in
* the finished UPDATE tlist. We assume this is still the last entry
* in p_multiassign_exprs.
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
*/
if (maref->colno == maref->ncolumns)
pstate->p_multiassign_exprs =
list_delete_last(pstate->p_multiassign_exprs);
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
return result;
}
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
elog(ERROR, "unexpected expr type in multiassign list");
return NULL; /* keep compiler quiet */
}
static Node *
transformCaseExpr(ParseState *pstate, CaseExpr *c)
{
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
CaseExpr *newc = makeNode(CaseExpr);
Node *last_srf = pstate->p_last_srf;
Node *arg;
CaseTestExpr *placeholder;
List *newargs;
List *resultexprs;
ListCell *l;
Node *defresult;
Oid ptype;
/* transform the test expression, if any */
arg = transformExprRecurse(pstate, (Node *) c->arg);
/* generate placeholder for test expression */
if (arg)
{
/*
* If test expression is an untyped literal, force it to text. We have
* to do something now because we won't be able to do this coercion on
* the placeholder. This is not as flexible as what was done in 7.4
* and before, but it's good enough to handle the sort of silly coding
* commonly seen.
*/
if (exprType(arg) == UNKNOWNOID)
arg = coerce_to_common_type(pstate, arg, TEXTOID, "CASE");
/*
* Run collation assignment on the test expression so that we know
* what collation to mark the placeholder with. In principle we could
* leave it to parse_collate.c to do that later, but propagating the
* result to the CaseTestExpr would be unnecessarily complicated.
*/
assign_expr_collations(pstate, arg);
placeholder = makeNode(CaseTestExpr);
placeholder->typeId = exprType(arg);
placeholder->typeMod = exprTypmod(arg);
placeholder->collation = exprCollation(arg);
}
else
placeholder = NULL;
newc->arg = (Expr *) arg;
/* transform the list of arguments */
newargs = NIL;
resultexprs = NIL;
foreach(l, c->args)
{
CaseWhen *w = lfirst_node(CaseWhen, l);
CaseWhen *neww = makeNode(CaseWhen);
Node *warg;
warg = (Node *) w->expr;
if (placeholder)
{
/* shorthand form was specified, so expand... */
warg = (Node *) makeSimpleA_Expr(AEXPR_OP, "=",
(Node *) placeholder,
warg,
w->location);
}
neww->expr = (Expr *) transformExprRecurse(pstate, warg);
neww->expr = (Expr *) coerce_to_boolean(pstate,
(Node *) neww->expr,
"CASE/WHEN");
warg = (Node *) w->result;
neww->result = (Expr *) transformExprRecurse(pstate, warg);
neww->location = w->location;
newargs = lappend(newargs, neww);
resultexprs = lappend(resultexprs, neww->result);
}
newc->args = newargs;
/* transform the default clause */
defresult = (Node *) c->defresult;
if (defresult == NULL)
{
A_Const *n = makeNode(A_Const);
n->isnull = true;
n->location = -1;
defresult = (Node *) n;
}
newc->defresult = (Expr *) transformExprRecurse(pstate, defresult);
/*
* Note: default result is considered the most significant type in
* determining preferred type. This is how the code worked before, but it
* seems a little bogus to me --- tgl
*/
resultexprs = lcons(newc->defresult, resultexprs);
ptype = select_common_type(pstate, resultexprs, "CASE", NULL);
Assert(OidIsValid(ptype));
newc->casetype = ptype;
/* casecollid will be set by parse_collate.c */
/* Convert default result clause, if necessary */
newc->defresult = (Expr *)
coerce_to_common_type(pstate,
(Node *) newc->defresult,
ptype,
"CASE/ELSE");
/* Convert when-clause results, if necessary */
foreach(l, newc->args)
{
CaseWhen *w = (CaseWhen *) lfirst(l);
w->result = (Expr *)
coerce_to_common_type(pstate,
(Node *) w->result,
ptype,
"CASE/WHEN");
}
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
/* if any subexpression contained a SRF, complain */
if (pstate->p_last_srf != last_srf)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/* translator: %s is name of a SQL construct, eg GROUP BY */
errmsg("set-returning functions are not allowed in %s",
"CASE"),
errhint("You might be able to move the set-returning function into a LATERAL FROM item."),
parser_errposition(pstate,
exprLocation(pstate->p_last_srf))));
newc->location = c->location;
return (Node *) newc;
}
static Node *
transformSubLink(ParseState *pstate, SubLink *sublink)
{
Node *result = (Node *) sublink;
Query *qtree;
const char *err;
/*
* Check to see if the sublink is in an invalid place within the query. We
* allow sublinks everywhere in SELECT/INSERT/UPDATE/DELETE/MERGE, but
* generally not in utility statements.
*/
err = NULL;
switch (pstate->p_expr_kind)
{
case EXPR_KIND_NONE:
Assert(false); /* can't happen */
break;
case EXPR_KIND_OTHER:
/* Accept sublink here; caller must throw error if wanted */
break;
case EXPR_KIND_JOIN_ON:
case EXPR_KIND_JOIN_USING:
case EXPR_KIND_FROM_SUBSELECT:
case EXPR_KIND_FROM_FUNCTION:
case EXPR_KIND_WHERE:
case EXPR_KIND_POLICY:
case EXPR_KIND_HAVING:
case EXPR_KIND_FILTER:
case EXPR_KIND_WINDOW_PARTITION:
case EXPR_KIND_WINDOW_ORDER:
case EXPR_KIND_WINDOW_FRAME_RANGE:
case EXPR_KIND_WINDOW_FRAME_ROWS:
Support all SQL:2011 options for window frame clauses. This patch adds the ability to use "RANGE offset PRECEDING/FOLLOWING" frame boundaries in window functions. We'd punted on that back in the original patch to add window functions, because it was not clear how to do it in a reasonably data-type-extensible fashion. That problem is resolved here by adding the ability for btree operator classes to provide an "in_range" support function that defines how to add or subtract the RANGE offset value. Factoring it this way also allows the operator class to avoid overflow problems near the ends of the datatype's range, if it wishes to expend effort on that. (In the committed patch, the integer opclasses handle that issue, but it did not seem worth the trouble to avoid overflow failures for datetime types.) The patch includes in_range support for the integer_ops opfamily (int2/int4/int8) as well as the standard datetime types. Support for other numeric types has been requested, but that seems like suitable material for a follow-on patch. In addition, the patch adds GROUPS mode which counts the offset in ORDER-BY peer groups rather than rows, and it adds the frame_exclusion options specified by SQL:2011. As far as I can see, we are now fully up to spec on window framing options. Existing behaviors remain unchanged, except that I changed the errcode for a couple of existing error reports to meet the SQL spec's expectation that negative "offset" values should be reported as SQLSTATE 22013. Internally and in relevant parts of the documentation, we now consistently use the terminology "offset PRECEDING/FOLLOWING" rather than "value PRECEDING/FOLLOWING", since the term "value" is confusingly vague. Oliver Ford, reviewed and whacked around some by me Discussion: https://postgr.es/m/CAGMVOdu9sivPAxbNN0X+q19Sfv9edEPv=HibOJhB14TJv_RCQg@mail.gmail.com
2018-02-07 06:06:50 +01:00
case EXPR_KIND_WINDOW_FRAME_GROUPS:
case EXPR_KIND_SELECT_TARGET:
case EXPR_KIND_INSERT_TARGET:
case EXPR_KIND_UPDATE_SOURCE:
case EXPR_KIND_UPDATE_TARGET:
Add support for MERGE SQL command MERGE performs actions that modify rows in the target table using a source table or query. MERGE provides a single SQL statement that can conditionally INSERT/UPDATE/DELETE rows -- a task that would otherwise require multiple PL statements. For example, MERGE INTO target AS t USING source AS s ON t.tid = s.sid WHEN MATCHED AND t.balance > s.delta THEN UPDATE SET balance = t.balance - s.delta WHEN MATCHED THEN DELETE WHEN NOT MATCHED AND s.delta > 0 THEN INSERT VALUES (s.sid, s.delta) WHEN NOT MATCHED THEN DO NOTHING; MERGE works with regular tables, partitioned tables and inheritance hierarchies, including column and row security enforcement, as well as support for row and statement triggers and transition tables therein. MERGE is optimized for OLTP and is parameterizable, though also useful for large scale ETL/ELT. MERGE is not intended to be used in preference to existing single SQL commands for INSERT, UPDATE or DELETE since there is some overhead. MERGE can be used from PL/pgSQL. MERGE does not support targetting updatable views or foreign tables, and RETURNING clauses are not allowed either. These limitations are likely fixable with sufficient effort. Rewrite rules are also not supported, but it's not clear that we'd want to support them. Author: Pavan Deolasee <pavan.deolasee@gmail.com> Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Author: Amit Langote <amitlangote09@gmail.com> Author: Simon Riggs <simon.riggs@enterprisedb.com> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Reviewed-by: Andres Freund <andres@anarazel.de> (earlier versions) Reviewed-by: Peter Geoghegan <pg@bowt.ie> (earlier versions) Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier versions) Reviewed-by: Japin Li <japinli@hotmail.com> Reviewed-by: Justin Pryzby <pryzby@telsasoft.com> Reviewed-by: Tomas Vondra <tomas.vondra@enterprisedb.com> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Discussion: https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com Discussion: https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com Discussion: https://postgr.es/m/20201231134736.GA25392@alvherre.pgsql
2022-03-28 16:45:58 +02:00
case EXPR_KIND_MERGE_WHEN:
case EXPR_KIND_GROUP_BY:
case EXPR_KIND_ORDER_BY:
case EXPR_KIND_DISTINCT_ON:
case EXPR_KIND_LIMIT:
case EXPR_KIND_OFFSET:
case EXPR_KIND_RETURNING:
case EXPR_KIND_VALUES:
case EXPR_KIND_VALUES_SINGLE:
case EXPR_KIND_CYCLE_MARK:
/* okay */
break;
case EXPR_KIND_CHECK_CONSTRAINT:
case EXPR_KIND_DOMAIN_CHECK:
2013-01-05 14:25:21 +01:00
err = _("cannot use subquery in check constraint");
break;
case EXPR_KIND_COLUMN_DEFAULT:
case EXPR_KIND_FUNCTION_DEFAULT:
err = _("cannot use subquery in DEFAULT expression");
break;
case EXPR_KIND_INDEX_EXPRESSION:
err = _("cannot use subquery in index expression");
break;
case EXPR_KIND_INDEX_PREDICATE:
err = _("cannot use subquery in index predicate");
break;
Extended statistics on expressions Allow defining extended statistics on expressions, not just just on simple column references. With this commit, expressions are supported by all existing extended statistics kinds, improving the same types of estimates. A simple example may look like this: CREATE TABLE t (a int); CREATE STATISTICS s ON mod(a,10), mod(a,20) FROM t; ANALYZE t; The collected statistics are useful e.g. to estimate queries with those expressions in WHERE or GROUP BY clauses: SELECT * FROM t WHERE mod(a,10) = 0 AND mod(a,20) = 0; SELECT 1 FROM t GROUP BY mod(a,10), mod(a,20); This introduces new internal statistics kind 'e' (expressions) which is built automatically when the statistics object definition includes any expressions. This represents single-expression statistics, as if there was an expression index (but without the index maintenance overhead). The statistics is stored in pg_statistics_ext_data as an array of composite types, which is possible thanks to 79f6a942bd. CREATE STATISTICS allows building statistics on a single expression, in which case in which case it's not possible to specify statistics kinds. A new system view pg_stats_ext_exprs can be used to display expression statistics, similarly to pg_stats and pg_stats_ext views. ALTER TABLE ... ALTER COLUMN ... TYPE now treats indexes the same way it treats indexes, i.e. it drops and recreates the statistics. This means all statistics are reset, and we no longer try to preserve at least the functional dependencies. This should not be a major issue in practice, as the functional dependencies actually rely on per-column statistics, which were always reset anyway. Author: Tomas Vondra Reviewed-by: Justin Pryzby, Dean Rasheed, Zhihong Yu Discussion: https://postgr.es/m/ad7891d2-e90c-b446-9fe2-7419143847d7%40enterprisedb.com
2021-03-26 23:22:01 +01:00
case EXPR_KIND_STATS_EXPRESSION:
err = _("cannot use subquery in statistics expression");
break;
case EXPR_KIND_ALTER_COL_TRANSFORM:
err = _("cannot use subquery in transform expression");
break;
case EXPR_KIND_EXECUTE_PARAMETER:
err = _("cannot use subquery in EXECUTE parameter");
break;
case EXPR_KIND_TRIGGER_WHEN:
err = _("cannot use subquery in trigger WHEN condition");
break;
case EXPR_KIND_PARTITION_BOUND:
err = _("cannot use subquery in partition bound");
break;
Implement table partitioning. Table partitioning is like table inheritance and reuses much of the existing infrastructure, but there are some important differences. The parent is called a partitioned table and is always empty; it may not have indexes or non-inherited constraints, since those make no sense for a relation with no data of its own. The children are called partitions and contain all of the actual data. Each partition has an implicit partitioning constraint. Multiple inheritance is not allowed, and partitioning and inheritance can't be mixed. Partitions can't have extra columns and may not allow nulls unless the parent does. Tuples inserted into the parent are automatically routed to the correct partition, so tuple-routing ON INSERT triggers are not needed. Tuple routing isn't yet supported for partitions which are foreign tables, and it doesn't handle updates that cross partition boundaries. Currently, tables can be range-partitioned or list-partitioned. List partitioning is limited to a single column, but range partitioning can involve multiple columns. A partitioning "column" can be an expression. Because table partitioning is less general than table inheritance, it is hoped that it will be easier to reason about properties of partitions, and therefore that this will serve as a better foundation for a variety of possible optimizations, including query planner optimizations. The tuple routing based which this patch does based on the implicit partitioning constraints is an example of this, but it seems likely that many other useful optimizations are also possible. Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat, Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova, Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
case EXPR_KIND_PARTITION_EXPRESSION:
err = _("cannot use subquery in partition key expression");
break;
case EXPR_KIND_CALL_ARGUMENT:
err = _("cannot use subquery in CALL argument");
break;
case EXPR_KIND_COPY_WHERE:
err = _("cannot use subquery in COPY FROM WHERE condition");
break;
case EXPR_KIND_GENERATED_COLUMN:
err = _("cannot use subquery in column generation expression");
break;
/*
* There is intentionally no default: case here, so that the
* compiler will warn if we add a new ParseExprKind without
* extending this switch. If we do see an unrecognized value at
* runtime, the behavior will be the same as for EXPR_KIND_OTHER,
* which is sane anyway.
*/
}
if (err)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg_internal("%s", err),
parser_errposition(pstate, sublink->location)));
pstate->p_hasSubLinks = true;
/*
* OK, let's transform the sub-SELECT.
*/
Change unknown-type literals to type text in SELECT and RETURNING lists. Previously, we left such literals alone if the query or subquery had no properties forcing a type decision to be made (such as an ORDER BY or DISTINCT clause using that output column). This meant that "unknown" could be an exposed output column type, which has never been a great idea because it could result in strange failures later on. For example, an outer query that tried to do any operations on an unknown-type subquery output would generally fail with some weird error like "failed to find conversion function from unknown to text" or "could not determine which collation to use for string comparison". Also, if the case occurred in a CREATE VIEW's query then the view would have an unknown-type column, causing similar failures in queries trying to use the view. To fix, at the tail end of parse analysis of a query, forcibly convert any remaining "unknown" literals in its SELECT or RETURNING list to type text. However, provide a switch to suppress that, and use it in the cases of SELECT inside a set operation or INSERT command. In those cases we already had type resolution rules that make use of context information from outside the subquery proper, and we don't want to change that behavior. Also, change creation of an unknown-type column in a relation from a warning to a hard error. The error should be unreachable now in CREATE VIEW or CREATE MATVIEW, but it's still possible to explicitly say "unknown" in CREATE TABLE or CREATE (composite) TYPE. We want to forbid that because it's nothing but a foot-gun. This change creates a pg_upgrade failure case: a matview that contains an unknown-type column can't be pg_upgraded, because reparsing the matview's defining query will now decide that the column is of type text, which doesn't match the cstring-like storage that the old materialized column would actually have. Add a checking pass to detect that. While at it, we can detect tables or composite types that would fail, essentially for free. Those would fail safely anyway later on, but we might as well fail earlier. This patch is by me, but it owes something to previous investigations by Rahila Syed. Also thanks to Ashutosh Bapat and Michael Paquier for review. Discussion: https://postgr.es/m/CAH2L28uwwbL9HUM-WR=hromW1Cvamkn7O-g8fPY2m=_7muJ0oA@mail.gmail.com
2017-01-25 15:17:18 +01:00
qtree = parse_sub_analyze(sublink->subselect, pstate, NULL, false, true);
/*
Change representation of statement lists, and add statement location info. This patch makes several changes that improve the consistency of representation of lists of statements. It's always been the case that the output of parse analysis is a list of Query nodes, whatever the types of the individual statements in the list. This patch brings similar consistency to the outputs of raw parsing and planning steps: * The output of raw parsing is now always a list of RawStmt nodes; the statement-type-dependent nodes are one level down from that. * The output of pg_plan_queries() is now always a list of PlannedStmt nodes, even for utility statements. In the case of a utility statement, "planning" just consists of wrapping a CMD_UTILITY PlannedStmt around the utility node. This list representation is now used in Portal and CachedPlan plan lists, replacing the former convention of intermixing PlannedStmts with bare utility-statement nodes. Now, every list of statements has a consistent head-node type depending on how far along it is in processing. This allows changing many places that formerly used generic "Node *" pointers to use a more specific pointer type, thus reducing the number of IsA() tests and casts needed, as well as improving code clarity. Also, the post-parse-analysis representation of DECLARE CURSOR is changed so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained SELECT remains a child of the DeclareCursorStmt rather than getting flipped around to be the other way. It's now true for both Query and PlannedStmt that utilityStmt is non-null if and only if commandType is CMD_UTILITY. That allows simplifying a lot of places that were testing both fields. (I think some of those were just defensive programming, but in many places, it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.) Because PlannedStmt carries a canSetTag field, we're also able to get rid of some ad-hoc rules about how to reconstruct canSetTag for a bare utility statement; specifically, the assumption that a utility is canSetTag if and only if it's the only one in its list. While I see no near-term need for relaxing that restriction, it's nice to get rid of the ad-hocery. The API of ProcessUtility() is changed so that what it's passed is the wrapper PlannedStmt not just the bare utility statement. This will affect all users of ProcessUtility_hook, but the changes are pretty trivial; see the affected contrib modules for examples of the minimum change needed. (Most compilers should give pointer-type-mismatch warnings for uncorrected code.) There's also a change in the API of ExplainOneQuery_hook, to pass through cursorOptions instead of expecting hook functions to know what to pick. This is needed because of the DECLARE CURSOR changes, but really should have been done in 9.6; it's unlikely that any extant hook functions know about using CURSOR_OPT_PARALLEL_OK. Finally, teach gram.y to save statement boundary locations in RawStmt nodes, and pass those through to Query and PlannedStmt nodes. This allows more intelligent handling of cases where a source query string contains multiple statements. This patch doesn't actually do anything with the information, but a follow-on patch will. (Passing this information through cleanly is the true motivation for these changes; while I think this is all good cleanup, it's unlikely we'd have bothered without this end goal.) catversion bump because addition of location fields to struct Query affects stored rules. This patch is by me, but it owes a good deal to Fabien Coelho who did a lot of preliminary work on the problem, and also reviewed the patch. Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
* Check that we got a SELECT. Anything else should be impossible given
* restrictions of the grammar, but check anyway.
*/
if (!IsA(qtree, Query) ||
Change representation of statement lists, and add statement location info. This patch makes several changes that improve the consistency of representation of lists of statements. It's always been the case that the output of parse analysis is a list of Query nodes, whatever the types of the individual statements in the list. This patch brings similar consistency to the outputs of raw parsing and planning steps: * The output of raw parsing is now always a list of RawStmt nodes; the statement-type-dependent nodes are one level down from that. * The output of pg_plan_queries() is now always a list of PlannedStmt nodes, even for utility statements. In the case of a utility statement, "planning" just consists of wrapping a CMD_UTILITY PlannedStmt around the utility node. This list representation is now used in Portal and CachedPlan plan lists, replacing the former convention of intermixing PlannedStmts with bare utility-statement nodes. Now, every list of statements has a consistent head-node type depending on how far along it is in processing. This allows changing many places that formerly used generic "Node *" pointers to use a more specific pointer type, thus reducing the number of IsA() tests and casts needed, as well as improving code clarity. Also, the post-parse-analysis representation of DECLARE CURSOR is changed so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained SELECT remains a child of the DeclareCursorStmt rather than getting flipped around to be the other way. It's now true for both Query and PlannedStmt that utilityStmt is non-null if and only if commandType is CMD_UTILITY. That allows simplifying a lot of places that were testing both fields. (I think some of those were just defensive programming, but in many places, it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.) Because PlannedStmt carries a canSetTag field, we're also able to get rid of some ad-hoc rules about how to reconstruct canSetTag for a bare utility statement; specifically, the assumption that a utility is canSetTag if and only if it's the only one in its list. While I see no near-term need for relaxing that restriction, it's nice to get rid of the ad-hocery. The API of ProcessUtility() is changed so that what it's passed is the wrapper PlannedStmt not just the bare utility statement. This will affect all users of ProcessUtility_hook, but the changes are pretty trivial; see the affected contrib modules for examples of the minimum change needed. (Most compilers should give pointer-type-mismatch warnings for uncorrected code.) There's also a change in the API of ExplainOneQuery_hook, to pass through cursorOptions instead of expecting hook functions to know what to pick. This is needed because of the DECLARE CURSOR changes, but really should have been done in 9.6; it's unlikely that any extant hook functions know about using CURSOR_OPT_PARALLEL_OK. Finally, teach gram.y to save statement boundary locations in RawStmt nodes, and pass those through to Query and PlannedStmt nodes. This allows more intelligent handling of cases where a source query string contains multiple statements. This patch doesn't actually do anything with the information, but a follow-on patch will. (Passing this information through cleanly is the true motivation for these changes; while I think this is all good cleanup, it's unlikely we'd have bothered without this end goal.) catversion bump because addition of location fields to struct Query affects stored rules. This patch is by me, but it owes a good deal to Fabien Coelho who did a lot of preliminary work on the problem, and also reviewed the patch. Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
qtree->commandType != CMD_SELECT)
elog(ERROR, "unexpected non-SELECT command in SubLink");
sublink->subselect = (Node *) qtree;
if (sublink->subLinkType == EXISTS_SUBLINK)
{
/*
* EXISTS needs no test expression or combining operator. These fields
* should be null already, but make sure.
*/
sublink->testexpr = NULL;
sublink->operName = NIL;
}
else if (sublink->subLinkType == EXPR_SUBLINK ||
sublink->subLinkType == ARRAY_SUBLINK)
{
/*
* Make sure the subselect delivers a single column (ignoring resjunk
* targets).
*/
if (count_nonjunk_tlist_entries(qtree->targetList) != 1)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("subquery must return only one column"),
parser_errposition(pstate, sublink->location)));
/*
* EXPR and ARRAY need no test expression or combining operator. These
* fields should be null already, but make sure.
*/
sublink->testexpr = NULL;
sublink->operName = NIL;
}
else if (sublink->subLinkType == MULTIEXPR_SUBLINK)
{
/* Same as EXPR case, except no restriction on number of columns */
sublink->testexpr = NULL;
sublink->operName = NIL;
}
else
{
/* ALL, ANY, or ROWCOMPARE: generate row-comparing expression */
Node *lefthand;
List *left_list;
List *right_list;
ListCell *l;
/*
* If the source was "x IN (select)", convert to "x = ANY (select)".
*/
if (sublink->operName == NIL)
sublink->operName = list_make1(makeString("="));
/*
* Transform lefthand expression, and convert to a list
*/
lefthand = transformExprRecurse(pstate, sublink->testexpr);
if (lefthand && IsA(lefthand, RowExpr))
left_list = ((RowExpr *) lefthand)->args;
else
left_list = list_make1(lefthand);
/*
* Build a list of PARAM_SUBLINK nodes representing the output columns
* of the subquery.
*/
right_list = NIL;
foreach(l, qtree->targetList)
{
TargetEntry *tent = (TargetEntry *) lfirst(l);
Param *param;
if (tent->resjunk)
continue;
param = makeNode(Param);
param->paramkind = PARAM_SUBLINK;
param->paramid = tent->resno;
param->paramtype = exprType((Node *) tent->expr);
param->paramtypmod = exprTypmod((Node *) tent->expr);
param->paramcollid = exprCollation((Node *) tent->expr);
param->location = -1;
right_list = lappend(right_list, param);
}
/*
* We could rely on make_row_comparison_op to complain if the list
* lengths differ, but we prefer to generate a more specific error
* message.
*/
if (list_length(left_list) < list_length(right_list))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("subquery has too many columns"),
parser_errposition(pstate, sublink->location)));
if (list_length(left_list) > list_length(right_list))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("subquery has too few columns"),
parser_errposition(pstate, sublink->location)));
/*
* Identify the combining operator(s) and generate a suitable
* row-comparison expression.
*/
sublink->testexpr = make_row_comparison_op(pstate,
sublink->operName,
left_list,
right_list,
sublink->location);
}
return result;
}
/*
* transformArrayExpr
*
* If the caller specifies the target type, the resulting array will
* be of exactly that type. Otherwise we try to infer a common type
* for the elements using select_common_type().
*/
static Node *
transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
Oid array_type, Oid element_type, int32 typmod)
{
ArrayExpr *newa = makeNode(ArrayExpr);
List *newelems = NIL;
List *newcoercedelems = NIL;
ListCell *element;
Oid coerce_type;
bool coerce_hard;
/*
* Transform the element expressions
*
* Assume that the array is one-dimensional unless we find an array-type
* element expression.
*/
newa->multidims = false;
foreach(element, a->elements)
{
Node *e = (Node *) lfirst(element);
Node *newe;
/*
* If an element is itself an A_ArrayExpr, recurse directly so that we
* can pass down any target type we were given.
*/
if (IsA(e, A_ArrayExpr))
{
newe = transformArrayExpr(pstate,
(A_ArrayExpr *) e,
array_type,
element_type,
typmod);
/* we certainly have an array here */
Assert(array_type == InvalidOid || array_type == exprType(newe));
newa->multidims = true;
}
else
{
newe = transformExprRecurse(pstate, e);
/*
* Check for sub-array expressions, if we haven't already found
* one.
*/
if (!newa->multidims && type_is_array(exprType(newe)))
newa->multidims = true;
}
newelems = lappend(newelems, newe);
}
/*
* Select a target type for the elements.
*
* If we haven't been given a target array type, we must try to deduce a
* common type based on the types of the individual elements present.
*/
if (OidIsValid(array_type))
{
/* Caller must ensure array_type matches element_type */
Assert(OidIsValid(element_type));
coerce_type = (newa->multidims ? array_type : element_type);
coerce_hard = true;
}
else
{
/* Can't handle an empty array without a target type */
if (newelems == NIL)
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_DATATYPE),
errmsg("cannot determine type of empty array"),
errhint("Explicitly cast to the desired type, "
"for example ARRAY[]::integer[]."),
parser_errposition(pstate, a->location)));
/* Select a common type for the elements */
coerce_type = select_common_type(pstate, newelems, "ARRAY", NULL);
if (newa->multidims)
{
array_type = coerce_type;
element_type = get_element_type(array_type);
if (!OidIsValid(element_type))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find element type for data type %s",
format_type_be(array_type)),
parser_errposition(pstate, a->location)));
}
else
{
element_type = coerce_type;
array_type = get_array_type(element_type);
if (!OidIsValid(array_type))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find array type for data type %s",
format_type_be(element_type)),
parser_errposition(pstate, a->location)));
}
coerce_hard = false;
}
/*
* Coerce elements to target type
*
* If the array has been explicitly cast, then the elements are in turn
* explicitly coerced.
*
* If the array's type was merely derived from the common type of its
* elements, then the elements are implicitly coerced to the common type.
* This is consistent with other uses of select_common_type().
*/
foreach(element, newelems)
{
Node *e = (Node *) lfirst(element);
Node *newe;
if (coerce_hard)
{
newe = coerce_to_target_type(pstate, e,
exprType(e),
coerce_type,
typmod,
COERCION_EXPLICIT,
COERCE_EXPLICIT_CAST,
-1);
if (newe == NULL)
ereport(ERROR,
(errcode(ERRCODE_CANNOT_COERCE),
errmsg("cannot cast type %s to %s",
format_type_be(exprType(e)),
format_type_be(coerce_type)),
parser_errposition(pstate, exprLocation(e))));
}
else
newe = coerce_to_common_type(pstate, e,
coerce_type,
"ARRAY");
newcoercedelems = lappend(newcoercedelems, newe);
}
newa->array_typeid = array_type;
/* array_collid will be set by parse_collate.c */
newa->element_typeid = element_type;
newa->elements = newcoercedelems;
newa->location = a->location;
return (Node *) newa;
}
static Node *
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
transformRowExpr(ParseState *pstate, RowExpr *r, bool allowDefault)
{
RowExpr *newr;
char fname[16];
int fnum;
newr = makeNode(RowExpr);
/* Transform the field expressions */
Improve handling of "UPDATE ... SET (column_list) = row_constructor". Previously, the right-hand side of a multiple-column assignment, if it wasn't a sub-SELECT, had to be a simple parenthesized expression list, because gram.y was responsible for "bursting" the construct into independent column assignments. This had the minor defect that you couldn't write ROW (though you should be able to, since the standard says this is a row constructor), and the rather larger defect that unlike other uses of row constructors, we would not expand a "foo.*" item into multiple columns. Fix that by changing the RHS to be just "a_expr" in the grammar, leaving it to transformMultiAssignRef to separate the elements of a RowExpr; which it will do only after performing standard transformation of the RowExpr, so that "foo.*" behaves as expected. The key reason we didn't do that before was the hard-wired handling of DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by allowing DEFAULT in any a_expr and having parse analysis throw an error if SetToDefault is found in an unexpected place. That's an improvement anyway since the error can be more specific than just "syntax error". The SQL standard suggests that the RHS could be any a_expr yielding a suitable row value. This patch doesn't really move the goal posts in that respect --- you're still limited to RowExpr or a sub-SELECT --- but it does fix the grammar restriction, so it provides some tangible progress towards a full implementation. And the limitation is now documented by an explicit error message rather than an unhelpful "syntax error". Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
newr->args = transformExpressionList(pstate, r->args,
pstate->p_expr_kind, allowDefault);
/* Disallow more columns than will fit in a tuple */
if (list_length(newr->args) > MaxTupleAttributeNumber)
ereport(ERROR,
(errcode(ERRCODE_TOO_MANY_COLUMNS),
errmsg("ROW expressions can have at most %d entries",
MaxTupleAttributeNumber),
parser_errposition(pstate, r->location)));
/* Barring later casting, we consider the type RECORD */
newr->row_typeid = RECORDOID;
newr->row_format = COERCE_IMPLICIT_CAST;
/* ROW() has anonymous columns, so invent some field names */
newr->colnames = NIL;
Represent Lists as expansible arrays, not chains of cons-cells. Originally, Postgres Lists were a more or less exact reimplementation of Lisp lists, which consist of chains of separately-allocated cons cells, each having a value and a next-cell link. We'd hacked that once before (commit d0b4399d8) to add a separate List header, but the data was still in cons cells. That makes some operations -- notably list_nth() -- O(N), and it's bulky because of the next-cell pointers and per-cell palloc overhead, and it's very cache-unfriendly if the cons cells end up scattered around rather than being adjacent. In this rewrite, we still have List headers, but the data is in a resizable array of values, with no next-cell links. Now we need at most two palloc's per List, and often only one, since we can allocate some values in the same palloc call as the List header. (Of course, extending an existing List may require repalloc's to enlarge the array. But this involves just O(log N) allocations not O(N).) Of course this is not without downsides. The key difficulty is that addition or deletion of a list entry may now cause other entries to move, which it did not before. For example, that breaks foreach() and sister macros, which historically used a pointer to the current cons-cell as loop state. We can repair those macros transparently by making their actual loop state be an integer list index; the exposed "ListCell *" pointer is no longer state carried across loop iterations, but is just a derived value. (In practice, modern compilers can optimize things back to having just one loop state value, at least for simple cases with inline loop bodies.) In principle, this is a semantics change for cases where the loop body inserts or deletes list entries ahead of the current loop index; but I found no such cases in the Postgres code. The change is not at all transparent for code that doesn't use foreach() but chases lists "by hand" using lnext(). The largest share of such code in the backend is in loops that were maintaining "prev" and "next" variables in addition to the current-cell pointer, in order to delete list cells efficiently using list_delete_cell(). However, we no longer need a previous-cell pointer to delete a list cell efficiently. Keeping a next-cell pointer doesn't work, as explained above, but we can improve matters by changing such code to use a regular foreach() loop and then using the new macro foreach_delete_current() to delete the current cell. (This macro knows how to update the associated foreach loop's state so that no cells will be missed in the traversal.) There remains a nontrivial risk of code assuming that a ListCell * pointer will remain good over an operation that could now move the list contents. To help catch such errors, list.c can be compiled with a new define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents whenever that could possibly happen. This makes list operations significantly more expensive so it's not normally turned on (though it is on by default if USE_VALGRIND is on). There are two notable API differences from the previous code: * lnext() now requires the List's header pointer in addition to the current cell's address. * list_delete_cell() no longer requires a previous-cell argument. These changes are somewhat unfortunate, but on the other hand code using either function needs inspection to see if it is assuming anything it shouldn't, so it's not all bad. Programmers should be aware of these significant performance changes: * list_nth() and related functions are now O(1); so there's no major access-speed difference between a list and an array. * Inserting or deleting a list element now takes time proportional to the distance to the end of the list, due to moving the array elements. (However, it typically *doesn't* require palloc or pfree, so except in long lists it's probably still faster than before.) Notably, lcons() used to be about the same cost as lappend(), but that's no longer true if the list is long. Code that uses lcons() and list_delete_first() to maintain a stack might usefully be rewritten to push and pop at the end of the list rather than the beginning. * There are now list_insert_nth...() and list_delete_nth...() functions that add or remove a list cell identified by index. These have the data-movement penalty explained above, but there's no search penalty. * list_concat() and variants now copy the second list's data into storage belonging to the first list, so there is no longer any sharing of cells between the input lists. The second argument is now declared "const List *" to reflect that it isn't changed. This patch just does the minimum needed to get the new implementation in place and fix bugs exposed by the regression tests. As suggested by the foregoing, there's a fair amount of followup work remaining to do. Also, the ENABLE_LIST_COMPAT macros are finally removed in this commit. Code using those should have been gone a dozen years ago. Patch by me; thanks to David Rowley, Jesper Pedersen, and others for review. Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
for (fnum = 1; fnum <= list_length(newr->args); fnum++)
{
Represent Lists as expansible arrays, not chains of cons-cells. Originally, Postgres Lists were a more or less exact reimplementation of Lisp lists, which consist of chains of separately-allocated cons cells, each having a value and a next-cell link. We'd hacked that once before (commit d0b4399d8) to add a separate List header, but the data was still in cons cells. That makes some operations -- notably list_nth() -- O(N), and it's bulky because of the next-cell pointers and per-cell palloc overhead, and it's very cache-unfriendly if the cons cells end up scattered around rather than being adjacent. In this rewrite, we still have List headers, but the data is in a resizable array of values, with no next-cell links. Now we need at most two palloc's per List, and often only one, since we can allocate some values in the same palloc call as the List header. (Of course, extending an existing List may require repalloc's to enlarge the array. But this involves just O(log N) allocations not O(N).) Of course this is not without downsides. The key difficulty is that addition or deletion of a list entry may now cause other entries to move, which it did not before. For example, that breaks foreach() and sister macros, which historically used a pointer to the current cons-cell as loop state. We can repair those macros transparently by making their actual loop state be an integer list index; the exposed "ListCell *" pointer is no longer state carried across loop iterations, but is just a derived value. (In practice, modern compilers can optimize things back to having just one loop state value, at least for simple cases with inline loop bodies.) In principle, this is a semantics change for cases where the loop body inserts or deletes list entries ahead of the current loop index; but I found no such cases in the Postgres code. The change is not at all transparent for code that doesn't use foreach() but chases lists "by hand" using lnext(). The largest share of such code in the backend is in loops that were maintaining "prev" and "next" variables in addition to the current-cell pointer, in order to delete list cells efficiently using list_delete_cell(). However, we no longer need a previous-cell pointer to delete a list cell efficiently. Keeping a next-cell pointer doesn't work, as explained above, but we can improve matters by changing such code to use a regular foreach() loop and then using the new macro foreach_delete_current() to delete the current cell. (This macro knows how to update the associated foreach loop's state so that no cells will be missed in the traversal.) There remains a nontrivial risk of code assuming that a ListCell * pointer will remain good over an operation that could now move the list contents. To help catch such errors, list.c can be compiled with a new define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents whenever that could possibly happen. This makes list operations significantly more expensive so it's not normally turned on (though it is on by default if USE_VALGRIND is on). There are two notable API differences from the previous code: * lnext() now requires the List's header pointer in addition to the current cell's address. * list_delete_cell() no longer requires a previous-cell argument. These changes are somewhat unfortunate, but on the other hand code using either function needs inspection to see if it is assuming anything it shouldn't, so it's not all bad. Programmers should be aware of these significant performance changes: * list_nth() and related functions are now O(1); so there's no major access-speed difference between a list and an array. * Inserting or deleting a list element now takes time proportional to the distance to the end of the list, due to moving the array elements. (However, it typically *doesn't* require palloc or pfree, so except in long lists it's probably still faster than before.) Notably, lcons() used to be about the same cost as lappend(), but that's no longer true if the list is long. Code that uses lcons() and list_delete_first() to maintain a stack might usefully be rewritten to push and pop at the end of the list rather than the beginning. * There are now list_insert_nth...() and list_delete_nth...() functions that add or remove a list cell identified by index. These have the data-movement penalty explained above, but there's no search penalty. * list_concat() and variants now copy the second list's data into storage belonging to the first list, so there is no longer any sharing of cells between the input lists. The second argument is now declared "const List *" to reflect that it isn't changed. This patch just does the minimum needed to get the new implementation in place and fix bugs exposed by the regression tests. As suggested by the foregoing, there's a fair amount of followup work remaining to do. Also, the ENABLE_LIST_COMPAT macros are finally removed in this commit. Code using those should have been gone a dozen years ago. Patch by me; thanks to David Rowley, Jesper Pedersen, and others for review. Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
snprintf(fname, sizeof(fname), "f%d", fnum);
newr->colnames = lappend(newr->colnames, makeString(pstrdup(fname)));
}
newr->location = r->location;
return (Node *) newr;
}
static Node *
transformCoalesceExpr(ParseState *pstate, CoalesceExpr *c)
{
CoalesceExpr *newc = makeNode(CoalesceExpr);
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
Node *last_srf = pstate->p_last_srf;
List *newargs = NIL;
List *newcoercedargs = NIL;
ListCell *args;
foreach(args, c->args)
{
Node *e = (Node *) lfirst(args);
Node *newe;
newe = transformExprRecurse(pstate, e);
newargs = lappend(newargs, newe);
}
newc->coalescetype = select_common_type(pstate, newargs, "COALESCE", NULL);
/* coalescecollid will be set by parse_collate.c */
/* Convert arguments if necessary */
foreach(args, newargs)
{
Node *e = (Node *) lfirst(args);
Node *newe;
newe = coerce_to_common_type(pstate, e,
newc->coalescetype,
"COALESCE");
newcoercedargs = lappend(newcoercedargs, newe);
}
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
/* if any subexpression contained a SRF, complain */
if (pstate->p_last_srf != last_srf)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/* translator: %s is name of a SQL construct, eg GROUP BY */
errmsg("set-returning functions are not allowed in %s",
"COALESCE"),
errhint("You might be able to move the set-returning function into a LATERAL FROM item."),
parser_errposition(pstate,
exprLocation(pstate->p_last_srf))));
newc->args = newcoercedargs;
newc->location = c->location;
return (Node *) newc;
}
static Node *
transformMinMaxExpr(ParseState *pstate, MinMaxExpr *m)
{
MinMaxExpr *newm = makeNode(MinMaxExpr);
List *newargs = NIL;
List *newcoercedargs = NIL;
const char *funcname = (m->op == IS_GREATEST) ? "GREATEST" : "LEAST";
ListCell *args;
newm->op = m->op;
foreach(args, m->args)
{
Node *e = (Node *) lfirst(args);
Node *newe;
newe = transformExprRecurse(pstate, e);
newargs = lappend(newargs, newe);
}
newm->minmaxtype = select_common_type(pstate, newargs, funcname, NULL);
/* minmaxcollid and inputcollid will be set by parse_collate.c */
/* Convert arguments if necessary */
foreach(args, newargs)
{
Node *e = (Node *) lfirst(args);
Node *newe;
newe = coerce_to_common_type(pstate, e,
newm->minmaxtype,
funcname);
newcoercedargs = lappend(newcoercedargs, newe);
}
newm->args = newcoercedargs;
newm->location = m->location;
return (Node *) newm;
}
static Node *
transformSQLValueFunction(ParseState *pstate, SQLValueFunction *svf)
{
/*
* All we need to do is insert the correct result type and (where needed)
* validate the typmod, so we just modify the node in-place.
*/
switch (svf->op)
{
case SVFOP_CURRENT_DATE:
svf->type = DATEOID;
break;
case SVFOP_CURRENT_TIME:
svf->type = TIMETZOID;
break;
case SVFOP_CURRENT_TIME_N:
svf->type = TIMETZOID;
svf->typmod = anytime_typmod_check(true, svf->typmod);
break;
case SVFOP_CURRENT_TIMESTAMP:
svf->type = TIMESTAMPTZOID;
break;
case SVFOP_CURRENT_TIMESTAMP_N:
svf->type = TIMESTAMPTZOID;
svf->typmod = anytimestamp_typmod_check(true, svf->typmod);
break;
case SVFOP_LOCALTIME:
svf->type = TIMEOID;
break;
case SVFOP_LOCALTIME_N:
svf->type = TIMEOID;
svf->typmod = anytime_typmod_check(false, svf->typmod);
break;
case SVFOP_LOCALTIMESTAMP:
svf->type = TIMESTAMPOID;
break;
case SVFOP_LOCALTIMESTAMP_N:
svf->type = TIMESTAMPOID;
svf->typmod = anytimestamp_typmod_check(false, svf->typmod);
break;
case SVFOP_CURRENT_ROLE:
case SVFOP_CURRENT_USER:
case SVFOP_USER:
case SVFOP_SESSION_USER:
case SVFOP_CURRENT_CATALOG:
case SVFOP_CURRENT_SCHEMA:
svf->type = NAMEOID;
break;
}
return (Node *) svf;
}
static Node *
transformXmlExpr(ParseState *pstate, XmlExpr *x)
{
XmlExpr *newx;
ListCell *lc;
int i;
newx = makeNode(XmlExpr);
newx->op = x->op;
if (x->name)
newx->name = map_sql_identifier_to_xml_name(x->name, false, false);
else
newx->name = NULL;
newx->xmloption = x->xmloption;
newx->type = XMLOID; /* this just marks the node as transformed */
newx->typmod = -1;
newx->location = x->location;
/*
* gram.y built the named args as a list of ResTarget. Transform each,
* and break the names out as a separate list.
*/
newx->named_args = NIL;
newx->arg_names = NIL;
foreach(lc, x->named_args)
{
ResTarget *r = lfirst_node(ResTarget, lc);
Node *expr;
char *argname;
expr = transformExprRecurse(pstate, r->val);
if (r->name)
argname = map_sql_identifier_to_xml_name(r->name, false, false);
else if (IsA(r->val, ColumnRef))
argname = map_sql_identifier_to_xml_name(FigureColname(r->val),
true, false);
else
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
x->op == IS_XMLELEMENT
? errmsg("unnamed XML attribute value must be a column reference")
: errmsg("unnamed XML element value must be a column reference"),
parser_errposition(pstate, r->location)));
argname = NULL; /* keep compiler quiet */
}
/* reject duplicate argnames in XMLELEMENT only */
if (x->op == IS_XMLELEMENT)
{
ListCell *lc2;
foreach(lc2, newx->arg_names)
{
if (strcmp(argname, strVal(lfirst(lc2))) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("XML attribute name \"%s\" appears more than once",
argname),
parser_errposition(pstate, r->location)));
}
}
newx->named_args = lappend(newx->named_args, expr);
newx->arg_names = lappend(newx->arg_names, makeString(argname));
}
/* The other arguments are of varying types depending on the function */
newx->args = NIL;
i = 0;
foreach(lc, x->args)
{
Node *e = (Node *) lfirst(lc);
Node *newe;
newe = transformExprRecurse(pstate, e);
switch (x->op)
{
case IS_XMLCONCAT:
newe = coerce_to_specific_type(pstate, newe, XMLOID,
"XMLCONCAT");
break;
2007-01-12 23:09:49 +01:00
case IS_XMLELEMENT:
/* no coercion necessary */
break;
case IS_XMLFOREST:
newe = coerce_to_specific_type(pstate, newe, XMLOID,
"XMLFOREST");
break;
case IS_XMLPARSE:
if (i == 0)
newe = coerce_to_specific_type(pstate, newe, TEXTOID,
"XMLPARSE");
else
newe = coerce_to_boolean(pstate, newe, "XMLPARSE");
break;
case IS_XMLPI:
newe = coerce_to_specific_type(pstate, newe, TEXTOID,
"XMLPI");
break;
case IS_XMLROOT:
if (i == 0)
newe = coerce_to_specific_type(pstate, newe, XMLOID,
"XMLROOT");
else if (i == 1)
newe = coerce_to_specific_type(pstate, newe, TEXTOID,
"XMLROOT");
else
newe = coerce_to_specific_type(pstate, newe, INT4OID,
"XMLROOT");
break;
case IS_XMLSERIALIZE:
/* not handled here */
Assert(false);
break;
case IS_DOCUMENT:
newe = coerce_to_specific_type(pstate, newe, XMLOID,
"IS DOCUMENT");
break;
}
newx->args = lappend(newx->args, newe);
i++;
}
return (Node *) newx;
}
static Node *
transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
{
Node *result;
XmlExpr *xexpr;
Oid targetType;
int32 targetTypmod;
xexpr = makeNode(XmlExpr);
xexpr->op = IS_XMLSERIALIZE;
xexpr->args = list_make1(coerce_to_specific_type(pstate,
transformExprRecurse(pstate, xs->expr),
XMLOID,
"XMLSERIALIZE"));
typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
xexpr->xmloption = xs->xmloption;
xexpr->indent = xs->indent;
xexpr->location = xs->location;
/* We actually only need these to be able to parse back the expression. */
xexpr->type = targetType;
xexpr->typmod = targetTypmod;
/*
* The actual target type is determined this way. SQL allows char and
* varchar as target types. We allow anything that can be cast implicitly
* from text. This way, user-defined text-like data types automatically
* fit in.
*/
result = coerce_to_target_type(pstate, (Node *) xexpr,
TEXTOID, targetType, targetTypmod,
COERCION_IMPLICIT,
COERCE_IMPLICIT_CAST,
-1);
if (result == NULL)
ereport(ERROR,
(errcode(ERRCODE_CANNOT_COERCE),
errmsg("cannot cast XMLSERIALIZE result to %s",
format_type_be(targetType)),
parser_errposition(pstate, xexpr->location)));
return result;
}
static Node *
transformBooleanTest(ParseState *pstate, BooleanTest *b)
{
const char *clausename;
switch (b->booltesttype)
{
case IS_TRUE:
clausename = "IS TRUE";
break;
case IS_NOT_TRUE:
clausename = "IS NOT TRUE";
break;
case IS_FALSE:
clausename = "IS FALSE";
break;
case IS_NOT_FALSE:
clausename = "IS NOT FALSE";
break;
case IS_UNKNOWN:
clausename = "IS UNKNOWN";
break;
case IS_NOT_UNKNOWN:
clausename = "IS NOT UNKNOWN";
break;
default:
elog(ERROR, "unrecognized booltesttype: %d",
(int) b->booltesttype);
clausename = NULL; /* keep compiler quiet */
}
b->arg = (Expr *) transformExprRecurse(pstate, (Node *) b->arg);
b->arg = (Expr *) coerce_to_boolean(pstate,
(Node *) b->arg,
clausename);
return (Node *) b;
}
static Node *
transformCurrentOfExpr(ParseState *pstate, CurrentOfExpr *cexpr)
{
/* CURRENT OF can only appear at top level of UPDATE/DELETE */
Make parser rely more heavily on the ParseNamespaceItem data structure. When I added the ParseNamespaceItem data structure (in commit 5ebaaa494), it wasn't very tightly integrated into the parser's APIs. In the wake of adding p_rtindex to that struct (commit b541e9acc), there is a good reason to make more use of it: by passing around ParseNamespaceItem pointers instead of bare RTE pointers, we can get rid of various messy methods for passing back or deducing the rangetable index of an RTE during parsing. Hence, refactor the addRangeTableEntryXXX functions to build and return a ParseNamespaceItem struct, not just the RTE proper; and replace addRTEtoQuery with addNSItemToQuery, which is passed a ParseNamespaceItem rather than building one internally. Also, add per-column data (a ParseNamespaceColumn array) to each ParseNamespaceItem. These arrays are built during addRangeTableEntryXXX, where we have column type data at hand so that it's nearly free to fill the data structure. Later, when we need to build Vars referencing RTEs, we can use the ParseNamespaceColumn info to avoid the rather expensive operations done in get_rte_attribute_type() or expandRTE(). get_rte_attribute_type() is indeed dead code now, so I've removed it. This makes for a useful improvement in parse analysis speed, around 20% in one moderately-complex test query. The ParseNamespaceColumn structs also include Var identity information (varno/varattno). That info isn't actually being used in this patch, except that p_varno == 0 is a handy test for a dropped column. A follow-on patch will make more use of it. Discussion: https://postgr.es/m/2461.1577764221@sss.pgh.pa.us
2020-01-02 17:29:01 +01:00
Assert(pstate->p_target_nsitem != NULL);
cexpr->cvarno = pstate->p_target_nsitem->p_rtindex;
/*
* Check to see if the cursor name matches a parameter of type REFCURSOR.
* If so, replace the raw name reference with a parameter reference. (This
* is a hack for the convenience of plpgsql.)
*/
if (cexpr->cursor_name != NULL) /* in case already transformed */
{
ColumnRef *cref = makeNode(ColumnRef);
Node *node = NULL;
/* Build an unqualified ColumnRef with the given name */
cref->fields = list_make1(makeString(cexpr->cursor_name));
cref->location = -1;
/* See if there is a translation available from a parser hook */
if (pstate->p_pre_columnref_hook != NULL)
node = pstate->p_pre_columnref_hook(pstate, cref);
if (node == NULL && pstate->p_post_columnref_hook != NULL)
node = pstate->p_post_columnref_hook(pstate, cref, NULL);
/*
* XXX Should we throw an error if we get a translation that isn't a
* refcursor Param? For now it seems best to silently ignore false
* matches.
*/
if (node != NULL && IsA(node, Param))
{
Param *p = (Param *) node;
if (p->paramkind == PARAM_EXTERN &&
p->paramtype == REFCURSOROID)
{
/* Matches, so convert CURRENT OF to a param reference */
cexpr->cursor_name = NULL;
cexpr->cursor_param = p->paramid;
}
}
}
return (Node *) cexpr;
}
/*
* Construct a whole-row reference to represent the notation "relation.*".
*/
static Node *
transformWholeRowRef(ParseState *pstate, ParseNamespaceItem *nsitem,
int sublevels_up, int location)
{
/*
* Build the appropriate referencing node. Normally this can be a
* whole-row Var, but if the nsitem is a JOIN USING alias then it contains
* only a subset of the columns of the underlying join RTE, so that will
* not work. Instead we immediately expand the reference into a RowExpr.
* Since the JOIN USING's common columns are fully determined at this
* point, there seems no harm in expanding it now rather than during
* planning.
*
* Note that if the RTE is a function returning scalar, we create just a
* plain reference to the function value, not a composite containing a
* single column. This is pretty inconsistent at first sight, but it's
* what we've done historically. One argument for it is that "rel" and
* "rel.*" mean the same thing for composite relations, so why not for
* scalar functions...
*/
if (nsitem->p_names == nsitem->p_rte->eref)
{
Var *result;
result = makeWholeRowVar(nsitem->p_rte, nsitem->p_rtindex,
sublevels_up, true);
/* location is not filled in by makeWholeRowVar */
result->location = location;
Make Vars be outer-join-aware. Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
/* mark Var if it's nulled by any outer joins */
markNullableIfNeeded(pstate, result);
/* mark relation as requiring whole-row SELECT access */
markVarForSelectPriv(pstate, result);
return (Node *) result;
}
else
{
RowExpr *rowexpr;
List *fields;
/*
* We want only as many columns as are listed in p_names->colnames,
* and we should use those names not whatever possibly-aliased names
* are in the RTE. We needn't worry about marking the RTE for SELECT
* access, as the common columns are surely so marked already.
*/
expandRTE(nsitem->p_rte, nsitem->p_rtindex,
sublevels_up, location, false,
NULL, &fields);
rowexpr = makeNode(RowExpr);
rowexpr->args = list_truncate(fields,
list_length(nsitem->p_names->colnames));
rowexpr->row_typeid = RECORDOID;
rowexpr->row_format = COERCE_IMPLICIT_CAST;
rowexpr->colnames = copyObject(nsitem->p_names->colnames);
rowexpr->location = location;
Make Vars be outer-join-aware. Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
/* XXX we ought to mark the row as possibly nullable */
return (Node *) rowexpr;
}
}
/*
* Handle an explicit CAST construct.
*
* Transform the argument, look up the type name, and apply any necessary
* coercion function(s).
*/
static Node *
transformTypeCast(ParseState *pstate, TypeCast *tc)
{
Node *result;
Node *arg = tc->arg;
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
Node *expr;
Oid inputType;
Oid targetType;
int32 targetTypmod;
int location;
/* Look up the type name first */
typenameTypeIdAndMod(pstate, tc->typeName, &targetType, &targetTypmod);
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
/*
* If the subject of the typecast is an ARRAY[] construct and the target
* type is an array type, we invoke transformArrayExpr() directly so that
* we can pass down the type information. This avoids some cases where
* transformArrayExpr() might not infer the correct type. Otherwise, just
* transform the argument normally.
*/
if (IsA(arg, A_ArrayExpr))
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
{
Oid targetBaseType;
int32 targetBaseTypmod;
Oid elementType;
/*
* If target is a domain over array, work with the base array type
* here. Below, we'll cast the array type to the domain. In the
* usual case that the target is not a domain, the remaining steps
* will be a no-op.
*/
targetBaseTypmod = targetTypmod;
targetBaseType = getBaseTypeAndTypmod(targetType, &targetBaseTypmod);
elementType = get_element_type(targetBaseType);
if (OidIsValid(elementType))
{
expr = transformArrayExpr(pstate,
(A_ArrayExpr *) arg,
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
targetBaseType,
elementType,
targetBaseTypmod);
}
else
expr = transformExprRecurse(pstate, arg);
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
}
else
expr = transformExprRecurse(pstate, arg);
Get rid of multiple applications of transformExpr() to the same tree. transformExpr() has for many years had provisions to do nothing when applied to an already-transformed expression tree. However, this was always ugly and of dubious reliability, so we'd be much better off without it. The primary historical reason for it was that gram.y sometimes returned multiple links to the same subexpression, which is no longer true as of my BETWEEN fixes. We'd also grown some lazy hacks in CREATE TABLE LIKE (failing to distinguish between raw and already-transformed index specifications) and one or two other places. This patch removes the need for and support for re-transforming already transformed expressions. The index case is dealt with by adding a flag to struct IndexStmt to indicate that it's already been transformed; which has some benefit anyway in that tablecmds.c can now Assert that transformation has happened rather than just assuming. The other main reason was some rather sloppy code for array type coercion, which can be fixed (and its performance improved too) by refactoring. I did leave transformJoinUsingClause() still constructing expressions containing untransformed operator nodes being applied to Vars, so that transformExpr() still has to allow Var inputs. But that's a much narrower, and safer, special case than before, since Vars will never appear in a raw parse tree, and they don't have any substructure to worry about. In passing fix some oversights in the patch that added CREATE INDEX IF NOT EXISTS (missing processing of IndexStmt.if_not_exists). These appear relatively harmless, but still sloppy coding practice.
2015-02-22 19:59:09 +01:00
inputType = exprType(expr);
if (inputType == InvalidOid)
return expr; /* do nothing if NULL input */
/*
* Location of the coercion is preferentially the location of the :: or
* CAST symbol, but if there is none then use the location of the type
* name (this can happen in TypeName 'string' syntax, for instance).
*/
location = tc->location;
if (location < 0)
location = tc->typeName->location;
result = coerce_to_target_type(pstate, expr, inputType,
targetType, targetTypmod,
COERCION_EXPLICIT,
COERCE_EXPLICIT_CAST,
location);
if (result == NULL)
ereport(ERROR,
(errcode(ERRCODE_CANNOT_COERCE),
errmsg("cannot cast type %s to %s",
format_type_be(inputType),
format_type_be(targetType)),
parser_coercion_errposition(pstate, location, expr)));
return result;
}
/*
* Handle an explicit COLLATE clause.
*
* Transform the argument, and look up the collation name.
*/
static Node *
transformCollateClause(ParseState *pstate, CollateClause *c)
{
CollateExpr *newc;
Oid argtype;
newc = makeNode(CollateExpr);
newc->arg = (Expr *) transformExprRecurse(pstate, c->arg);
argtype = exprType((Node *) newc->arg);
2011-04-10 17:42:00 +02:00
Remove collation information from TypeName, where it does not belong. The initial collations patch treated a COLLATE spec as part of a TypeName, following what can only be described as brain fade on the part of the SQL committee. It's a lot more reasonable to treat COLLATE as a syntactically separate object, so that it can be added in only the productions where it actually belongs, rather than needing to reject it in a boatload of places where it doesn't belong (something the original patch mostly failed to do). In addition this change lets us meet the spec's requirement to allow COLLATE anywhere in the clauses of a ColumnDef, and it avoids unfriendly behavior for constructs such as "foo::type COLLATE collation". To do this, pull collation information out of TypeName and put it in ColumnDef instead, thus reverting most of the collation-related changes in parse_type.c's API. I made one additional structural change, which was to use a ColumnDef as an intermediate node in AT_AlterColumnType AlterTableCmd nodes. This provides enough room to get rid of the "transform" wart in AlterTableCmd too, since the ColumnDef can carry the USING expression easily enough. Also fix some other minor bugs that have crept in in the same areas, like failure to copy recently-added fields of ColumnDef in copyfuncs.c. While at it, document the formerly secret ability to specify a collation in ALTER TABLE ALTER COLUMN TYPE, ALTER TYPE ADD ATTRIBUTE, and ALTER TYPE ALTER ATTRIBUTE TYPE; and correct some misstatements about what the default collation selection will be when COLLATE is omitted. BTW, the three-parameter form of format_type() should go away too, since it just contributes to the confusion in this area; but I'll do that in a separate patch.
2011-03-10 04:38:52 +01:00
/*
* The unknown type is not collatable, but coerce_type() takes care of it
* separately, so we'll let it go here.
*/
if (!type_is_collatable(argtype) && argtype != UNKNOWNOID)
ereport(ERROR,
Remove collation information from TypeName, where it does not belong. The initial collations patch treated a COLLATE spec as part of a TypeName, following what can only be described as brain fade on the part of the SQL committee. It's a lot more reasonable to treat COLLATE as a syntactically separate object, so that it can be added in only the productions where it actually belongs, rather than needing to reject it in a boatload of places where it doesn't belong (something the original patch mostly failed to do). In addition this change lets us meet the spec's requirement to allow COLLATE anywhere in the clauses of a ColumnDef, and it avoids unfriendly behavior for constructs such as "foo::type COLLATE collation". To do this, pull collation information out of TypeName and put it in ColumnDef instead, thus reverting most of the collation-related changes in parse_type.c's API. I made one additional structural change, which was to use a ColumnDef as an intermediate node in AT_AlterColumnType AlterTableCmd nodes. This provides enough room to get rid of the "transform" wart in AlterTableCmd too, since the ColumnDef can carry the USING expression easily enough. Also fix some other minor bugs that have crept in in the same areas, like failure to copy recently-added fields of ColumnDef in copyfuncs.c. While at it, document the formerly secret ability to specify a collation in ALTER TABLE ALTER COLUMN TYPE, ALTER TYPE ADD ATTRIBUTE, and ALTER TYPE ALTER ATTRIBUTE TYPE; and correct some misstatements about what the default collation selection will be when COLLATE is omitted. BTW, the three-parameter form of format_type() should go away too, since it just contributes to the confusion in this area; but I'll do that in a separate patch.
2011-03-10 04:38:52 +01:00
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("collations are not supported by type %s",
Remove collation information from TypeName, where it does not belong. The initial collations patch treated a COLLATE spec as part of a TypeName, following what can only be described as brain fade on the part of the SQL committee. It's a lot more reasonable to treat COLLATE as a syntactically separate object, so that it can be added in only the productions where it actually belongs, rather than needing to reject it in a boatload of places where it doesn't belong (something the original patch mostly failed to do). In addition this change lets us meet the spec's requirement to allow COLLATE anywhere in the clauses of a ColumnDef, and it avoids unfriendly behavior for constructs such as "foo::type COLLATE collation". To do this, pull collation information out of TypeName and put it in ColumnDef instead, thus reverting most of the collation-related changes in parse_type.c's API. I made one additional structural change, which was to use a ColumnDef as an intermediate node in AT_AlterColumnType AlterTableCmd nodes. This provides enough room to get rid of the "transform" wart in AlterTableCmd too, since the ColumnDef can carry the USING expression easily enough. Also fix some other minor bugs that have crept in in the same areas, like failure to copy recently-added fields of ColumnDef in copyfuncs.c. While at it, document the formerly secret ability to specify a collation in ALTER TABLE ALTER COLUMN TYPE, ALTER TYPE ADD ATTRIBUTE, and ALTER TYPE ALTER ATTRIBUTE TYPE; and correct some misstatements about what the default collation selection will be when COLLATE is omitted. BTW, the three-parameter form of format_type() should go away too, since it just contributes to the confusion in this area; but I'll do that in a separate patch.
2011-03-10 04:38:52 +01:00
format_type_be(argtype)),
parser_errposition(pstate, c->location)));
newc->collOid = LookupCollation(pstate, c->collname, c->location);
newc->location = c->location;
return (Node *) newc;
}
/*
* Transform a "row compare-op row" construct
*
* The inputs are lists of already-transformed expressions.
* As with coerce_type, pstate may be NULL if no special unknown-Param
* processing is wanted.
*
* The output may be a single OpExpr, an AND or OR combination of OpExprs,
* or a RowCompareExpr. In all cases it is guaranteed to return boolean.
* The AND, OR, and RowCompareExpr cases further imply things about the
* behavior of the operators (ie, they behave as =, <>, or < <= > >=).
*/
static Node *
make_row_comparison_op(ParseState *pstate, List *opname,
List *largs, List *rargs, int location)
{
RowCompareExpr *rcexpr;
RowCompareType rctype;
List *opexprs;
List *opnos;
List *opfamilies;
ListCell *l,
*r;
List **opinfo_lists;
Bitmapset *strats;
int nopers;
int i;
nopers = list_length(largs);
if (nopers != list_length(rargs))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unequal number of entries in row expressions"),
parser_errposition(pstate, location)));
/*
* We can't compare zero-length rows because there is no principled basis
* for figuring out what the operator is.
*/
if (nopers == 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot compare rows of zero length"),
parser_errposition(pstate, location)));
/*
* Identify all the pairwise operators, using make_op so that behavior is
* the same as in the simple scalar case.
*/
opexprs = NIL;
forboth(l, largs, r, rargs)
{
Node *larg = (Node *) lfirst(l);
Node *rarg = (Node *) lfirst(r);
OpExpr *cmp;
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
cmp = castNode(OpExpr, make_op(pstate, opname, larg, rarg,
pstate->p_last_srf, location));
/*
* We don't use coerce_to_boolean here because we insist on the
* operator yielding boolean directly, not via coercion. If it
* doesn't yield bool it won't be in any index opfamilies...
*/
if (cmp->opresulttype != BOOLOID)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("row comparison operator must yield type boolean, "
"not type %s",
format_type_be(cmp->opresulttype)),
parser_errposition(pstate, location)));
if (expression_returns_set((Node *) cmp))
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("row comparison operator must not return a set"),
parser_errposition(pstate, location)));
opexprs = lappend(opexprs, cmp);
}
/*
* If rows are length 1, just return the single operator. In this case we
* don't insist on identifying btree semantics for the operator (but we
* still require it to return boolean).
*/
if (nopers == 1)
return (Node *) linitial(opexprs);
/*
* Now we must determine which row comparison semantics (= <> < <= > >=)
* apply to this set of operators. We look for btree opfamilies
* containing the operators, and see which interpretations (strategy
* numbers) exist for each operator.
*/
opinfo_lists = (List **) palloc(nopers * sizeof(List *));
strats = NULL;
i = 0;
foreach(l, opexprs)
{
Oid opno = ((OpExpr *) lfirst(l))->opno;
Bitmapset *this_strats;
ListCell *j;
opinfo_lists[i] = get_op_btree_interpretation(opno);
2006-10-04 02:30:14 +02:00
/*
* convert strategy numbers into a Bitmapset to make the intersection
* calculation easy.
*/
this_strats = NULL;
foreach(j, opinfo_lists[i])
{
OpBtreeInterpretation *opinfo = lfirst(j);
this_strats = bms_add_member(this_strats, opinfo->strategy);
}
if (i == 0)
strats = this_strats;
else
strats = bms_int_members(strats, this_strats);
i++;
}
/*
* If there are multiple common interpretations, we may use any one of
* them ... this coding arbitrarily picks the lowest btree strategy
* number.
*/
i = bms_next_member(strats, -1);
if (i < 0)
{
/* No common interpretation, so fail */
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not determine interpretation of row comparison operator %s",
strVal(llast(opname))),
errhint("Row comparison operators must be associated with btree operator families."),
parser_errposition(pstate, location)));
}
rctype = (RowCompareType) i;
/*
* For = and <> cases, we just combine the pairwise operators with AND or
* OR respectively.
*/
if (rctype == ROWCOMPARE_EQ)
return (Node *) makeBoolExpr(AND_EXPR, opexprs, location);
if (rctype == ROWCOMPARE_NE)
return (Node *) makeBoolExpr(OR_EXPR, opexprs, location);
/*
* Otherwise we need to choose exactly which opfamily to associate with
* each operator.
*/
opfamilies = NIL;
for (i = 0; i < nopers; i++)
{
Oid opfamily = InvalidOid;
ListCell *j;
foreach(j, opinfo_lists[i])
{
OpBtreeInterpretation *opinfo = lfirst(j);
if (opinfo->strategy == rctype)
{
opfamily = opinfo->opfamily_id;
break;
}
}
if (OidIsValid(opfamily))
opfamilies = lappend_oid(opfamilies, opfamily);
else /* should not happen */
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not determine interpretation of row comparison operator %s",
strVal(llast(opname))),
errdetail("There are multiple equally-plausible candidates."),
parser_errposition(pstate, location)));
}
/*
* Now deconstruct the OpExprs and create a RowCompareExpr.
*
* Note: can't just reuse the passed largs/rargs lists, because of
* possibility that make_op inserted coercion operations.
*/
opnos = NIL;
largs = NIL;
rargs = NIL;
foreach(l, opexprs)
{
OpExpr *cmp = (OpExpr *) lfirst(l);
opnos = lappend_oid(opnos, cmp->opno);
largs = lappend(largs, linitial(cmp->args));
rargs = lappend(rargs, lsecond(cmp->args));
}
rcexpr = makeNode(RowCompareExpr);
rcexpr->rctype = rctype;
rcexpr->opnos = opnos;
rcexpr->opfamilies = opfamilies;
rcexpr->inputcollids = NIL; /* assign_expr_collations will fix this */
rcexpr->largs = largs;
rcexpr->rargs = rargs;
return (Node *) rcexpr;
}
/*
* Transform a "row IS DISTINCT FROM row" construct
*
* The input RowExprs are already transformed
*/
static Node *
make_row_distinct_op(ParseState *pstate, List *opname,
RowExpr *lrow, RowExpr *rrow,
int location)
{
Node *result = NULL;
List *largs = lrow->args;
List *rargs = rrow->args;
ListCell *l,
*r;
if (list_length(largs) != list_length(rargs))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unequal number of entries in row expressions"),
parser_errposition(pstate, location)));
forboth(l, largs, r, rargs)
{
Node *larg = (Node *) lfirst(l);
Node *rarg = (Node *) lfirst(r);
Node *cmp;
cmp = (Node *) make_distinct_op(pstate, opname, larg, rarg, location);
if (result == NULL)
result = cmp;
else
result = (Node *) makeBoolExpr(OR_EXPR,
list_make2(result, cmp),
location);
}
if (result == NULL)
{
/* zero-length rows? Generate constant FALSE */
result = makeBoolConst(false, false);
}
return result;
}
/*
* make the node for an IS DISTINCT FROM operator
*/
static Expr *
make_distinct_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree,
int location)
{
Expr *result;
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
result = make_op(pstate, opname, ltree, rtree,
pstate->p_last_srf, location);
if (((OpExpr *) result)->opresulttype != BOOLOID)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("IS DISTINCT FROM requires = operator to yield boolean"),
parser_errposition(pstate, location)));
Disallow set-returning functions inside CASE or COALESCE. When we reimplemented SRFs in commit 69f4b9c85, our initial choice was to allow the behavior to vary from historical practice in cases where a SRF call appeared within a conditional-execution construct (currently, only CASE or COALESCE). But that was controversial to begin with, and subsequent discussion has resulted in a consensus that it's better to throw an error instead of executing the query differently from before, so long as we can provide a reasonably clear error message and a way to rewrite the query. Hence, add a parser mechanism to allow detection of such cases during parse analysis. The mechanism just requires storing, in the ParseState, a pointer to the set-returning FuncExpr or OpExpr most recently emitted by parse analysis. Then the parsing functions for CASE and COALESCE can detect the presence of a SRF in their arguments by noting whether this pointer changes while analyzing their arguments. Furthermore, if it does, it provides a suitable error cursor location for the complaint. (This means that if there's more than one SRF in the arguments, the error will point at the last one to be analyzed not the first. While connoisseurs of parsing behavior might find that odd, it's unlikely the average user would ever notice.) While at it, we can also provide more specific error messages than before about some pre-existing restrictions, such as no-SRFs-within-aggregates. Also, reject at parse time cases where a NULLIF or IS DISTINCT FROM construct would need to return a set. We've never supported that, but the restriction is depended on in more subtle ways now, so it seems wise to detect it at the start. Also, provide some documentation about how to rewrite a SRF-within-CASE query using a custom wrapper SRF. It turns out that the information_schema.user_mapping_options view contained an instance of exactly the behavior we're now forbidding; but rewriting it makes it more clear and safer too. initdb forced because of user_mapping_options change. Patch by me, with error message suggestions from Alvaro Herrera and Andres Freund, pursuant to a complaint from Regina Obe. Discussion: https://postgr.es/m/000001d2d5de$d8d66170$8a832450$@pcorp.us
2017-06-14 05:46:39 +02:00
if (((OpExpr *) result)->opretset)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
/* translator: %s is name of a SQL construct, eg NULLIF */
errmsg("%s must not return a set", "IS DISTINCT FROM"),
parser_errposition(pstate, location)));
2004-08-29 07:07:03 +02:00
/*
* We rely on DistinctExpr and OpExpr being same struct
*/
NodeSetTag(result, T_DistinctExpr);
return result;
}
/*
* Produce a NullTest node from an IS [NOT] DISTINCT FROM NULL construct
*
* "arg" is the untransformed other argument
*/
static Node *
make_nulltest_from_distinct(ParseState *pstate, A_Expr *distincta, Node *arg)
{
NullTest *nt = makeNode(NullTest);
nt->arg = (Expr *) transformExprRecurse(pstate, arg);
/* the argument can be any type, so don't coerce it */
if (distincta->kind == AEXPR_NOT_DISTINCT)
nt->nulltesttype = IS_NULL;
else
nt->nulltesttype = IS_NOT_NULL;
/* argisrow = false is correct whether or not arg is composite */
nt->argisrow = false;
nt->location = distincta->location;
return (Node *) nt;
}
/*
* Produce a string identifying an expression by kind.
*
* Note: when practical, use a simple SQL keyword for the result. If that
* doesn't work well, check call sites to see whether custom error message
* strings are required.
*/
const char *
ParseExprKindName(ParseExprKind exprKind)
{
switch (exprKind)
{
case EXPR_KIND_NONE:
return "invalid expression context";
case EXPR_KIND_OTHER:
return "extension expression";
case EXPR_KIND_JOIN_ON:
return "JOIN/ON";
case EXPR_KIND_JOIN_USING:
return "JOIN/USING";
case EXPR_KIND_FROM_SUBSELECT:
return "sub-SELECT in FROM";
case EXPR_KIND_FROM_FUNCTION:
return "function in FROM";
case EXPR_KIND_WHERE:
return "WHERE";
case EXPR_KIND_POLICY:
return "POLICY";
case EXPR_KIND_HAVING:
return "HAVING";
case EXPR_KIND_FILTER:
return "FILTER";
case EXPR_KIND_WINDOW_PARTITION:
return "window PARTITION BY";
case EXPR_KIND_WINDOW_ORDER:
return "window ORDER BY";
case EXPR_KIND_WINDOW_FRAME_RANGE:
return "window RANGE";
case EXPR_KIND_WINDOW_FRAME_ROWS:
return "window ROWS";
Support all SQL:2011 options for window frame clauses. This patch adds the ability to use "RANGE offset PRECEDING/FOLLOWING" frame boundaries in window functions. We'd punted on that back in the original patch to add window functions, because it was not clear how to do it in a reasonably data-type-extensible fashion. That problem is resolved here by adding the ability for btree operator classes to provide an "in_range" support function that defines how to add or subtract the RANGE offset value. Factoring it this way also allows the operator class to avoid overflow problems near the ends of the datatype's range, if it wishes to expend effort on that. (In the committed patch, the integer opclasses handle that issue, but it did not seem worth the trouble to avoid overflow failures for datetime types.) The patch includes in_range support for the integer_ops opfamily (int2/int4/int8) as well as the standard datetime types. Support for other numeric types has been requested, but that seems like suitable material for a follow-on patch. In addition, the patch adds GROUPS mode which counts the offset in ORDER-BY peer groups rather than rows, and it adds the frame_exclusion options specified by SQL:2011. As far as I can see, we are now fully up to spec on window framing options. Existing behaviors remain unchanged, except that I changed the errcode for a couple of existing error reports to meet the SQL spec's expectation that negative "offset" values should be reported as SQLSTATE 22013. Internally and in relevant parts of the documentation, we now consistently use the terminology "offset PRECEDING/FOLLOWING" rather than "value PRECEDING/FOLLOWING", since the term "value" is confusingly vague. Oliver Ford, reviewed and whacked around some by me Discussion: https://postgr.es/m/CAGMVOdu9sivPAxbNN0X+q19Sfv9edEPv=HibOJhB14TJv_RCQg@mail.gmail.com
2018-02-07 06:06:50 +01:00
case EXPR_KIND_WINDOW_FRAME_GROUPS:
return "window GROUPS";
case EXPR_KIND_SELECT_TARGET:
return "SELECT";
case EXPR_KIND_INSERT_TARGET:
return "INSERT";
case EXPR_KIND_UPDATE_SOURCE:
case EXPR_KIND_UPDATE_TARGET:
return "UPDATE";
Add support for MERGE SQL command MERGE performs actions that modify rows in the target table using a source table or query. MERGE provides a single SQL statement that can conditionally INSERT/UPDATE/DELETE rows -- a task that would otherwise require multiple PL statements. For example, MERGE INTO target AS t USING source AS s ON t.tid = s.sid WHEN MATCHED AND t.balance > s.delta THEN UPDATE SET balance = t.balance - s.delta WHEN MATCHED THEN DELETE WHEN NOT MATCHED AND s.delta > 0 THEN INSERT VALUES (s.sid, s.delta) WHEN NOT MATCHED THEN DO NOTHING; MERGE works with regular tables, partitioned tables and inheritance hierarchies, including column and row security enforcement, as well as support for row and statement triggers and transition tables therein. MERGE is optimized for OLTP and is parameterizable, though also useful for large scale ETL/ELT. MERGE is not intended to be used in preference to existing single SQL commands for INSERT, UPDATE or DELETE since there is some overhead. MERGE can be used from PL/pgSQL. MERGE does not support targetting updatable views or foreign tables, and RETURNING clauses are not allowed either. These limitations are likely fixable with sufficient effort. Rewrite rules are also not supported, but it's not clear that we'd want to support them. Author: Pavan Deolasee <pavan.deolasee@gmail.com> Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Author: Amit Langote <amitlangote09@gmail.com> Author: Simon Riggs <simon.riggs@enterprisedb.com> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Reviewed-by: Andres Freund <andres@anarazel.de> (earlier versions) Reviewed-by: Peter Geoghegan <pg@bowt.ie> (earlier versions) Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier versions) Reviewed-by: Japin Li <japinli@hotmail.com> Reviewed-by: Justin Pryzby <pryzby@telsasoft.com> Reviewed-by: Tomas Vondra <tomas.vondra@enterprisedb.com> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Discussion: https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com Discussion: https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com Discussion: https://postgr.es/m/20201231134736.GA25392@alvherre.pgsql
2022-03-28 16:45:58 +02:00
case EXPR_KIND_MERGE_WHEN:
return "MERGE WHEN";
case EXPR_KIND_GROUP_BY:
return "GROUP BY";
case EXPR_KIND_ORDER_BY:
return "ORDER BY";
case EXPR_KIND_DISTINCT_ON:
return "DISTINCT ON";
case EXPR_KIND_LIMIT:
return "LIMIT";
case EXPR_KIND_OFFSET:
return "OFFSET";
case EXPR_KIND_RETURNING:
return "RETURNING";
case EXPR_KIND_VALUES:
case EXPR_KIND_VALUES_SINGLE:
return "VALUES";
case EXPR_KIND_CHECK_CONSTRAINT:
case EXPR_KIND_DOMAIN_CHECK:
return "CHECK";
case EXPR_KIND_COLUMN_DEFAULT:
case EXPR_KIND_FUNCTION_DEFAULT:
return "DEFAULT";
case EXPR_KIND_INDEX_EXPRESSION:
return "index expression";
case EXPR_KIND_INDEX_PREDICATE:
return "index predicate";
Extended statistics on expressions Allow defining extended statistics on expressions, not just just on simple column references. With this commit, expressions are supported by all existing extended statistics kinds, improving the same types of estimates. A simple example may look like this: CREATE TABLE t (a int); CREATE STATISTICS s ON mod(a,10), mod(a,20) FROM t; ANALYZE t; The collected statistics are useful e.g. to estimate queries with those expressions in WHERE or GROUP BY clauses: SELECT * FROM t WHERE mod(a,10) = 0 AND mod(a,20) = 0; SELECT 1 FROM t GROUP BY mod(a,10), mod(a,20); This introduces new internal statistics kind 'e' (expressions) which is built automatically when the statistics object definition includes any expressions. This represents single-expression statistics, as if there was an expression index (but without the index maintenance overhead). The statistics is stored in pg_statistics_ext_data as an array of composite types, which is possible thanks to 79f6a942bd. CREATE STATISTICS allows building statistics on a single expression, in which case in which case it's not possible to specify statistics kinds. A new system view pg_stats_ext_exprs can be used to display expression statistics, similarly to pg_stats and pg_stats_ext views. ALTER TABLE ... ALTER COLUMN ... TYPE now treats indexes the same way it treats indexes, i.e. it drops and recreates the statistics. This means all statistics are reset, and we no longer try to preserve at least the functional dependencies. This should not be a major issue in practice, as the functional dependencies actually rely on per-column statistics, which were always reset anyway. Author: Tomas Vondra Reviewed-by: Justin Pryzby, Dean Rasheed, Zhihong Yu Discussion: https://postgr.es/m/ad7891d2-e90c-b446-9fe2-7419143847d7%40enterprisedb.com
2021-03-26 23:22:01 +01:00
case EXPR_KIND_STATS_EXPRESSION:
return "statistics expression";
case EXPR_KIND_ALTER_COL_TRANSFORM:
return "USING";
case EXPR_KIND_EXECUTE_PARAMETER:
return "EXECUTE";
case EXPR_KIND_TRIGGER_WHEN:
return "WHEN";
case EXPR_KIND_PARTITION_BOUND:
return "partition bound";
Implement table partitioning. Table partitioning is like table inheritance and reuses much of the existing infrastructure, but there are some important differences. The parent is called a partitioned table and is always empty; it may not have indexes or non-inherited constraints, since those make no sense for a relation with no data of its own. The children are called partitions and contain all of the actual data. Each partition has an implicit partitioning constraint. Multiple inheritance is not allowed, and partitioning and inheritance can't be mixed. Partitions can't have extra columns and may not allow nulls unless the parent does. Tuples inserted into the parent are automatically routed to the correct partition, so tuple-routing ON INSERT triggers are not needed. Tuple routing isn't yet supported for partitions which are foreign tables, and it doesn't handle updates that cross partition boundaries. Currently, tables can be range-partitioned or list-partitioned. List partitioning is limited to a single column, but range partitioning can involve multiple columns. A partitioning "column" can be an expression. Because table partitioning is less general than table inheritance, it is hoped that it will be easier to reason about properties of partitions, and therefore that this will serve as a better foundation for a variety of possible optimizations, including query planner optimizations. The tuple routing based which this patch does based on the implicit partitioning constraints is an example of this, but it seems likely that many other useful optimizations are also possible. Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat, Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova, Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
case EXPR_KIND_PARTITION_EXPRESSION:
return "PARTITION BY";
case EXPR_KIND_CALL_ARGUMENT:
return "CALL";
case EXPR_KIND_COPY_WHERE:
return "WHERE";
case EXPR_KIND_GENERATED_COLUMN:
return "GENERATED AS";
case EXPR_KIND_CYCLE_MARK:
return "CYCLE";
/*
* There is intentionally no default: case here, so that the
* compiler will warn if we add a new ParseExprKind without
* extending this switch. If we do see an unrecognized value at
* runtime, we'll fall through to the "unrecognized" return.
*/
}
return "unrecognized expression kind";
}
/*
* Make string Const node from JSON encoding name.
*
* UTF8 is default encoding.
*/
static Const *
getJsonEncodingConst(JsonFormat *format)
{
JsonEncoding encoding;
const char *enc;
Name encname = palloc(sizeof(NameData));
if (!format ||
format->format_type == JS_FORMAT_DEFAULT ||
format->encoding == JS_ENC_DEFAULT)
encoding = JS_ENC_UTF8;
else
encoding = format->encoding;
switch (encoding)
{
case JS_ENC_UTF16:
enc = "UTF16";
break;
case JS_ENC_UTF32:
enc = "UTF32";
break;
case JS_ENC_UTF8:
enc = "UTF8";
break;
default:
elog(ERROR, "invalid JSON encoding: %d", encoding);
break;
}
namestrcpy(encname, enc);
return makeConst(NAMEOID, -1, InvalidOid, NAMEDATALEN,
NameGetDatum(encname), false, false);
}
/*
* Make bytea => text conversion using specified JSON format encoding.
*/
static Node *
makeJsonByteaToTextConversion(Node *expr, JsonFormat *format, int location)
{
Const *encoding = getJsonEncodingConst(format);
FuncExpr *fexpr = makeFuncExpr(F_CONVERT_FROM, TEXTOID,
list_make2(expr, encoding),
InvalidOid, InvalidOid,
COERCE_EXPLICIT_CALL);
fexpr->location = location;
return (Node *) fexpr;
}
/*
* Make a CaseTestExpr node.
*/
static Node *
makeCaseTestExpr(Node *expr)
{
CaseTestExpr *placeholder = makeNode(CaseTestExpr);
placeholder->typeId = exprType(expr);
placeholder->typeMod = exprTypmod(expr);
placeholder->collation = exprCollation(expr);
return (Node *) placeholder;
}
/*
* Transform JSON value expression using specified input JSON format or
* default format otherwise.
*/
static Node *
transformJsonValueExpr(ParseState *pstate, JsonValueExpr *ve,
JsonFormatType default_format)
{
Node *expr = transformExprRecurse(pstate, (Node *) ve->raw_expr);
Node *rawexpr;
JsonFormatType format;
Oid exprtype;
int location;
char typcategory;
bool typispreferred;
/*
* Using JSON_VALUE here is slightly bogus: perhaps we need to be passed a
* JsonConstructorType so that we can use one of JSON_OBJECTAGG, etc.
*/
if (exprType(expr) == UNKNOWNOID)
expr = coerce_to_specific_type(pstate, expr, TEXTOID, "JSON_VALUE");
rawexpr = expr;
exprtype = exprType(expr);
location = exprLocation(expr);
get_type_category_preferred(exprtype, &typcategory, &typispreferred);
if (ve->format->format_type != JS_FORMAT_DEFAULT)
{
if (ve->format->encoding != JS_ENC_DEFAULT && exprtype != BYTEAOID)
ereport(ERROR,
errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("JSON ENCODING clause is only allowed for bytea input type"),
parser_errposition(pstate, ve->format->location));
if (exprtype == JSONOID || exprtype == JSONBOID)
{
format = JS_FORMAT_DEFAULT; /* do not format json[b] types */
ereport(WARNING,
errmsg("FORMAT JSON has no effect for json and jsonb types"),
parser_errposition(pstate, ve->format->location));
}
else
format = ve->format->format_type;
}
else if (exprtype == JSONOID || exprtype == JSONBOID)
format = JS_FORMAT_DEFAULT; /* do not format json[b] types */
else
format = default_format;
if (format != JS_FORMAT_DEFAULT)
{
Oid targettype = format == JS_FORMAT_JSONB ? JSONBOID : JSONOID;
Node *orig = makeCaseTestExpr(expr);
Node *coerced;
expr = orig;
if (exprtype != BYTEAOID && typcategory != TYPCATEGORY_STRING)
ereport(ERROR,
errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg(ve->format->format_type == JS_FORMAT_DEFAULT ?
"cannot use non-string types with implicit FORMAT JSON clause" :
"cannot use non-string types with explicit FORMAT JSON clause"),
parser_errposition(pstate, ve->format->location >= 0 ?
ve->format->location : location));
/* Convert encoded JSON text from bytea. */
if (format == JS_FORMAT_JSON && exprtype == BYTEAOID)
{
expr = makeJsonByteaToTextConversion(expr, ve->format, location);
exprtype = TEXTOID;
}
/* Try to coerce to the target type. */
coerced = coerce_to_target_type(pstate, expr, exprtype,
targettype, -1,
COERCION_EXPLICIT,
COERCE_EXPLICIT_CAST,
location);
if (!coerced)
{
/* If coercion failed, use to_json()/to_jsonb() functions. */
Oid fnoid = targettype == JSONOID ? F_TO_JSON : F_TO_JSONB;
FuncExpr *fexpr = makeFuncExpr(fnoid, targettype,
list_make1(expr),
InvalidOid, InvalidOid,
COERCE_EXPLICIT_CALL);
fexpr->location = location;
coerced = (Node *) fexpr;
}
if (coerced == orig)
expr = rawexpr;
else
{
ve = copyObject(ve);
ve->raw_expr = (Expr *) rawexpr;
ve->formatted_expr = (Expr *) coerced;
expr = (Node *) ve;
}
}
return expr;
}
/*
* Checks specified output format for its applicability to the target type.
*/
static void
checkJsonOutputFormat(ParseState *pstate, const JsonFormat *format,
Oid targettype, bool allow_format_for_non_strings)
{
if (!allow_format_for_non_strings &&
format->format_type != JS_FORMAT_DEFAULT &&
(targettype != BYTEAOID &&
targettype != JSONOID &&
targettype != JSONBOID))
{
char typcategory;
bool typispreferred;
get_type_category_preferred(targettype, &typcategory, &typispreferred);
if (typcategory != TYPCATEGORY_STRING)
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
parser_errposition(pstate, format->location),
errmsg("cannot use JSON format with non-string output types"));
}
if (format->format_type == JS_FORMAT_JSON)
{
JsonEncoding enc = format->encoding != JS_ENC_DEFAULT ?
format->encoding : JS_ENC_UTF8;
if (targettype != BYTEAOID &&
format->encoding != JS_ENC_DEFAULT)
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
parser_errposition(pstate, format->location),
errmsg("cannot set JSON encoding for non-bytea output types"));
if (enc != JS_ENC_UTF8)
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("unsupported JSON encoding"),
errhint("Only UTF8 JSON encoding is supported."),
parser_errposition(pstate, format->location));
}
}
/*
* Transform JSON output clause.
*
* Assigns target type oid and modifier.
* Assigns default format or checks specified format for its applicability to
* the target type.
*/
static JsonReturning *
transformJsonOutput(ParseState *pstate, const JsonOutput *output,
bool allow_format)
{
JsonReturning *ret;
/* if output clause is not specified, make default clause value */
if (!output)
{
ret = makeNode(JsonReturning);
ret->format = makeJsonFormat(JS_FORMAT_DEFAULT, JS_ENC_DEFAULT, -1);
ret->typid = InvalidOid;
ret->typmod = -1;
return ret;
}
ret = copyObject(output->returning);
typenameTypeIdAndMod(pstate, output->typeName, &ret->typid, &ret->typmod);
if (output->typeName->setof)
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("returning SETOF types is not supported in SQL/JSON functions"));
if (ret->format->format_type == JS_FORMAT_DEFAULT)
/* assign JSONB format when returning jsonb, or JSON format otherwise */
ret->format->format_type =
ret->typid == JSONBOID ? JS_FORMAT_JSONB : JS_FORMAT_JSON;
else
checkJsonOutputFormat(pstate, ret->format, ret->typid, allow_format);
return ret;
}
/*
* Transform JSON output clause of JSON constructor functions.
*
* Derive RETURNING type, if not specified, from argument types.
*/
static JsonReturning *
transformJsonConstructorOutput(ParseState *pstate, JsonOutput *output,
List *args)
{
JsonReturning *returning = transformJsonOutput(pstate, output, true);
if (!OidIsValid(returning->typid))
{
ListCell *lc;
bool have_jsonb = false;
foreach(lc, args)
{
Node *expr = lfirst(lc);
Oid typid = exprType(expr);
have_jsonb |= typid == JSONBOID;
if (have_jsonb)
break;
}
if (have_jsonb)
{
returning->typid = JSONBOID;
returning->format->format_type = JS_FORMAT_JSONB;
}
else
{
/* XXX TEXT is default by the standard, but we return JSON */
returning->typid = JSONOID;
returning->format->format_type = JS_FORMAT_JSON;
}
returning->typmod = -1;
}
return returning;
}
/*
* Coerce json[b]-valued function expression to the output type.
*/
static Node *
coerceJsonFuncExpr(ParseState *pstate, Node *expr,
const JsonReturning *returning, bool report_error)
{
Node *res;
int location;
Oid exprtype = exprType(expr);
/* if output type is not specified or equals to function type, return */
if (!OidIsValid(returning->typid) || returning->typid == exprtype)
return expr;
location = exprLocation(expr);
if (location < 0)
location = returning->format->location;
/* special case for RETURNING bytea FORMAT json */
if (returning->format->format_type == JS_FORMAT_JSON &&
returning->typid == BYTEAOID)
{
/* encode json text into bytea using pg_convert_to() */
Node *texpr = coerce_to_specific_type(pstate, expr, TEXTOID,
"JSON_FUNCTION");
Const *enc = getJsonEncodingConst(returning->format);
FuncExpr *fexpr = makeFuncExpr(F_CONVERT_TO, BYTEAOID,
list_make2(texpr, enc),
InvalidOid, InvalidOid,
COERCE_EXPLICIT_CALL);
fexpr->location = location;
return (Node *) fexpr;
}
/* try to coerce expression to the output type */
res = coerce_to_target_type(pstate, expr, exprtype,
returning->typid, returning->typmod,
/* XXX throwing errors when casting to char(N) */
COERCION_EXPLICIT,
COERCE_EXPLICIT_CAST,
location);
if (!res && report_error)
ereport(ERROR,
errcode(ERRCODE_CANNOT_COERCE),
errmsg("cannot cast type %s to %s",
format_type_be(exprtype),
format_type_be(returning->typid)),
parser_coercion_errposition(pstate, location, expr));
return res;
}
/*
* Make a JsonConstructorExpr node.
*/
static Node *
makeJsonConstructorExpr(ParseState *pstate, JsonConstructorType type,
List *args, Expr *fexpr, JsonReturning *returning,
bool unique, bool absent_on_null, int location)
{
JsonConstructorExpr *jsctor = makeNode(JsonConstructorExpr);
Node *placeholder;
Node *coercion;
jsctor->args = args;
jsctor->func = fexpr;
jsctor->type = type;
jsctor->returning = returning;
jsctor->unique = unique;
jsctor->absent_on_null = absent_on_null;
jsctor->location = location;
if (fexpr)
placeholder = makeCaseTestExpr((Node *) fexpr);
else
{
CaseTestExpr *cte = makeNode(CaseTestExpr);
cte->typeId = returning->format->format_type == JS_FORMAT_JSONB ?
JSONBOID : JSONOID;
cte->typeMod = -1;
cte->collation = InvalidOid;
placeholder = (Node *) cte;
}
coercion = coerceJsonFuncExpr(pstate, placeholder, returning, true);
if (coercion != placeholder)
jsctor->coercion = (Expr *) coercion;
return (Node *) jsctor;
}
/*
* Transform JSON_OBJECT() constructor.
*
* JSON_OBJECT() is transformed into json[b]_build_object[_ext]() call
* depending on the output JSON format. The first two arguments of
* json[b]_build_object_ext() are absent_on_null and check_unique.
*
* Then function call result is coerced to the target type.
*/
static Node *
transformJsonObjectConstructor(ParseState *pstate, JsonObjectConstructor *ctor)
{
JsonReturning *returning;
List *args = NIL;
/* transform key-value pairs, if any */
if (ctor->exprs)
{
ListCell *lc;
/* transform and append key-value arguments */
foreach(lc, ctor->exprs)
{
JsonKeyValue *kv = castNode(JsonKeyValue, lfirst(lc));
Node *key = transformExprRecurse(pstate, (Node *) kv->key);
Node *val = transformJsonValueExpr(pstate, kv->value,
JS_FORMAT_DEFAULT);
args = lappend(args, key);
args = lappend(args, val);
}
}
returning = transformJsonConstructorOutput(pstate, ctor->output, args);
return makeJsonConstructorExpr(pstate, JSCTOR_JSON_OBJECT, args, NULL,
returning, ctor->unique,
ctor->absent_on_null, ctor->location);
}
/*
* Transform JSON_ARRAY(query [FORMAT] [RETURNING] [ON NULL]) into
* (SELECT JSON_ARRAYAGG(a [FORMAT] [RETURNING] [ON NULL]) FROM (query) q(a))
*/
static Node *
transformJsonArrayQueryConstructor(ParseState *pstate,
JsonArrayQueryConstructor *ctor)
{
SubLink *sublink = makeNode(SubLink);
SelectStmt *select = makeNode(SelectStmt);
RangeSubselect *range = makeNode(RangeSubselect);
Alias *alias = makeNode(Alias);
ResTarget *target = makeNode(ResTarget);
JsonArrayAgg *agg = makeNode(JsonArrayAgg);
ColumnRef *colref = makeNode(ColumnRef);
Query *query;
ParseState *qpstate;
/* Transform query only for counting target list entries. */
qpstate = make_parsestate(pstate);
query = transformStmt(qpstate, ctor->query);
if (count_nonjunk_tlist_entries(query->targetList) != 1)
ereport(ERROR,
errcode(ERRCODE_SYNTAX_ERROR),
errmsg("subquery must return only one column"),
parser_errposition(pstate, ctor->location));
free_parsestate(qpstate);
colref->fields = list_make2(makeString(pstrdup("q")),
makeString(pstrdup("a")));
colref->location = ctor->location;
agg->arg = makeJsonValueExpr((Expr *) colref, ctor->format);
agg->absent_on_null = ctor->absent_on_null;
agg->constructor = makeNode(JsonAggConstructor);
agg->constructor->agg_order = NIL;
agg->constructor->output = ctor->output;
agg->constructor->location = ctor->location;
target->name = NULL;
target->indirection = NIL;
target->val = (Node *) agg;
target->location = ctor->location;
alias->aliasname = pstrdup("q");
alias->colnames = list_make1(makeString(pstrdup("a")));
range->lateral = false;
range->subquery = ctor->query;
range->alias = alias;
select->targetList = list_make1(target);
select->fromClause = list_make1(range);
sublink->subLinkType = EXPR_SUBLINK;
sublink->subLinkId = 0;
sublink->testexpr = NULL;
sublink->operName = NIL;
sublink->subselect = (Node *) select;
sublink->location = ctor->location;
return transformExprRecurse(pstate, (Node *) sublink);
}
/*
* Common code for JSON_OBJECTAGG and JSON_ARRAYAGG transformation.
*/
static Node *
transformJsonAggConstructor(ParseState *pstate, JsonAggConstructor *agg_ctor,
JsonReturning *returning, List *args,
Oid aggfnoid, Oid aggtype,
JsonConstructorType ctor_type,
bool unique, bool absent_on_null)
{
Node *node;
Expr *aggfilter;
aggfilter = agg_ctor->agg_filter ? (Expr *)
transformWhereClause(pstate, agg_ctor->agg_filter,
EXPR_KIND_FILTER, "FILTER") : NULL;
if (agg_ctor->over)
{
/* window function */
WindowFunc *wfunc = makeNode(WindowFunc);
wfunc->winfnoid = aggfnoid;
wfunc->wintype = aggtype;
/* wincollid and inputcollid will be set by parse_collate.c */
wfunc->args = args;
wfunc->aggfilter = aggfilter;
/* winref will be set by transformWindowFuncCall */
wfunc->winstar = false;
wfunc->winagg = true;
wfunc->location = agg_ctor->location;
/*
* ordered aggs not allowed in windows yet
*/
if (agg_ctor->agg_order != NIL)
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("aggregate ORDER BY is not implemented for window functions"),
parser_errposition(pstate, agg_ctor->location));
/* parse_agg.c does additional window-func-specific processing */
transformWindowFuncCall(pstate, wfunc, agg_ctor->over);
node = (Node *) wfunc;
}
else
{
Aggref *aggref = makeNode(Aggref);
aggref->aggfnoid = aggfnoid;
aggref->aggtype = aggtype;
/* aggcollid and inputcollid will be set by parse_collate.c */
/* aggtranstype will be set by planner */
/* aggargtypes will be set by transformAggregateCall */
/* aggdirectargs and args will be set by transformAggregateCall */
/* aggorder and aggdistinct will be set by transformAggregateCall */
aggref->aggfilter = aggfilter;
aggref->aggstar = false;
aggref->aggvariadic = false;
aggref->aggkind = AGGKIND_NORMAL;
aggref->aggpresorted = false;
/* agglevelsup will be set by transformAggregateCall */
aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
aggref->aggno = -1; /* planner will set aggno and aggtransno */
aggref->aggtransno = -1;
aggref->location = agg_ctor->location;
transformAggregateCall(pstate, aggref, args, agg_ctor->agg_order, false);
node = (Node *) aggref;
}
return makeJsonConstructorExpr(pstate, ctor_type, NIL, (Expr *) node,
returning, unique, absent_on_null,
agg_ctor->location);
}
/*
* Transform JSON_OBJECTAGG() aggregate function.
*
* JSON_OBJECTAGG() is transformed into
* json[b]_objectagg[_unique][_strict](key, value) call depending on
* the output JSON format. Then the function call result is coerced to the
* target output type.
*/
static Node *
transformJsonObjectAgg(ParseState *pstate, JsonObjectAgg *agg)
{
JsonReturning *returning;
Node *key;
Node *val;
List *args;
Oid aggfnoid;
Oid aggtype;
key = transformExprRecurse(pstate, (Node *) agg->arg->key);
val = transformJsonValueExpr(pstate, agg->arg->value, JS_FORMAT_DEFAULT);
args = list_make2(key, val);
returning = transformJsonConstructorOutput(pstate, agg->constructor->output,
args);
if (returning->format->format_type == JS_FORMAT_JSONB)
{
if (agg->absent_on_null)
if (agg->unique)
aggfnoid = F_JSONB_OBJECT_AGG_UNIQUE_STRICT;
else
aggfnoid = F_JSONB_OBJECT_AGG_STRICT;
else if (agg->unique)
aggfnoid = F_JSONB_OBJECT_AGG_UNIQUE;
else
aggfnoid = F_JSONB_OBJECT_AGG;
aggtype = JSONBOID;
}
else
{
if (agg->absent_on_null)
if (agg->unique)
aggfnoid = F_JSON_OBJECT_AGG_UNIQUE_STRICT;
else
aggfnoid = F_JSON_OBJECT_AGG_STRICT;
else if (agg->unique)
aggfnoid = F_JSON_OBJECT_AGG_UNIQUE;
else
aggfnoid = F_JSON_OBJECT_AGG;
aggtype = JSONOID;
}
return transformJsonAggConstructor(pstate, agg->constructor, returning,
args, aggfnoid, aggtype,
JSCTOR_JSON_OBJECTAGG,
agg->unique, agg->absent_on_null);
}
/*
* Transform JSON_ARRAYAGG() aggregate function.
*
* JSON_ARRAYAGG() is transformed into json[b]_agg[_strict]() call depending
* on the output JSON format and absent_on_null. Then the function call result
* is coerced to the target output type.
*/
static Node *
transformJsonArrayAgg(ParseState *pstate, JsonArrayAgg *agg)
{
JsonReturning *returning;
Node *arg;
Oid aggfnoid;
Oid aggtype;
arg = transformJsonValueExpr(pstate, agg->arg, JS_FORMAT_DEFAULT);
returning = transformJsonConstructorOutput(pstate, agg->constructor->output,
list_make1(arg));
if (returning->format->format_type == JS_FORMAT_JSONB)
{
aggfnoid = agg->absent_on_null ? F_JSONB_AGG_STRICT : F_JSONB_AGG;
aggtype = JSONBOID;
}
else
{
aggfnoid = agg->absent_on_null ? F_JSON_AGG_STRICT : F_JSON_AGG;
aggtype = JSONOID;
}
return transformJsonAggConstructor(pstate, agg->constructor, returning,
list_make1(arg), aggfnoid, aggtype,
JSCTOR_JSON_ARRAYAGG,
false, agg->absent_on_null);
}
/*
* Transform JSON_ARRAY() constructor.
*
* JSON_ARRAY() is transformed into json[b]_build_array[_ext]() call
* depending on the output JSON format. The first argument of
* json[b]_build_array_ext() is absent_on_null.
*
* Then function call result is coerced to the target type.
*/
static Node *
transformJsonArrayConstructor(ParseState *pstate, JsonArrayConstructor *ctor)
{
JsonReturning *returning;
List *args = NIL;
/* transform element expressions, if any */
if (ctor->exprs)
{
ListCell *lc;
/* transform and append element arguments */
foreach(lc, ctor->exprs)
{
JsonValueExpr *jsval = castNode(JsonValueExpr, lfirst(lc));
Node *val = transformJsonValueExpr(pstate, jsval,
JS_FORMAT_DEFAULT);
args = lappend(args, val);
}
}
returning = transformJsonConstructorOutput(pstate, ctor->output, args);
return makeJsonConstructorExpr(pstate, JSCTOR_JSON_ARRAY, args, NULL,
returning, false, ctor->absent_on_null,
ctor->location);
}
static Node *
transformJsonParseArg(ParseState *pstate, Node *jsexpr, JsonFormat *format,
Oid *exprtype)
{
Node *raw_expr = transformExprRecurse(pstate, jsexpr);
Node *expr = raw_expr;
*exprtype = exprType(expr);
/* prepare input document */
if (*exprtype == BYTEAOID)
{
JsonValueExpr *jve;
expr = makeCaseTestExpr(raw_expr);
expr = makeJsonByteaToTextConversion(expr, format, exprLocation(expr));
*exprtype = TEXTOID;
jve = makeJsonValueExpr((Expr *) raw_expr, format);
jve->formatted_expr = (Expr *) expr;
expr = (Node *) jve;
}
else
{
char typcategory;
bool typispreferred;
get_type_category_preferred(*exprtype, &typcategory, &typispreferred);
if (*exprtype == UNKNOWNOID || typcategory == TYPCATEGORY_STRING)
{
expr = coerce_to_target_type(pstate, (Node *) expr, *exprtype,
TEXTOID, -1,
COERCION_IMPLICIT,
COERCE_IMPLICIT_CAST, -1);
*exprtype = TEXTOID;
}
if (format->encoding != JS_ENC_DEFAULT)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
parser_errposition(pstate, format->location),
errmsg("cannot use JSON FORMAT ENCODING clause for non-bytea input types")));
}
return expr;
}
/*
* Transform IS JSON predicate.
*/
static Node *
transformJsonIsPredicate(ParseState *pstate, JsonIsPredicate *pred)
{
Oid exprtype;
Node *expr = transformJsonParseArg(pstate, pred->expr, pred->format,
&exprtype);
/* make resulting expression */
if (exprtype != TEXTOID && exprtype != JSONOID && exprtype != JSONBOID)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("cannot use type %s in IS JSON predicate",
format_type_be(exprtype))));
/* This intentionally(?) drops the format clause. */
return makeJsonIsPredicate(expr, NULL, pred->item_type,
pred->unique_keys, pred->location);
}