1997-11-25 23:07:18 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* parse_target.c
|
|
|
|
* handle target lists
|
|
|
|
*
|
2021-01-02 19:06:25 +01:00
|
|
|
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1997-11-25 23:07:18 +01:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/parser/parse_target.c
|
1997-11-25 23:07:18 +01:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
2002-03-12 01:52:10 +01:00
|
|
|
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "catalog/pg_type.h"
|
2003-06-27 19:07:03 +02:00
|
|
|
#include "commands/dbcommands.h"
|
2005-04-25 23:03:25 +02:00
|
|
|
#include "funcapi.h"
|
2002-03-21 17:02:16 +01:00
|
|
|
#include "miscadmin.h"
|
1997-11-25 23:07:18 +01:00
|
|
|
#include "nodes/makefuncs.h"
|
2008-08-26 00:42:34 +02:00
|
|
|
#include "nodes/nodeFuncs.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "parser/parse_coerce.h"
|
1997-11-25 23:07:18 +01:00
|
|
|
#include "parser/parse_expr.h"
|
1998-01-20 06:05:08 +01:00
|
|
|
#include "parser/parse_func.h"
|
1997-11-25 23:07:18 +01:00
|
|
|
#include "parser/parse_relation.h"
|
|
|
|
#include "parser/parse_target.h"
|
2000-06-15 05:33:12 +02:00
|
|
|
#include "parser/parse_type.h"
|
2019-11-12 04:00:16 +01:00
|
|
|
#include "parser/parsetree.h"
|
2001-08-09 20:28:18 +02:00
|
|
|
#include "utils/builtins.h"
|
2004-06-09 21:08:20 +02:00
|
|
|
#include "utils/lsyscache.h"
|
2011-02-23 18:18:09 +01:00
|
|
|
#include "utils/rel.h"
|
2004-06-19 20:19:56 +02:00
|
|
|
#include "utils/typcache.h"
|
1998-05-10 01:31:34 +02:00
|
|
|
|
2005-04-06 18:34:07 +02:00
|
|
|
static void markTargetListOrigin(ParseState *pstate, TargetEntry *tle,
|
2019-05-22 19:04:48 +02:00
|
|
|
Var *var, int levelsup);
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
static Node *transformAssignmentSubscripts(ParseState *pstate,
|
2019-05-22 19:04:48 +02:00
|
|
|
Node *basenode,
|
|
|
|
const char *targetName,
|
|
|
|
Oid targetTypeId,
|
|
|
|
int32 targetTypMod,
|
|
|
|
Oid targetCollation,
|
|
|
|
List *subscripts,
|
|
|
|
bool isSlice,
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
List *indirection,
|
2019-05-22 19:04:48 +02:00
|
|
|
ListCell *next_indirection,
|
|
|
|
Node *rhs,
|
2021-01-04 17:52:00 +01:00
|
|
|
CoercionContext ccontext,
|
2019-05-22 19:04:48 +02:00
|
|
|
int location);
|
2006-08-02 03:59:48 +02:00
|
|
|
static List *ExpandColumnRefStar(ParseState *pstate, ColumnRef *cref,
|
2019-05-22 19:04:48 +02:00
|
|
|
bool make_target_entry);
|
2008-09-01 22:42:46 +02:00
|
|
|
static List *ExpandAllTables(ParseState *pstate, int location);
|
2006-08-02 03:59:48 +02:00
|
|
|
static List *ExpandIndirectionStar(ParseState *pstate, A_Indirection *ind,
|
2019-05-22 19:04:48 +02:00
|
|
|
bool make_target_entry, ParseExprKind exprKind);
|
2019-12-26 17:16:42 +01:00
|
|
|
static List *ExpandSingleTable(ParseState *pstate, ParseNamespaceItem *nsitem,
|
|
|
|
int sublevels_up, int location,
|
|
|
|
bool make_target_entry);
|
2009-10-31 02:41:31 +01:00
|
|
|
static List *ExpandRowReference(ParseState *pstate, Node *expr,
|
2019-05-22 19:04:48 +02:00
|
|
|
bool make_target_entry);
|
2001-10-25 07:50:21 +02:00
|
|
|
static int FigureColnameInternal(Node *node, char **name);
|
1997-11-26 04:43:18 +01:00
|
|
|
|
1998-05-21 05:53:51 +02:00
|
|
|
|
1999-07-19 02:26:20 +02:00
|
|
|
/*
|
|
|
|
* transformTargetEntry()
|
|
|
|
* Transform any ordinary "expression-type" node into a targetlist entry.
|
|
|
|
* This is exported so that parse_clause.c can generate targetlist entries
|
|
|
|
* for ORDER/GROUP BY items that are not already in the targetlist.
|
1998-07-08 16:04:11 +02:00
|
|
|
*
|
1999-07-19 02:26:20 +02:00
|
|
|
* node the (untransformed) parse tree for the value expression.
|
|
|
|
* expr the transformed expression, or NULL if caller didn't do it yet.
|
2013-05-29 22:58:43 +02:00
|
|
|
* exprKind expression kind (EXPR_KIND_SELECT_TARGET, etc)
|
1999-07-19 02:26:20 +02:00
|
|
|
* colname the column name to be assigned, or NULL if none yet set.
|
|
|
|
* resjunk true if the target should be marked resjunk, ie, it is not
|
|
|
|
* wanted in the final projected tuple.
|
1998-05-21 05:53:51 +02:00
|
|
|
*/
|
1998-07-08 16:04:11 +02:00
|
|
|
TargetEntry *
|
1999-07-19 02:26:20 +02:00
|
|
|
transformTargetEntry(ParseState *pstate,
|
1998-07-08 16:04:11 +02:00
|
|
|
Node *node,
|
1999-07-19 02:26:20 +02:00
|
|
|
Node *expr,
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
ParseExprKind exprKind,
|
1998-07-08 16:04:11 +02:00
|
|
|
char *colname,
|
2003-02-13 06:53:46 +01:00
|
|
|
bool resjunk)
|
1998-05-21 05:53:51 +02:00
|
|
|
{
|
1999-07-19 02:26:20 +02:00
|
|
|
/* Transform the node if caller didn't do it already */
|
1998-08-23 16:43:46 +02:00
|
|
|
if (expr == NULL)
|
Improve handling of "UPDATE ... SET (column_list) = row_constructor".
Previously, the right-hand side of a multiple-column assignment, if it
wasn't a sub-SELECT, had to be a simple parenthesized expression list,
because gram.y was responsible for "bursting" the construct into
independent column assignments. This had the minor defect that you
couldn't write ROW (though you should be able to, since the standard says
this is a row constructor), and the rather larger defect that unlike other
uses of row constructors, we would not expand a "foo.*" item into multiple
columns.
Fix that by changing the RHS to be just "a_expr" in the grammar, leaving
it to transformMultiAssignRef to separate the elements of a RowExpr;
which it will do only after performing standard transformation of the
RowExpr, so that "foo.*" behaves as expected.
The key reason we didn't do that before was the hard-wired handling of
DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by
allowing DEFAULT in any a_expr and having parse analysis throw an error
if SetToDefault is found in an unexpected place. That's an improvement
anyway since the error can be more specific than just "syntax error".
The SQL standard suggests that the RHS could be any a_expr yielding a
suitable row value. This patch doesn't really move the goal posts in that
respect --- you're still limited to RowExpr or a sub-SELECT --- but it does
fix the grammar restriction, so it provides some tangible progress towards
a full implementation. And the limitation is now documented by an explicit
error message rather than an unhelpful "syntax error".
Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If it's a SetToDefault node and we should allow that, pass it
|
|
|
|
* through unmodified. (transformExpr will throw the appropriate
|
|
|
|
* error if we're disallowing it.)
|
|
|
|
*/
|
|
|
|
if (exprKind == EXPR_KIND_UPDATE_SOURCE && IsA(node, SetToDefault))
|
|
|
|
expr = node;
|
|
|
|
else
|
|
|
|
expr = transformExpr(pstate, node, exprKind);
|
|
|
|
}
|
1998-08-23 16:43:46 +02:00
|
|
|
|
2003-08-12 01:04:50 +02:00
|
|
|
if (colname == NULL && !resjunk)
|
1998-08-23 16:43:46 +02:00
|
|
|
{
|
2000-04-12 19:17:23 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Generate a suitable column name for a column without any explicit
|
|
|
|
* 'AS ColumnName' clause.
|
1998-08-23 16:43:46 +02:00
|
|
|
*/
|
2001-09-17 03:06:36 +02:00
|
|
|
colname = FigureColname(node);
|
1998-08-23 16:43:46 +02:00
|
|
|
}
|
|
|
|
|
2005-04-06 18:34:07 +02:00
|
|
|
return makeTargetEntry((Expr *) expr,
|
|
|
|
(AttrNumber) pstate->p_next_resno++,
|
|
|
|
colname,
|
|
|
|
resjunk);
|
1999-07-19 02:26:20 +02:00
|
|
|
}
|
1998-12-04 16:34:49 +01:00
|
|
|
|
|
|
|
|
1998-08-25 05:17:29 +02:00
|
|
|
/*
|
1999-07-19 02:26:20 +02:00
|
|
|
* transformTargetList()
|
|
|
|
* Turns a list of ResTarget's into a list of TargetEntry's.
|
|
|
|
*
|
Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...
This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.
The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
2014-06-18 19:22:25 +02:00
|
|
|
* This code acts mostly the same for SELECT, UPDATE, or RETURNING lists;
|
|
|
|
* the main thing is to transform the given expressions (the "val" fields).
|
2016-03-15 23:06:11 +01:00
|
|
|
* The exprKind parameter distinguishes these cases when necessary.
|
1998-08-25 05:17:29 +02:00
|
|
|
*/
|
1999-07-19 02:26:20 +02:00
|
|
|
List *
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
transformTargetList(ParseState *pstate, List *targetlist,
|
|
|
|
ParseExprKind exprKind)
|
1998-08-25 05:17:29 +02:00
|
|
|
{
|
2004-06-01 05:28:48 +02:00
|
|
|
List *p_target = NIL;
|
2016-11-20 20:26:19 +01:00
|
|
|
bool expand_star;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *o_target;
|
1998-08-25 05:17:29 +02:00
|
|
|
|
Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...
This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.
The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
2014-06-18 19:22:25 +02:00
|
|
|
/* Shouldn't have any leftover multiassign items at start */
|
|
|
|
Assert(pstate->p_multiassign_exprs == NIL);
|
|
|
|
|
2016-11-20 20:26:19 +01:00
|
|
|
/* Expand "something.*" in SELECT and RETURNING, but not UPDATE */
|
|
|
|
expand_star = (exprKind != EXPR_KIND_UPDATE_SOURCE);
|
|
|
|
|
2003-08-11 22:46:47 +02:00
|
|
|
foreach(o_target, targetlist)
|
1998-08-25 05:17:29 +02:00
|
|
|
{
|
2003-08-11 22:46:47 +02:00
|
|
|
ResTarget *res = (ResTarget *) lfirst(o_target);
|
1998-08-23 16:43:46 +02:00
|
|
|
|
2004-06-19 20:19:56 +02:00
|
|
|
/*
|
|
|
|
* Check for "something.*". Depending on the complexity of the
|
2008-08-30 03:39:14 +02:00
|
|
|
* "something", the star could appear as the last field in ColumnRef,
|
2005-10-15 04:49:52 +02:00
|
|
|
* or as the last indirection item in A_Indirection.
|
2004-06-19 20:19:56 +02:00
|
|
|
*/
|
2016-11-20 20:26:19 +01:00
|
|
|
if (expand_star)
|
1999-07-19 02:26:20 +02:00
|
|
|
{
|
2016-11-20 20:26:19 +01:00
|
|
|
if (IsA(res->val, ColumnRef))
|
1999-07-19 02:26:20 +02:00
|
|
|
{
|
2016-11-20 20:26:19 +01:00
|
|
|
ColumnRef *cref = (ColumnRef *) res->val;
|
2004-06-19 20:19:56 +02:00
|
|
|
|
2016-11-20 20:26:19 +01:00
|
|
|
if (IsA(llast(cref->fields), A_Star))
|
|
|
|
{
|
|
|
|
/* It is something.*, expand into multiple items */
|
|
|
|
p_target = list_concat(p_target,
|
|
|
|
ExpandColumnRefStar(pstate,
|
|
|
|
cref,
|
|
|
|
true));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (IsA(res->val, A_Indirection))
|
2004-06-19 20:19:56 +02:00
|
|
|
{
|
2016-11-20 20:26:19 +01:00
|
|
|
A_Indirection *ind = (A_Indirection *) res->val;
|
|
|
|
|
|
|
|
if (IsA(llast(ind->indirection), A_Star))
|
|
|
|
{
|
|
|
|
/* It is something.*, expand into multiple items */
|
|
|
|
p_target = list_concat(p_target,
|
|
|
|
ExpandIndirectionStar(pstate,
|
|
|
|
ind,
|
|
|
|
true,
|
|
|
|
exprKind));
|
|
|
|
continue;
|
|
|
|
}
|
2004-06-19 20:19:56 +02:00
|
|
|
}
|
1998-08-25 05:17:29 +02:00
|
|
|
}
|
2004-06-19 20:19:56 +02:00
|
|
|
|
|
|
|
/*
|
2016-11-20 20:26:19 +01:00
|
|
|
* Not "something.*", or we want to treat that as a plain whole-row
|
|
|
|
* variable, so transform as a single expression
|
2004-06-19 20:19:56 +02:00
|
|
|
*/
|
|
|
|
p_target = lappend(p_target,
|
|
|
|
transformTargetEntry(pstate,
|
|
|
|
res->val,
|
|
|
|
NULL,
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
exprKind,
|
2004-06-19 20:19:56 +02:00
|
|
|
res->name,
|
|
|
|
false));
|
1998-08-25 05:17:29 +02:00
|
|
|
}
|
1999-07-19 02:26:20 +02:00
|
|
|
|
Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...
This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.
The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
2014-06-18 19:22:25 +02:00
|
|
|
/*
|
|
|
|
* If any multiassign resjunk items were created, attach them to the end
|
|
|
|
* of the targetlist. This should only happen in an UPDATE tlist. We
|
|
|
|
* don't need to worry about numbering of these items; transformUpdateStmt
|
|
|
|
* will set their resnos.
|
|
|
|
*/
|
|
|
|
if (pstate->p_multiassign_exprs)
|
|
|
|
{
|
|
|
|
Assert(exprKind == EXPR_KIND_UPDATE_SOURCE);
|
|
|
|
p_target = list_concat(p_target, pstate->p_multiassign_exprs);
|
|
|
|
pstate->p_multiassign_exprs = NIL;
|
|
|
|
}
|
|
|
|
|
2004-06-01 05:28:48 +02:00
|
|
|
return p_target;
|
1998-08-25 05:17:29 +02:00
|
|
|
}
|
|
|
|
|
1999-07-19 02:26:20 +02:00
|
|
|
|
2006-08-02 03:59:48 +02:00
|
|
|
/*
|
|
|
|
* transformExpressionList()
|
|
|
|
*
|
|
|
|
* This is the identical transformation to transformTargetList, except that
|
|
|
|
* the input list elements are bare expressions without ResTarget decoration,
|
|
|
|
* and the output elements are likewise just expressions without TargetEntry
|
2014-05-06 18:12:18 +02:00
|
|
|
* decoration. We use this for ROW() and VALUES() constructs.
|
Improve handling of "UPDATE ... SET (column_list) = row_constructor".
Previously, the right-hand side of a multiple-column assignment, if it
wasn't a sub-SELECT, had to be a simple parenthesized expression list,
because gram.y was responsible for "bursting" the construct into
independent column assignments. This had the minor defect that you
couldn't write ROW (though you should be able to, since the standard says
this is a row constructor), and the rather larger defect that unlike other
uses of row constructors, we would not expand a "foo.*" item into multiple
columns.
Fix that by changing the RHS to be just "a_expr" in the grammar, leaving
it to transformMultiAssignRef to separate the elements of a RowExpr;
which it will do only after performing standard transformation of the
RowExpr, so that "foo.*" behaves as expected.
The key reason we didn't do that before was the hard-wired handling of
DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by
allowing DEFAULT in any a_expr and having parse analysis throw an error
if SetToDefault is found in an unexpected place. That's an improvement
anyway since the error can be more specific than just "syntax error".
The SQL standard suggests that the RHS could be any a_expr yielding a
suitable row value. This patch doesn't really move the goal posts in that
respect --- you're still limited to RowExpr or a sub-SELECT --- but it does
fix the grammar restriction, so it provides some tangible progress towards
a full implementation. And the limitation is now documented by an explicit
error message rather than an unhelpful "syntax error".
Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
|
|
|
*
|
|
|
|
* exprKind is not enough to tell us whether to allow SetToDefault, so
|
|
|
|
* an additional flag is needed for that.
|
2006-08-02 03:59:48 +02:00
|
|
|
*/
|
|
|
|
List *
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
transformExpressionList(ParseState *pstate, List *exprlist,
|
Improve handling of "UPDATE ... SET (column_list) = row_constructor".
Previously, the right-hand side of a multiple-column assignment, if it
wasn't a sub-SELECT, had to be a simple parenthesized expression list,
because gram.y was responsible for "bursting" the construct into
independent column assignments. This had the minor defect that you
couldn't write ROW (though you should be able to, since the standard says
this is a row constructor), and the rather larger defect that unlike other
uses of row constructors, we would not expand a "foo.*" item into multiple
columns.
Fix that by changing the RHS to be just "a_expr" in the grammar, leaving
it to transformMultiAssignRef to separate the elements of a RowExpr;
which it will do only after performing standard transformation of the
RowExpr, so that "foo.*" behaves as expected.
The key reason we didn't do that before was the hard-wired handling of
DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by
allowing DEFAULT in any a_expr and having parse analysis throw an error
if SetToDefault is found in an unexpected place. That's an improvement
anyway since the error can be more specific than just "syntax error".
The SQL standard suggests that the RHS could be any a_expr yielding a
suitable row value. This patch doesn't really move the goal posts in that
respect --- you're still limited to RowExpr or a sub-SELECT --- but it does
fix the grammar restriction, so it provides some tangible progress towards
a full implementation. And the limitation is now documented by an explicit
error message rather than an unhelpful "syntax error".
Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
|
|
|
ParseExprKind exprKind, bool allowDefault)
|
2006-08-02 03:59:48 +02:00
|
|
|
{
|
|
|
|
List *result = NIL;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
foreach(lc, exprlist)
|
|
|
|
{
|
|
|
|
Node *e = (Node *) lfirst(lc);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for "something.*". Depending on the complexity of the
|
2008-08-30 03:39:14 +02:00
|
|
|
* "something", the star could appear as the last field in ColumnRef,
|
2006-08-02 03:59:48 +02:00
|
|
|
* or as the last indirection item in A_Indirection.
|
|
|
|
*/
|
|
|
|
if (IsA(e, ColumnRef))
|
|
|
|
{
|
|
|
|
ColumnRef *cref = (ColumnRef *) e;
|
|
|
|
|
2008-08-30 03:39:14 +02:00
|
|
|
if (IsA(llast(cref->fields), A_Star))
|
2006-08-02 03:59:48 +02:00
|
|
|
{
|
|
|
|
/* It is something.*, expand into multiple items */
|
|
|
|
result = list_concat(result,
|
|
|
|
ExpandColumnRefStar(pstate, cref,
|
|
|
|
false));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (IsA(e, A_Indirection))
|
|
|
|
{
|
|
|
|
A_Indirection *ind = (A_Indirection *) e;
|
|
|
|
|
2008-08-30 03:39:14 +02:00
|
|
|
if (IsA(llast(ind->indirection), A_Star))
|
2006-08-02 03:59:48 +02:00
|
|
|
{
|
|
|
|
/* It is something.*, expand into multiple items */
|
|
|
|
result = list_concat(result,
|
|
|
|
ExpandIndirectionStar(pstate, ind,
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
false, exprKind));
|
2006-08-02 03:59:48 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Improve handling of "UPDATE ... SET (column_list) = row_constructor".
Previously, the right-hand side of a multiple-column assignment, if it
wasn't a sub-SELECT, had to be a simple parenthesized expression list,
because gram.y was responsible for "bursting" the construct into
independent column assignments. This had the minor defect that you
couldn't write ROW (though you should be able to, since the standard says
this is a row constructor), and the rather larger defect that unlike other
uses of row constructors, we would not expand a "foo.*" item into multiple
columns.
Fix that by changing the RHS to be just "a_expr" in the grammar, leaving
it to transformMultiAssignRef to separate the elements of a RowExpr;
which it will do only after performing standard transformation of the
RowExpr, so that "foo.*" behaves as expected.
The key reason we didn't do that before was the hard-wired handling of
DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by
allowing DEFAULT in any a_expr and having parse analysis throw an error
if SetToDefault is found in an unexpected place. That's an improvement
anyway since the error can be more specific than just "syntax error".
The SQL standard suggests that the RHS could be any a_expr yielding a
suitable row value. This patch doesn't really move the goal posts in that
respect --- you're still limited to RowExpr or a sub-SELECT --- but it does
fix the grammar restriction, so it provides some tangible progress towards
a full implementation. And the limitation is now documented by an explicit
error message rather than an unhelpful "syntax error".
Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
|
|
|
* Not "something.*", so transform as a single expression. If it's a
|
|
|
|
* SetToDefault node and we should allow that, pass it through
|
|
|
|
* unmodified. (transformExpr will throw the appropriate error if
|
|
|
|
* we're disallowing it.)
|
2006-08-02 03:59:48 +02:00
|
|
|
*/
|
Improve handling of "UPDATE ... SET (column_list) = row_constructor".
Previously, the right-hand side of a multiple-column assignment, if it
wasn't a sub-SELECT, had to be a simple parenthesized expression list,
because gram.y was responsible for "bursting" the construct into
independent column assignments. This had the minor defect that you
couldn't write ROW (though you should be able to, since the standard says
this is a row constructor), and the rather larger defect that unlike other
uses of row constructors, we would not expand a "foo.*" item into multiple
columns.
Fix that by changing the RHS to be just "a_expr" in the grammar, leaving
it to transformMultiAssignRef to separate the elements of a RowExpr;
which it will do only after performing standard transformation of the
RowExpr, so that "foo.*" behaves as expected.
The key reason we didn't do that before was the hard-wired handling of
DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by
allowing DEFAULT in any a_expr and having parse analysis throw an error
if SetToDefault is found in an unexpected place. That's an improvement
anyway since the error can be more specific than just "syntax error".
The SQL standard suggests that the RHS could be any a_expr yielding a
suitable row value. This patch doesn't really move the goal posts in that
respect --- you're still limited to RowExpr or a sub-SELECT --- but it does
fix the grammar restriction, so it provides some tangible progress towards
a full implementation. And the limitation is now documented by an explicit
error message rather than an unhelpful "syntax error".
Discussion: <8542.1479742008@sss.pgh.pa.us>
2016-11-22 21:19:57 +01:00
|
|
|
if (allowDefault && IsA(e, SetToDefault))
|
|
|
|
/* do nothing */ ;
|
|
|
|
else
|
|
|
|
e = transformExpr(pstate, e, exprKind);
|
|
|
|
|
|
|
|
result = lappend(result, e);
|
2006-08-02 03:59:48 +02:00
|
|
|
}
|
|
|
|
|
Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...
This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.
The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
2014-06-18 19:22:25 +02:00
|
|
|
/* Shouldn't have any multiassign items here */
|
|
|
|
Assert(pstate->p_multiassign_exprs == NIL);
|
|
|
|
|
2006-08-02 03:59:48 +02:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Change unknown-type literals to type text in SELECT and RETURNING lists.
Previously, we left such literals alone if the query or subquery had
no properties forcing a type decision to be made (such as an ORDER BY or
DISTINCT clause using that output column). This meant that "unknown" could
be an exposed output column type, which has never been a great idea because
it could result in strange failures later on. For example, an outer query
that tried to do any operations on an unknown-type subquery output would
generally fail with some weird error like "failed to find conversion
function from unknown to text" or "could not determine which collation to
use for string comparison". Also, if the case occurred in a CREATE VIEW's
query then the view would have an unknown-type column, causing similar
failures in queries trying to use the view.
To fix, at the tail end of parse analysis of a query, forcibly convert any
remaining "unknown" literals in its SELECT or RETURNING list to type text.
However, provide a switch to suppress that, and use it in the cases of
SELECT inside a set operation or INSERT command. In those cases we already
had type resolution rules that make use of context information from outside
the subquery proper, and we don't want to change that behavior.
Also, change creation of an unknown-type column in a relation from a
warning to a hard error. The error should be unreachable now in CREATE
VIEW or CREATE MATVIEW, but it's still possible to explicitly say "unknown"
in CREATE TABLE or CREATE (composite) TYPE. We want to forbid that because
it's nothing but a foot-gun.
This change creates a pg_upgrade failure case: a matview that contains an
unknown-type column can't be pg_upgraded, because reparsing the matview's
defining query will now decide that the column is of type text, which
doesn't match the cstring-like storage that the old materialized column
would actually have. Add a checking pass to detect that. While at it,
we can detect tables or composite types that would fail, essentially
for free. Those would fail safely anyway later on, but we might as
well fail earlier.
This patch is by me, but it owes something to previous investigations
by Rahila Syed. Also thanks to Ashutosh Bapat and Michael Paquier for
review.
Discussion: https://postgr.es/m/CAH2L28uwwbL9HUM-WR=hromW1Cvamkn7O-g8fPY2m=_7muJ0oA@mail.gmail.com
2017-01-25 15:17:18 +01:00
|
|
|
/*
|
|
|
|
* resolveTargetListUnknowns()
|
|
|
|
* Convert any unknown-type targetlist entries to type TEXT.
|
|
|
|
*
|
|
|
|
* We do this after we've exhausted all other ways of identifying the output
|
|
|
|
* column types of a query.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
resolveTargetListUnknowns(ParseState *pstate, List *targetlist)
|
|
|
|
{
|
|
|
|
ListCell *l;
|
|
|
|
|
|
|
|
foreach(l, targetlist)
|
|
|
|
{
|
|
|
|
TargetEntry *tle = (TargetEntry *) lfirst(l);
|
|
|
|
Oid restype = exprType((Node *) tle->expr);
|
|
|
|
|
|
|
|
if (restype == UNKNOWNOID)
|
|
|
|
{
|
|
|
|
tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
|
|
|
|
restype, TEXTOID, -1,
|
|
|
|
COERCION_IMPLICIT,
|
|
|
|
COERCE_IMPLICIT_CAST,
|
|
|
|
-1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-05-06 02:20:33 +02:00
|
|
|
/*
|
|
|
|
* markTargetListOrigins()
|
|
|
|
* Mark targetlist columns that are simple Vars with the source
|
|
|
|
* table's OID and column number.
|
|
|
|
*
|
Change unknown-type literals to type text in SELECT and RETURNING lists.
Previously, we left such literals alone if the query or subquery had
no properties forcing a type decision to be made (such as an ORDER BY or
DISTINCT clause using that output column). This meant that "unknown" could
be an exposed output column type, which has never been a great idea because
it could result in strange failures later on. For example, an outer query
that tried to do any operations on an unknown-type subquery output would
generally fail with some weird error like "failed to find conversion
function from unknown to text" or "could not determine which collation to
use for string comparison". Also, if the case occurred in a CREATE VIEW's
query then the view would have an unknown-type column, causing similar
failures in queries trying to use the view.
To fix, at the tail end of parse analysis of a query, forcibly convert any
remaining "unknown" literals in its SELECT or RETURNING list to type text.
However, provide a switch to suppress that, and use it in the cases of
SELECT inside a set operation or INSERT command. In those cases we already
had type resolution rules that make use of context information from outside
the subquery proper, and we don't want to change that behavior.
Also, change creation of an unknown-type column in a relation from a
warning to a hard error. The error should be unreachable now in CREATE
VIEW or CREATE MATVIEW, but it's still possible to explicitly say "unknown"
in CREATE TABLE or CREATE (composite) TYPE. We want to forbid that because
it's nothing but a foot-gun.
This change creates a pg_upgrade failure case: a matview that contains an
unknown-type column can't be pg_upgraded, because reparsing the matview's
defining query will now decide that the column is of type text, which
doesn't match the cstring-like storage that the old materialized column
would actually have. Add a checking pass to detect that. While at it,
we can detect tables or composite types that would fail, essentially
for free. Those would fail safely anyway later on, but we might as
well fail earlier.
This patch is by me, but it owes something to previous investigations
by Rahila Syed. Also thanks to Ashutosh Bapat and Michael Paquier for
review.
Discussion: https://postgr.es/m/CAH2L28uwwbL9HUM-WR=hromW1Cvamkn7O-g8fPY2m=_7muJ0oA@mail.gmail.com
2017-01-25 15:17:18 +01:00
|
|
|
* Currently, this is done only for SELECT targetlists and RETURNING lists,
|
|
|
|
* since we only need the info if we are going to send it to the frontend.
|
2003-05-06 02:20:33 +02:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
markTargetListOrigins(ParseState *pstate, List *targetlist)
|
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
2003-05-06 02:20:33 +02:00
|
|
|
|
|
|
|
foreach(l, targetlist)
|
|
|
|
{
|
|
|
|
TargetEntry *tle = (TargetEntry *) lfirst(l);
|
|
|
|
|
2005-04-06 18:34:07 +02:00
|
|
|
markTargetListOrigin(pstate, tle, (Var *) tle->expr, 0);
|
2003-05-06 02:20:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * markTargetListOrigin()
 *		If 'var' is a Var of a plain relation, mark 'tle' with its origin
 *
 * levelsup is an extra offset to interpret the Var's varlevelsup correctly.
 *
 * Note that we do not drill down into views, but report the view as the
 * column owner.  There's also no need to drill down into joins: if we see
 * a join alias Var, it must be a merged JOIN USING column (or possibly a
 * whole-row Var); that is not a direct reference to any plain table column,
 * so we don't report it.
 */
static void
markTargetListOrigin(ParseState *pstate, TargetEntry *tle,
					 Var *var, int levelsup)
{
	int			netlevelsup;
	RangeTblEntry *rte;
	AttrNumber	attnum;

	/* Only plain Var references can carry an origin; ignore anything else */
	if (var == NULL || !IsA(var, Var))
		return;
	/* Combine the Var's own nesting depth with the caller-supplied offset */
	netlevelsup = var->varlevelsup + levelsup;
	rte = GetRTEByRangeTablePosn(pstate, var->varno, netlevelsup);
	attnum = var->varattno;

	switch (rte->rtekind)
	{
		case RTE_RELATION:
			/* It's a table or view, report it */
			tle->resorigtbl = rte->relid;
			tle->resorigcol = attnum;
			break;
		case RTE_SUBQUERY:
			/* Subselect-in-FROM: copy up from the subselect */
			if (attnum != InvalidAttrNumber)
			{
				TargetEntry *ste = get_tle_by_resno(rte->subquery->targetList,
													attnum);

				if (ste == NULL || ste->resjunk)
					elog(ERROR, "subquery %s does not have attribute %d",
						 rte->eref->aliasname, attnum);
				tle->resorigtbl = ste->resorigtbl;
				tle->resorigcol = ste->resorigcol;
			}
			break;
		case RTE_JOIN:
		case RTE_FUNCTION:
		case RTE_VALUES:
		case RTE_TABLEFUNC:
		case RTE_NAMEDTUPLESTORE:
		case RTE_RESULT:
			/* not a simple relation, leave it unmarked */
			break;
		case RTE_CTE:

			/*
			 * CTE reference: copy up from the subquery, if possible. If the
			 * RTE is a recursive self-reference then we can't do anything
			 * because we haven't finished analyzing it yet. However, it's no
			 * big loss because we must be down inside the recursive term of a
			 * recursive CTE, and so any markings on the current targetlist
			 * are not going to affect the results anyway.
			 */
			if (attnum != InvalidAttrNumber && !rte->self_reference)
			{
				CommonTableExpr *cte = GetCTEForRTE(pstate, rte, netlevelsup);
				TargetEntry *ste;
				List	   *tl = GetCTETargetList(cte);
				int			extra_cols = 0;

				/*
				 * RTE for CTE will already have the search and cycle columns
				 * added, but the subquery won't, so skip looking those up.
				 */
				if (cte->search_clause)
					extra_cols += 1;
				if (cte->cycle_clause)
					extra_cols += 2;
				/* A Var pointing at one of those extra columns has no origin */
				if (extra_cols &&
					attnum > list_length(tl) &&
					attnum <= list_length(tl) + extra_cols)
					break;

				/* Otherwise, find the matching CTE output column */
				ste = get_tle_by_resno(tl, attnum);
				if (ste == NULL || ste->resjunk)
					elog(ERROR, "CTE %s does not have attribute %d",
						 rte->eref->aliasname, attnum);
				tle->resorigtbl = ste->resorigtbl;
				tle->resorigcol = ste->resorigcol;
			}
			break;
	}
}
|
|
|
|
|
|
|
|
|
1998-08-25 05:17:29 +02:00
|
|
|
/*
|
2006-08-02 03:59:48 +02:00
|
|
|
* transformAssignedExpr()
|
2014-05-06 18:12:18 +02:00
|
|
|
* This is used in INSERT and UPDATE statements only. It prepares an
|
2006-08-02 03:59:48 +02:00
|
|
|
* expression for assignment to a column of the target table.
|
1999-07-19 02:26:20 +02:00
|
|
|
* This includes coercing the given value to the target column's type
|
2004-06-09 21:08:20 +02:00
|
|
|
* (if necessary), and dealing with any subfield names or subscripts
|
2006-08-02 03:59:48 +02:00
|
|
|
* attached to the target column itself. The input expression has
|
|
|
|
* already been through transformExpr().
|
1999-07-19 02:26:20 +02:00
|
|
|
*
|
|
|
|
* pstate parse state
|
2006-08-02 03:59:48 +02:00
|
|
|
* expr expression to be modified
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
* exprKind indicates which type of statement we're dealing with
|
1999-07-19 02:26:20 +02:00
|
|
|
* colname target column name (ie, name of attribute to be assigned to)
|
1999-11-01 06:06:21 +01:00
|
|
|
* attrno target attribute number
|
2004-06-09 21:08:20 +02:00
|
|
|
* indirection subscripts/field names for target column, if any
|
2008-08-29 01:09:48 +02:00
|
|
|
* location error cursor position for the target column, or -1
|
2006-08-02 03:59:48 +02:00
|
|
|
*
|
|
|
|
* Returns the modified expression.
|
2008-10-07 03:47:55 +02:00
|
|
|
*
|
|
|
|
* Note: location points at the target column name (SET target or INSERT
|
|
|
|
* column name list entry), and must therefore be -1 in an INSERT that
|
2014-05-06 18:12:18 +02:00
|
|
|
* omits the column name list. So we should usually prefer to use
|
2008-10-07 03:47:55 +02:00
|
|
|
* exprLocation(expr) for errors that can happen in a default INSERT.
|
1998-08-25 05:17:29 +02:00
|
|
|
*/
|
2006-08-02 03:59:48 +02:00
|
|
|
/*
 * transformAssignedExpr()
 *	Produce a transformed expression tree for an assignment to a target
 *	column in INSERT/UPDATE.
 *
 * This handles the exprKind bookkeeping, rejects assignment to system
 * columns, fixes up DEFAULT placeholders, processes any subfield/subscript
 * indirection on the target column, and coerces the value to the target
 * column's type.
 *
 * pstate		parse state
 * expr			expression to be modified
 * exprKind		indicates which type of statement we're dealing with
 * colname		target column name (ie, name of attribute to be assigned to)
 * attrno		target attribute number
 * indirection	subscripts/field names for target column, if any
 * location		error cursor position for the target column, or -1
 *
 * Returns the modified expression.
 */
Expr *
transformAssignedExpr(ParseState *pstate,
					  Expr *expr,
					  ParseExprKind exprKind,
					  const char *colname,
					  int attrno,
					  List *indirection,
					  int location)
{
	Relation	rd = pstate->p_target_relation;
	Oid			type_id;		/* type of value provided */
	Oid			attrtype;		/* type of target column */
	int32		attrtypmod;
	Oid			attrcollation;	/* collation of target column */
	ParseExprKind sv_expr_kind;

	/*
	 * Save and restore identity of expression type we're parsing.  We must
	 * set p_expr_kind here because we can parse subscripts without going
	 * through transformExpr().
	 */
	Assert(exprKind != EXPR_KIND_NONE);
	sv_expr_kind = pstate->p_expr_kind;
	pstate->p_expr_kind = exprKind;

	Assert(rd != NULL);
	/* system columns have attrno <= 0; they are never assignable */
	if (attrno <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot assign to system column \"%s\"",
						colname),
				 parser_errposition(pstate, location)));
	attrtype = attnumTypeId(rd, attrno);
	attrtypmod = TupleDescAttr(rd->rd_att, attrno - 1)->atttypmod;
	attrcollation = TupleDescAttr(rd->rd_att, attrno - 1)->attcollation;

	/*
	 * If the expression is a DEFAULT placeholder, insert the attribute's
	 * type/typmod/collation into it so that exprType etc will report the
	 * right things.  (We expect that the eventually substituted default
	 * expression will in fact have this type and typmod.  The collation
	 * likely doesn't matter, but let's set it correctly anyway.)  Also,
	 * reject trying to update a subfield or array element with DEFAULT, since
	 * there can't be any default for portions of a column.
	 */
	if (expr && IsA(expr, SetToDefault))
	{
		SetToDefault *def = (SetToDefault *) expr;

		def->typeId = attrtype;
		def->typeMod = attrtypmod;
		def->collation = attrcollation;
		if (indirection)
		{
			/* distinguish array-element vs subfield for the error text */
			if (IsA(linitial(indirection), A_Indices))
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot set an array element to DEFAULT"),
						 parser_errposition(pstate, location)));
			else
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot set a subfield to DEFAULT"),
						 parser_errposition(pstate, location)));
		}
	}

	/* Now we can use exprType() safely. */
	type_id = exprType((Node *) expr);

	/*
	 * If there is indirection on the target column, prepare an array or
	 * subfield assignment expression.  This will generate a new column value
	 * that the source value has been inserted into, which can then be placed
	 * in the new tuple constructed by INSERT or UPDATE.
	 */
	if (indirection)
	{
		Node	   *colVar;

		if (pstate->p_is_insert)
		{
			/*
			 * The command is INSERT INTO table (col.something) ... so there
			 * is not really a source value to work with.  Insert a NULL
			 * constant as the source value.
			 */
			colVar = (Node *) makeNullConst(attrtype, attrtypmod,
											attrcollation);
		}
		else
		{
			/*
			 * Build a Var for the column to be updated.
			 */
			Var		   *var;

			var = makeVar(pstate->p_target_nsitem->p_rtindex, attrno,
						  attrtype, attrtypmod, attrcollation, 0);
			var->location = location;

			colVar = (Node *) var;
		}
		expr = (Expr *)
			transformAssignmentIndirection(pstate,
										   colVar,
										   colname,
										   false,
										   attrtype,
										   attrtypmod,
										   attrcollation,
										   indirection,
										   list_head(indirection),
										   (Node *) expr,
										   COERCION_ASSIGNMENT,
										   location);
	}
	else
	{
		/*
		 * For normal non-qualified target column, do type checking and
		 * coercion.
		 */
		Node	   *orig_expr = (Node *) expr;

		expr = (Expr *)
			coerce_to_target_type(pstate,
								  orig_expr, type_id,
								  attrtype, attrtypmod,
								  COERCION_ASSIGNMENT,
								  COERCE_IMPLICIT_CAST,
								  -1);
		/* coerce_to_target_type returns NULL when no coercion path exists */
		if (expr == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("column \"%s\" is of type %s"
							" but expression is of type %s",
							colname,
							format_type_be(attrtype),
							format_type_be(type_id)),
					 errhint("You will need to rewrite or cast the expression."),
					 parser_errposition(pstate, exprLocation(orig_expr))));
	}

	/* restore the caller's expression-kind context */
	pstate->p_expr_kind = sv_expr_kind;

	return expr;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* updateTargetListEntry()
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
* This is used in UPDATE statements (and ON CONFLICT DO UPDATE)
|
|
|
|
* only. It prepares an UPDATE TargetEntry for assignment to a
|
|
|
|
* column of the target table. This includes coercing the given
|
|
|
|
* value to the target column's type (if necessary), and dealing with
|
|
|
|
* any subfield names or subscripts attached to the target column
|
|
|
|
* itself.
|
2006-08-02 03:59:48 +02:00
|
|
|
*
|
|
|
|
* pstate parse state
|
|
|
|
* tle target list entry to be modified
|
|
|
|
* colname target column name (ie, name of attribute to be assigned to)
|
|
|
|
* attrno target attribute number
|
|
|
|
* indirection subscripts/field names for target column, if any
|
|
|
|
* location error cursor position (should point at column name), or -1
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
updateTargetListEntry(ParseState *pstate,
|
|
|
|
TargetEntry *tle,
|
|
|
|
char *colname,
|
|
|
|
int attrno,
|
|
|
|
List *indirection,
|
|
|
|
int location)
|
|
|
|
{
|
|
|
|
/* Fix up expression as needed */
|
|
|
|
tle->expr = transformAssignedExpr(pstate,
|
|
|
|
tle->expr,
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
EXPR_KIND_UPDATE_TARGET,
|
2006-08-02 03:59:48 +02:00
|
|
|
colname,
|
|
|
|
attrno,
|
|
|
|
indirection,
|
|
|
|
location);
|
|
|
|
|
2003-08-12 01:04:50 +02:00
|
|
|
/*
|
|
|
|
* Set the resno to identify the target column --- the rewriter and
|
2014-05-06 18:12:18 +02:00
|
|
|
* planner depend on this. We also set the resname to identify the target
|
2005-10-15 04:49:52 +02:00
|
|
|
* column, but this is only for debugging purposes; it should not be
|
|
|
|
* relied on. (In particular, it might be out of date in a stored rule.)
|
2003-08-12 01:04:50 +02:00
|
|
|
*/
|
2005-04-06 18:34:07 +02:00
|
|
|
tle->resno = (AttrNumber) attrno;
|
|
|
|
tle->resname = colname;
|
1999-07-19 02:26:20 +02:00
|
|
|
}
|
1997-11-25 23:07:18 +01:00
|
|
|
|
2006-08-02 03:59:48 +02:00
|
|
|
|
2004-06-09 21:08:20 +02:00
|
|
|
/*
|
|
|
|
* Process indirection (field selection or subscripting) of the target
|
2021-01-04 17:52:00 +01:00
|
|
|
* column in INSERT/UPDATE/assignment. This routine recurses for multiple
|
|
|
|
* levels of indirection --- but note that several adjacent A_Indices nodes
|
|
|
|
* in the indirection list are treated as a single multidimensional subscript
|
2004-06-09 21:08:20 +02:00
|
|
|
* operation.
|
|
|
|
*
|
|
|
|
* In the initial call, basenode is a Var for the target column in UPDATE,
|
2021-01-04 17:52:00 +01:00
|
|
|
* or a null Const of the target's type in INSERT, or a Param for the target
|
|
|
|
* variable in PL/pgSQL assignment. In recursive calls, basenode is NULL,
|
|
|
|
* indicating that a substitute node should be consed up if needed.
|
2004-06-09 21:08:20 +02:00
|
|
|
*
|
|
|
|
* targetName is the name of the field or subfield we're assigning to, and
|
2019-02-01 16:50:32 +01:00
|
|
|
* targetIsSubscripting is true if we're subscripting it. These are just for
|
2004-06-09 21:08:20 +02:00
|
|
|
* error reporting.
|
|
|
|
*
|
2011-03-26 19:25:48 +01:00
|
|
|
* targetTypeId, targetTypMod, targetCollation indicate the datatype and
|
|
|
|
* collation of the object to be assigned to (initially the target column,
|
|
|
|
* later some subobject).
|
2004-06-09 21:08:20 +02:00
|
|
|
*
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
* indirection is the list of indirection nodes, and indirection_cell is the
|
|
|
|
* start of the sublist remaining to process. When it's NULL, we're done
|
|
|
|
* recursing and can just coerce and return the RHS.
|
2004-06-09 21:08:20 +02:00
|
|
|
*
|
|
|
|
* rhs is the already-transformed value to be assigned; note it has not been
|
|
|
|
* coerced to any particular type.
|
2006-03-23 01:19:30 +01:00
|
|
|
*
|
2021-01-04 17:52:00 +01:00
|
|
|
* ccontext is the coercion level to use while coercing the rhs. For
|
|
|
|
* normal statements it'll be COERCION_ASSIGNMENT, but PL/pgSQL uses
|
|
|
|
* a special value.
|
|
|
|
*
|
2006-03-23 01:19:30 +01:00
|
|
|
* location is the cursor error position for any errors. (Note: this points
|
|
|
|
* to the head of the target clause, eg "foo" in "foo.bar[baz]". Later we
|
|
|
|
* might want to decorate indirection cells with their own location info,
|
|
|
|
* in which case the location argument could probably be dropped.)
|
2004-06-09 21:08:20 +02:00
|
|
|
*/
|
2021-01-04 17:52:00 +01:00
|
|
|
Node *
|
2004-06-09 21:08:20 +02:00
|
|
|
transformAssignmentIndirection(ParseState *pstate,
|
|
|
|
Node *basenode,
|
|
|
|
const char *targetName,
|
2019-02-01 16:50:32 +01:00
|
|
|
bool targetIsSubscripting,
|
2004-06-09 21:08:20 +02:00
|
|
|
Oid targetTypeId,
|
|
|
|
int32 targetTypMod,
|
2011-03-26 19:25:48 +01:00
|
|
|
Oid targetCollation,
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
List *indirection,
|
|
|
|
ListCell *indirection_cell,
|
2006-03-23 01:19:30 +01:00
|
|
|
Node *rhs,
|
2021-01-04 17:52:00 +01:00
|
|
|
CoercionContext ccontext,
|
2006-03-23 01:19:30 +01:00
|
|
|
int location)
|
2004-06-09 21:08:20 +02:00
|
|
|
{
|
|
|
|
Node *result;
|
|
|
|
List *subscripts = NIL;
|
|
|
|
bool isSlice = false;
|
|
|
|
ListCell *i;
|
|
|
|
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
if (indirection_cell && !basenode)
|
2004-06-09 21:08:20 +02:00
|
|
|
{
|
2018-10-30 20:26:11 +01:00
|
|
|
/*
|
|
|
|
* Set up a substitution. We abuse CaseTestExpr for this. It's safe
|
|
|
|
* to do so because the only nodes that will be above the CaseTestExpr
|
2019-07-01 03:00:23 +02:00
|
|
|
* in the finished expression will be FieldStore and SubscriptingRef
|
|
|
|
* nodes. (There could be other stuff in the tree, but it will be
|
|
|
|
* within other child fields of those node types.)
|
2018-10-30 20:26:11 +01:00
|
|
|
*/
|
2004-06-09 21:08:20 +02:00
|
|
|
CaseTestExpr *ctest = makeNode(CaseTestExpr);
|
|
|
|
|
|
|
|
ctest->typeId = targetTypeId;
|
|
|
|
ctest->typeMod = targetTypMod;
|
2011-03-26 19:25:48 +01:00
|
|
|
ctest->collation = targetCollation;
|
2004-06-09 21:08:20 +02:00
|
|
|
basenode = (Node *) ctest;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We have to split any field-selection operations apart from
|
2005-10-15 04:49:52 +02:00
|
|
|
* subscripting. Adjacent A_Indices nodes have to be treated as a single
|
|
|
|
* multidimensional subscript operation.
|
2004-06-09 21:08:20 +02:00
|
|
|
*/
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
for_each_cell(i, indirection, indirection_cell)
|
2004-06-09 21:08:20 +02:00
|
|
|
{
|
2004-08-29 07:07:03 +02:00
|
|
|
Node *n = lfirst(i);
|
2004-06-09 21:08:20 +02:00
|
|
|
|
|
|
|
if (IsA(n, A_Indices))
|
|
|
|
{
|
|
|
|
subscripts = lappend(subscripts, n);
|
2015-12-23 03:05:16 +01:00
|
|
|
if (((A_Indices *) n)->is_slice)
|
2004-06-09 21:08:20 +02:00
|
|
|
isSlice = true;
|
|
|
|
}
|
2008-08-30 03:39:14 +02:00
|
|
|
else if (IsA(n, A_Star))
|
|
|
|
{
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("row expansion via \"*\" is not supported here"),
|
|
|
|
parser_errposition(pstate, location)));
|
|
|
|
}
|
2004-06-09 21:08:20 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
FieldStore *fstore;
|
2017-10-26 19:47:45 +02:00
|
|
|
Oid baseTypeId;
|
|
|
|
int32 baseTypeMod;
|
2004-08-29 07:07:03 +02:00
|
|
|
Oid typrelid;
|
|
|
|
AttrNumber attnum;
|
|
|
|
Oid fieldTypeId;
|
|
|
|
int32 fieldTypMod;
|
2011-03-26 19:25:48 +01:00
|
|
|
Oid fieldCollation;
|
2004-06-09 21:08:20 +02:00
|
|
|
|
|
|
|
Assert(IsA(n, String));
|
|
|
|
|
|
|
|
/* process subscripts before this field selection */
|
|
|
|
if (subscripts)
|
|
|
|
{
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
/* recurse, and then return because we're done */
|
|
|
|
return transformAssignmentSubscripts(pstate,
|
|
|
|
basenode,
|
2004-06-09 21:08:20 +02:00
|
|
|
targetName,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
targetTypeId,
|
2004-06-09 21:08:20 +02:00
|
|
|
targetTypMod,
|
2011-03-26 19:25:48 +01:00
|
|
|
targetCollation,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
subscripts,
|
|
|
|
isSlice,
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
indirection,
|
2004-06-09 21:08:20 +02:00
|
|
|
i,
|
2006-03-23 01:19:30 +01:00
|
|
|
rhs,
|
2021-01-04 17:52:00 +01:00
|
|
|
ccontext,
|
2006-03-23 01:19:30 +01:00
|
|
|
location);
|
2004-06-09 21:08:20 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* No subscripts, so can process field selection here */
|
|
|
|
|
2017-10-26 19:47:45 +02:00
|
|
|
/*
|
|
|
|
* Look up the composite type, accounting for possibility that
|
|
|
|
* what we are given is a domain over composite.
|
|
|
|
*/
|
|
|
|
baseTypeMod = targetTypMod;
|
|
|
|
baseTypeId = getBaseTypeAndTypmod(targetTypeId, &baseTypeMod);
|
|
|
|
|
|
|
|
typrelid = typeidTypeRelid(baseTypeId);
|
2004-06-09 21:08:20 +02:00
|
|
|
if (!typrelid)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
2004-10-25 05:08:29 +02:00
|
|
|
errmsg("cannot assign to field \"%s\" of column \"%s\" because its type %s is not a composite type",
|
|
|
|
strVal(n), targetName,
|
2006-03-23 01:19:30 +01:00
|
|
|
format_type_be(targetTypeId)),
|
|
|
|
parser_errposition(pstate, location)));
|
2004-06-09 21:08:20 +02:00
|
|
|
|
|
|
|
attnum = get_attnum(typrelid, strVal(n));
|
|
|
|
if (attnum == InvalidAttrNumber)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
2004-10-25 05:08:29 +02:00
|
|
|
errmsg("cannot assign to field \"%s\" of column \"%s\" because there is no such column in data type %s",
|
|
|
|
strVal(n), targetName,
|
2006-03-23 01:19:30 +01:00
|
|
|
format_type_be(targetTypeId)),
|
|
|
|
parser_errposition(pstate, location)));
|
2004-06-09 21:08:20 +02:00
|
|
|
if (attnum < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("cannot assign to system column \"%s\"",
|
2006-03-23 01:19:30 +01:00
|
|
|
strVal(n)),
|
|
|
|
parser_errposition(pstate, location)));
|
2004-06-09 21:08:20 +02:00
|
|
|
|
2011-03-26 19:25:48 +01:00
|
|
|
get_atttypetypmodcoll(typrelid, attnum,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
&fieldTypeId, &fieldTypMod, &fieldCollation);
|
2004-06-09 21:08:20 +02:00
|
|
|
|
|
|
|
/* recurse to create appropriate RHS for field assign */
|
|
|
|
rhs = transformAssignmentIndirection(pstate,
|
|
|
|
NULL,
|
|
|
|
strVal(n),
|
|
|
|
false,
|
|
|
|
fieldTypeId,
|
|
|
|
fieldTypMod,
|
2011-03-26 19:25:48 +01:00
|
|
|
fieldCollation,
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
indirection,
|
|
|
|
lnext(indirection, i),
|
2006-03-23 01:19:30 +01:00
|
|
|
rhs,
|
2021-01-04 17:52:00 +01:00
|
|
|
ccontext,
|
2006-03-23 01:19:30 +01:00
|
|
|
location);
|
2004-06-09 21:08:20 +02:00
|
|
|
|
|
|
|
/* and build a FieldStore node */
|
|
|
|
fstore = makeNode(FieldStore);
|
|
|
|
fstore->arg = (Expr *) basenode;
|
|
|
|
fstore->newvals = list_make1(rhs);
|
|
|
|
fstore->fieldnums = list_make1_int(attnum);
|
2017-10-26 19:47:45 +02:00
|
|
|
fstore->resulttype = baseTypeId;
|
|
|
|
|
|
|
|
/* If target is a domain, apply constraints */
|
|
|
|
if (baseTypeId != targetTypeId)
|
|
|
|
return coerce_to_domain((Node *) fstore,
|
|
|
|
baseTypeId, baseTypeMod,
|
|
|
|
targetTypeId,
|
|
|
|
COERCION_IMPLICIT,
|
|
|
|
COERCE_IMPLICIT_CAST,
|
|
|
|
location,
|
|
|
|
false);
|
2004-06-09 21:08:20 +02:00
|
|
|
|
|
|
|
return (Node *) fstore;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* process trailing subscripts, if any */
|
|
|
|
if (subscripts)
|
|
|
|
{
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
/* recurse, and then return because we're done */
|
|
|
|
return transformAssignmentSubscripts(pstate,
|
|
|
|
basenode,
|
2004-06-09 21:08:20 +02:00
|
|
|
targetName,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
targetTypeId,
|
2004-06-09 21:08:20 +02:00
|
|
|
targetTypMod,
|
2011-03-26 19:25:48 +01:00
|
|
|
targetCollation,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
subscripts,
|
|
|
|
isSlice,
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
indirection,
|
2004-06-09 21:08:20 +02:00
|
|
|
NULL,
|
2006-03-23 01:19:30 +01:00
|
|
|
rhs,
|
2021-01-04 17:52:00 +01:00
|
|
|
ccontext,
|
2006-03-23 01:19:30 +01:00
|
|
|
location);
|
2004-06-09 21:08:20 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* base case: just coerce RHS to match target type ID */
|
|
|
|
|
|
|
|
result = coerce_to_target_type(pstate,
|
|
|
|
rhs, exprType(rhs),
|
|
|
|
targetTypeId, targetTypMod,
|
2021-01-04 17:52:00 +01:00
|
|
|
ccontext,
|
2008-08-29 01:09:48 +02:00
|
|
|
COERCE_IMPLICIT_CAST,
|
|
|
|
-1);
|
2004-06-09 21:08:20 +02:00
|
|
|
if (result == NULL)
|
|
|
|
{
|
2019-02-01 16:50:32 +01:00
|
|
|
if (targetIsSubscripting)
|
2004-06-09 21:08:20 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
errmsg("subscripted assignment to \"%s\" requires type %s"
|
2004-06-09 21:08:20 +02:00
|
|
|
" but expression is of type %s",
|
|
|
|
targetName,
|
|
|
|
format_type_be(targetTypeId),
|
|
|
|
format_type_be(exprType(rhs))),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errhint("You will need to rewrite or cast the expression."),
|
2006-03-23 01:19:30 +01:00
|
|
|
parser_errposition(pstate, location)));
|
2004-06-09 21:08:20 +02:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
|
|
errmsg("subfield \"%s\" is of type %s"
|
|
|
|
" but expression is of type %s",
|
|
|
|
targetName,
|
|
|
|
format_type_be(targetTypeId),
|
|
|
|
format_type_be(exprType(rhs))),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errhint("You will need to rewrite or cast the expression."),
|
2006-03-23 01:19:30 +01:00
|
|
|
parser_errposition(pstate, location)));
|
2004-06-09 21:08:20 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
/*
|
2019-02-01 16:50:32 +01:00
|
|
|
* helper for transformAssignmentIndirection: process container assignment
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
*/
|
|
|
|
static Node *
|
|
|
|
transformAssignmentSubscripts(ParseState *pstate,
|
|
|
|
Node *basenode,
|
|
|
|
const char *targetName,
|
|
|
|
Oid targetTypeId,
|
|
|
|
int32 targetTypMod,
|
2011-03-26 19:25:48 +01:00
|
|
|
Oid targetCollation,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
List *subscripts,
|
|
|
|
bool isSlice,
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
List *indirection,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
ListCell *next_indirection,
|
|
|
|
Node *rhs,
|
2021-01-04 17:52:00 +01:00
|
|
|
CoercionContext ccontext,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
int location)
|
|
|
|
{
|
|
|
|
Node *result;
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
SubscriptingRef *sbsref;
|
2019-02-01 16:50:32 +01:00
|
|
|
Oid containerType;
|
|
|
|
int32 containerTypMod;
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
Oid typeNeeded;
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
int32 typmodNeeded;
|
2011-03-26 19:25:48 +01:00
|
|
|
Oid collationNeeded;
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
|
|
|
|
Assert(subscripts != NIL);
|
|
|
|
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
/* Identify the actual container type involved */
|
2019-02-01 16:50:32 +01:00
|
|
|
containerType = targetTypeId;
|
|
|
|
containerTypMod = targetTypMod;
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
transformContainerType(&containerType, &containerTypMod);
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
/* Process subscripts and identify required type for RHS */
|
|
|
|
sbsref = transformContainerSubscripts(pstate,
|
|
|
|
basenode,
|
|
|
|
containerType,
|
|
|
|
containerTypMod,
|
|
|
|
subscripts,
|
|
|
|
true);
|
|
|
|
|
|
|
|
typeNeeded = sbsref->refrestype;
|
|
|
|
typmodNeeded = sbsref->reftypmod;
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
|
2011-03-26 19:25:48 +01:00
|
|
|
/*
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
* Container normally has same collation as its elements, but there's an
|
|
|
|
* exception: we might be subscripting a domain over a container type. In
|
|
|
|
* that case use collation of the base type. (This is shaky for arbitrary
|
|
|
|
* subscripting semantics, but it doesn't matter all that much since we
|
|
|
|
* only use this to label the collation of a possible CaseTestExpr.)
|
2011-03-26 19:25:48 +01:00
|
|
|
*/
|
2019-02-01 16:50:32 +01:00
|
|
|
if (containerType == targetTypeId)
|
2011-03-26 19:25:48 +01:00
|
|
|
collationNeeded = targetCollation;
|
|
|
|
else
|
2019-02-01 16:50:32 +01:00
|
|
|
collationNeeded = get_typcollation(containerType);
|
2011-03-26 19:25:48 +01:00
|
|
|
|
2019-02-01 16:50:32 +01:00
|
|
|
/* recurse to create appropriate RHS for container assign */
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
rhs = transformAssignmentIndirection(pstate,
|
|
|
|
NULL,
|
|
|
|
targetName,
|
|
|
|
true,
|
|
|
|
typeNeeded,
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
typmodNeeded,
|
2011-03-26 19:25:48 +01:00
|
|
|
collationNeeded,
|
Represent Lists as expansible arrays, not chains of cons-cells.
Originally, Postgres Lists were a more or less exact reimplementation of
Lisp lists, which consist of chains of separately-allocated cons cells,
each having a value and a next-cell link. We'd hacked that once before
(commit d0b4399d8) to add a separate List header, but the data was still
in cons cells. That makes some operations -- notably list_nth() -- O(N),
and it's bulky because of the next-cell pointers and per-cell palloc
overhead, and it's very cache-unfriendly if the cons cells end up
scattered around rather than being adjacent.
In this rewrite, we still have List headers, but the data is in a
resizable array of values, with no next-cell links. Now we need at
most two palloc's per List, and often only one, since we can allocate
some values in the same palloc call as the List header. (Of course,
extending an existing List may require repalloc's to enlarge the array.
But this involves just O(log N) allocations not O(N).)
Of course this is not without downsides. The key difficulty is that
addition or deletion of a list entry may now cause other entries to
move, which it did not before.
For example, that breaks foreach() and sister macros, which historically
used a pointer to the current cons-cell as loop state. We can repair
those macros transparently by making their actual loop state be an
integer list index; the exposed "ListCell *" pointer is no longer state
carried across loop iterations, but is just a derived value. (In
practice, modern compilers can optimize things back to having just one
loop state value, at least for simple cases with inline loop bodies.)
In principle, this is a semantics change for cases where the loop body
inserts or deletes list entries ahead of the current loop index; but
I found no such cases in the Postgres code.
The change is not at all transparent for code that doesn't use foreach()
but chases lists "by hand" using lnext(). The largest share of such
code in the backend is in loops that were maintaining "prev" and "next"
variables in addition to the current-cell pointer, in order to delete
list cells efficiently using list_delete_cell(). However, we no longer
need a previous-cell pointer to delete a list cell efficiently. Keeping
a next-cell pointer doesn't work, as explained above, but we can improve
matters by changing such code to use a regular foreach() loop and then
using the new macro foreach_delete_current() to delete the current cell.
(This macro knows how to update the associated foreach loop's state so
that no cells will be missed in the traversal.)
There remains a nontrivial risk of code assuming that a ListCell *
pointer will remain good over an operation that could now move the list
contents. To help catch such errors, list.c can be compiled with a new
define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents
whenever that could possibly happen. This makes list operations
significantly more expensive so it's not normally turned on (though it
is on by default if USE_VALGRIND is on).
There are two notable API differences from the previous code:
* lnext() now requires the List's header pointer in addition to the
current cell's address.
* list_delete_cell() no longer requires a previous-cell argument.
These changes are somewhat unfortunate, but on the other hand code using
either function needs inspection to see if it is assuming anything
it shouldn't, so it's not all bad.
Programmers should be aware of these significant performance changes:
* list_nth() and related functions are now O(1); so there's no
major access-speed difference between a list and an array.
* Inserting or deleting a list element now takes time proportional to
the distance to the end of the list, due to moving the array elements.
(However, it typically *doesn't* require palloc or pfree, so except in
long lists it's probably still faster than before.) Notably, lcons()
used to be about the same cost as lappend(), but that's no longer true
if the list is long. Code that uses lcons() and list_delete_first()
to maintain a stack might usefully be rewritten to push and pop at the
end of the list rather than the beginning.
* There are now list_insert_nth...() and list_delete_nth...() functions
that add or remove a list cell identified by index. These have the
data-movement penalty explained above, but there's no search penalty.
* list_concat() and variants now copy the second list's data into
storage belonging to the first list, so there is no longer any
sharing of cells between the input lists. The second argument is
now declared "const List *" to reflect that it isn't changed.
This patch just does the minimum needed to get the new implementation
in place and fix bugs exposed by the regression tests. As suggested
by the foregoing, there's a fair amount of followup work remaining to
do.
Also, the ENABLE_LIST_COMPAT macros are finally removed in this
commit. Code using those should have been gone a dozen years ago.
Patch by me; thanks to David Rowley, Jesper Pedersen, and others
for review.
Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
|
|
|
indirection,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
next_indirection,
|
|
|
|
rhs,
|
2021-01-04 17:52:00 +01:00
|
|
|
ccontext,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
location);
|
|
|
|
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
/*
|
|
|
|
* Insert the already-properly-coerced RHS into the SubscriptingRef. Then
|
|
|
|
* set refrestype and reftypmod back to the container type's values.
|
|
|
|
*/
|
|
|
|
sbsref->refassgnexpr = (Expr *) rhs;
|
|
|
|
sbsref->refrestype = containerType;
|
|
|
|
sbsref->reftypmod = containerTypMod;
|
|
|
|
|
|
|
|
result = (Node *) sbsref;
|
2019-02-01 16:50:32 +01:00
|
|
|
|
|
|
|
/* If target was a domain over container, need to coerce up to the domain */
|
|
|
|
if (containerType != targetTypeId)
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
{
|
Fix array slicing of int2vector and oidvector values.
The previous coding labeled expressions such as pg_index.indkey[1:3] as
being of int2vector type; which is not right because the subscript bounds
of such a result don't, in general, satisfy the restrictions of int2vector.
To fix, implicitly promote the result of slicing int2vector to int2[],
or oidvector to oid[]. This is similar to what we've done with domains
over arrays, which is a good analogy because these types are very much
like restricted domains of the corresponding regular-array types.
A side-effect is that we now also forbid array-element updates on such
columns, eg while "update pg_index set indkey[4] = 42" would have worked
before if you were superuser (and corrupted your catalogs irretrievably,
no doubt) it's now disallowed. This seems like a good thing since, again,
some choices of subscripting would've led to results not satisfying the
restrictions of int2vector. The case of an array-slice update was
rejected before, though with a different error message than you get now.
We could make these cases work in future if we added a cast from int2[]
to int2vector (with a cast function checking the subscript restrictions)
but it seems unlikely that there's any value in that.
Per report from Ronan Dunklau. Back-patch to all supported branches
because of the crash risks involved.
2013-11-24 02:03:56 +01:00
|
|
|
Oid resulttype = exprType(result);
|
|
|
|
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
result = coerce_to_target_type(pstate,
|
Fix array slicing of int2vector and oidvector values.
The previous coding labeled expressions such as pg_index.indkey[1:3] as
being of int2vector type; which is not right because the subscript bounds
of such a result don't, in general, satisfy the restrictions of int2vector.
To fix, implicitly promote the result of slicing int2vector to int2[],
or oidvector to oid[]. This is similar to what we've done with domains
over arrays, which is a good analogy because these types are very much
like restricted domains of the corresponding regular-array types.
A side-effect is that we now also forbid array-element updates on such
columns, eg while "update pg_index set indkey[4] = 42" would have worked
before if you were superuser (and corrupted your catalogs irretrievably,
no doubt) it's now disallowed. This seems like a good thing since, again,
some choices of subscripting would've led to results not satisfying the
restrictions of int2vector. The case of an array-slice update was
rejected before, though with a different error message than you get now.
We could make these cases work in future if we added a cast from int2[]
to int2vector (with a cast function checking the subscript restrictions)
but it seems unlikely that there's any value in that.
Per report from Ronan Dunklau. Back-patch to all supported branches
because of the crash risks involved.
2013-11-24 02:03:56 +01:00
|
|
|
result, resulttype,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
targetTypeId, targetTypMod,
|
2021-01-04 17:52:00 +01:00
|
|
|
ccontext,
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
COERCE_IMPLICIT_CAST,
|
|
|
|
-1);
|
Fix array slicing of int2vector and oidvector values.
The previous coding labeled expressions such as pg_index.indkey[1:3] as
being of int2vector type; which is not right because the subscript bounds
of such a result don't, in general, satisfy the restrictions of int2vector.
To fix, implicitly promote the result of slicing int2vector to int2[],
or oidvector to oid[]. This is similar to what we've done with domains
over arrays, which is a good analogy because these types are very much
like restricted domains of the corresponding regular-array types.
A side-effect is that we now also forbid array-element updates on such
columns, eg while "update pg_index set indkey[4] = 42" would have worked
before if you were superuser (and corrupted your catalogs irretrievably,
no doubt) it's now disallowed. This seems like a good thing since, again,
some choices of subscripting would've led to results not satisfying the
restrictions of int2vector. The case of an array-slice update was
rejected before, though with a different error message than you get now.
We could make these cases work in future if we added a cast from int2[]
to int2vector (with a cast function checking the subscript restrictions)
but it seems unlikely that there's any value in that.
Per report from Ronan Dunklau. Back-patch to all supported branches
because of the crash risks involved.
2013-11-24 02:03:56 +01:00
|
|
|
/* can fail if we had int2vector/oidvector, but not for true domains */
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
if (result == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CANNOT_COERCE),
|
|
|
|
errmsg("cannot cast type %s to %s",
|
Fix array slicing of int2vector and oidvector values.
The previous coding labeled expressions such as pg_index.indkey[1:3] as
being of int2vector type; which is not right because the subscript bounds
of such a result don't, in general, satisfy the restrictions of int2vector.
To fix, implicitly promote the result of slicing int2vector to int2[],
or oidvector to oid[]. This is similar to what we've done with domains
over arrays, which is a good analogy because these types are very much
like restricted domains of the corresponding regular-array types.
A side-effect is that we now also forbid array-element updates on such
columns, eg while "update pg_index set indkey[4] = 42" would have worked
before if you were superuser (and corrupted your catalogs irretrievably,
no doubt) it's now disallowed. This seems like a good thing since, again,
some choices of subscripting would've led to results not satisfying the
restrictions of int2vector. The case of an array-slice update was
rejected before, though with a different error message than you get now.
We could make these cases work in future if we added a cast from int2[]
to int2vector (with a cast function checking the subscript restrictions)
but it seems unlikely that there's any value in that.
Per report from Ronan Dunklau. Back-patch to all supported branches
because of the crash risks involved.
2013-11-24 02:03:56 +01:00
|
|
|
format_type_be(resulttype),
|
Improve handling of domains over arrays.
This patch eliminates various bizarre behaviors caused by sloppy thinking
about the difference between a domain type and its underlying array type.
In particular, the operation of updating one element of such an array
has to be considered as yielding a value of the underlying array type,
*not* a value of the domain, because there's no assurance that the
domain's CHECK constraints are still satisfied. If we're intending to
store the result back into a domain column, we have to re-cast to the
domain type so that constraints are re-checked.
For similar reasons, such a domain can't be blindly matched to an ANYARRAY
polymorphic parameter, because the polymorphic function is likely to apply
array-ish operations that could invalidate the domain constraints. For the
moment, we just forbid such matching. We might later wish to insert an
automatic downcast to the underlying array type, but such a change should
also change matching of domains to ANYELEMENT for consistency.
To ensure that all such logic is rechecked, this patch removes the original
hack of setting a domain's pg_type.typelem field to match its base type;
the typelem will always be zero instead. In those places where it's really
okay to look through the domain type with no other logic changes, use the
newly added get_base_element_type function in place of get_element_type.
catversion bumped due to change in pg_type contents.
Per bug #5717 from Richard Huxton and subsequent discussion.
2010-10-21 22:07:17 +02:00
|
|
|
format_type_be(targetTypeId)),
|
|
|
|
parser_errposition(pstate, location)));
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
1997-11-25 23:07:18 +01:00
|
|
|
|
|
|
|
/*
|
1999-11-01 06:06:21 +01:00
|
|
|
* checkInsertTargets -
|
2002-03-21 17:02:16 +01:00
|
|
|
* generate a list of INSERT column targets if not supplied, or
|
1999-07-19 02:26:20 +02:00
|
|
|
* test supplied column names to make sure they are in target table.
|
1999-11-01 06:06:21 +01:00
|
|
|
* Also return an integer list of the columns' attribute numbers.
|
1997-11-25 23:07:18 +01:00
|
|
|
*/
|
|
|
|
List *
|
1999-11-01 06:06:21 +01:00
|
|
|
checkInsertTargets(ParseState *pstate, List *cols, List **attrnos)
|
1997-11-25 23:07:18 +01:00
|
|
|
{
|
1999-11-01 06:06:21 +01:00
|
|
|
*attrnos = NIL;
|
|
|
|
|
1997-11-25 23:07:18 +01:00
|
|
|
if (cols == NIL)
|
|
|
|
{
|
1999-07-19 02:26:20 +02:00
|
|
|
/*
|
|
|
|
* Generate default column list for INSERT.
|
|
|
|
*/
|
2018-04-07 22:00:39 +02:00
|
|
|
int numcol = RelationGetNumberOfAttributes(pstate->p_target_relation);
|
|
|
|
|
1999-07-19 02:26:20 +02:00
|
|
|
int i;
|
1997-11-25 23:07:18 +01:00
|
|
|
|
|
|
|
for (i = 0; i < numcol; i++)
|
|
|
|
{
|
2002-09-04 22:31:48 +02:00
|
|
|
ResTarget *col;
|
2017-08-20 20:19:07 +02:00
|
|
|
Form_pg_attribute attr;
|
1997-11-25 23:07:18 +01:00
|
|
|
|
2017-08-20 20:19:07 +02:00
|
|
|
attr = TupleDescAttr(pstate->p_target_relation->rd_att, i);
|
|
|
|
|
|
|
|
if (attr->attisdropped)
|
2002-08-02 20:15:10 +02:00
|
|
|
continue;
|
|
|
|
|
|
|
|
col = makeNode(ResTarget);
|
2017-08-20 20:19:07 +02:00
|
|
|
col->name = pstrdup(NameStr(attr->attname));
|
2002-03-21 17:02:16 +01:00
|
|
|
col->indirection = NIL;
|
|
|
|
col->val = NULL;
|
2006-03-23 01:19:30 +01:00
|
|
|
col->location = -1;
|
2002-03-21 17:02:16 +01:00
|
|
|
cols = lappend(cols, col);
|
2004-05-31 01:40:41 +02:00
|
|
|
*attrnos = lappend_int(*attrnos, i + 1);
|
1997-11-25 23:07:18 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
1999-07-19 02:26:20 +02:00
|
|
|
/*
|
|
|
|
* Do initial validation of user-supplied INSERT column list.
|
|
|
|
*/
|
2005-03-26 07:28:59 +01:00
|
|
|
Bitmapset *wholecols = NULL;
|
|
|
|
Bitmapset *partialcols = NULL;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *tl;
|
1999-07-19 02:26:20 +02:00
|
|
|
|
1997-11-25 23:07:18 +01:00
|
|
|
foreach(tl, cols)
|
|
|
|
{
|
2004-06-09 21:08:20 +02:00
|
|
|
ResTarget *col = (ResTarget *) lfirst(tl);
|
|
|
|
char *name = col->name;
|
1999-11-01 06:06:21 +01:00
|
|
|
int attrno;
|
1998-02-26 05:46:47 +01:00
|
|
|
|
2003-07-19 22:20:53 +02:00
|
|
|
/* Lookup column name, ereport on failure */
|
2002-08-02 20:15:10 +02:00
|
|
|
attrno = attnameAttNum(pstate->p_target_relation, name, false);
|
2006-03-23 01:19:30 +01:00
|
|
|
if (attrno == InvalidAttrNumber)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errmsg("column \"%s\" of relation \"%s\" does not exist",
|
|
|
|
name,
|
|
|
|
RelationGetRelationName(pstate->p_target_relation)),
|
2006-03-23 01:19:30 +01:00
|
|
|
parser_errposition(pstate, col->location)));
|
2004-06-09 21:08:20 +02:00
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Check for duplicates, but only of whole columns --- we allow
|
|
|
|
* INSERT INTO foo (col.subcol1, col.subcol2)
|
2004-06-09 21:08:20 +02:00
|
|
|
*/
|
|
|
|
if (col->indirection == NIL)
|
|
|
|
{
|
|
|
|
/* whole column; must not have any other assignment */
|
2005-03-26 07:28:59 +01:00
|
|
|
if (bms_is_member(attrno, wholecols) ||
|
|
|
|
bms_is_member(attrno, partialcols))
|
2004-06-09 21:08:20 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DUPLICATE_COLUMN),
|
2005-10-15 04:49:52 +02:00
|
|
|
errmsg("column \"%s\" specified more than once",
|
2006-03-23 01:19:30 +01:00
|
|
|
name),
|
|
|
|
parser_errposition(pstate, col->location)));
|
2005-03-26 07:28:59 +01:00
|
|
|
wholecols = bms_add_member(wholecols, attrno);
|
2004-06-09 21:08:20 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* partial column; must not have any whole assignment */
|
2005-03-26 07:28:59 +01:00
|
|
|
if (bms_is_member(attrno, wholecols))
|
2004-06-09 21:08:20 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DUPLICATE_COLUMN),
|
2005-10-15 04:49:52 +02:00
|
|
|
errmsg("column \"%s\" specified more than once",
|
2006-03-23 01:19:30 +01:00
|
|
|
name),
|
|
|
|
parser_errposition(pstate, col->location)));
|
2005-03-26 07:28:59 +01:00
|
|
|
partialcols = bms_add_member(partialcols, attrno);
|
2004-06-09 21:08:20 +02:00
|
|
|
}
|
|
|
|
|
2004-05-31 01:40:41 +02:00
|
|
|
*attrnos = lappend_int(*attrnos, attrno);
|
1997-11-25 23:07:18 +01:00
|
|
|
}
|
|
|
|
}
|
1998-02-26 05:46:47 +01:00
|
|
|
|
1997-11-25 23:07:18 +01:00
|
|
|
return cols;
|
|
|
|
}
|
|
|
|
|
2004-06-19 20:19:56 +02:00
|
|
|
/*
|
|
|
|
* ExpandColumnRefStar()
|
2006-06-26 19:24:41 +02:00
|
|
|
* Transforms foo.* into a list of expressions or targetlist entries.
|
2004-06-19 20:19:56 +02:00
|
|
|
*
|
2008-08-30 03:39:14 +02:00
|
|
|
* This handles the case where '*' appears as the last or only item in a
|
2006-06-26 19:24:41 +02:00
|
|
|
* ColumnRef. The code is shared between the case of foo.* at the top level
|
|
|
|
* in a SELECT target list (where we want TargetEntry nodes in the result)
|
2006-08-02 03:59:48 +02:00
|
|
|
* and foo.* in a ROW() or VALUES() construct (where we want just bare
|
|
|
|
* expressions).
|
2009-01-22 21:16:10 +01:00
|
|
|
*
|
|
|
|
* The referenced columns are marked as requiring SELECT access.
|
2004-06-19 20:19:56 +02:00
|
|
|
*/
|
2006-08-02 03:59:48 +02:00
|
|
|
static List *
ExpandColumnRefStar(ParseState *pstate, ColumnRef *cref,
					bool make_target_entry)
{
	List	   *fields = cref->fields;
	int			numnames = list_length(fields);

	if (numnames == 1)
	{
		/*
		 * Target item is a bare '*', expand all tables
		 *
		 * (e.g., SELECT * FROM emp, dept)
		 *
		 * Since the grammar only accepts bare '*' at top level of SELECT, we
		 * need not handle the make_target_entry==false case here.
		 */
		Assert(make_target_entry);
		return ExpandAllTables(pstate, cref->location);
	}
	else
	{
		/*
		 * Target item is relation.*, expand that table
		 *
		 * (e.g., SELECT emp.*, dname FROM emp, dept)
		 *
		 * Note: this code is a lot like transformColumnRef; it's tempting to
		 * call that instead and then replace the resulting whole-row Var with
		 * a list of Vars.  However, that would leave us with the RTE's
		 * selectedCols bitmap showing the whole row as needing select
		 * permission, as well as the individual columns.  That would be
		 * incorrect (since columns added later shouldn't need select
		 * permissions).  We could try to remove the whole-row permission bit
		 * after the fact, but duplicating code is less messy.
		 */
		char	   *nspname = NULL;
		char	   *relname = NULL;
		ParseNamespaceItem *nsitem = NULL;
		int			levels_up;
		enum
		{
			CRSERR_NO_RTE,
			CRSERR_WRONG_DB,
			CRSERR_TOO_MANY
		}			crserr = CRSERR_NO_RTE;

		/*
		 * Give the PreParseColumnRefHook, if any, first shot.  If it returns
		 * non-null then we should use that expression.
		 */
		if (pstate->p_pre_columnref_hook != NULL)
		{
			Node	   *node;

			node = pstate->p_pre_columnref_hook(pstate, cref);
			if (node != NULL)
				return ExpandRowReference(pstate, node, make_target_entry);
		}

		/*
		 * Resolve the qualified name to a namespace item.  With 2 names it's
		 * rel.*; with 3, schema.rel.*; with 4, catalog.schema.rel.* (the
		 * catalog part must match the current database).  Failures are
		 * recorded in crserr rather than thrown immediately, to give the
		 * post-parse hook a chance first.
		 */
		switch (numnames)
		{
			case 2:
				relname = strVal(linitial(fields));
				nsitem = refnameNamespaceItem(pstate, nspname, relname,
											  cref->location,
											  &levels_up);
				break;
			case 3:
				nspname = strVal(linitial(fields));
				relname = strVal(lsecond(fields));
				nsitem = refnameNamespaceItem(pstate, nspname, relname,
											  cref->location,
											  &levels_up);
				break;
			case 4:
				{
					char	   *catname = strVal(linitial(fields));

					/*
					 * We check the catalog name and then ignore it.
					 */
					if (strcmp(catname, get_database_name(MyDatabaseId)) != 0)
					{
						crserr = CRSERR_WRONG_DB;
						break;
					}
					nspname = strVal(lsecond(fields));
					relname = strVal(lthird(fields));
					nsitem = refnameNamespaceItem(pstate, nspname, relname,
												  cref->location,
												  &levels_up);
					break;
				}
			default:
				crserr = CRSERR_TOO_MANY;
				break;
		}

		/*
		 * Now give the PostParseColumnRefHook, if any, a chance.  We cheat a
		 * bit by passing the RangeTblEntry, not a Var, as the planned
		 * translation.  (A single Var wouldn't be strictly correct anyway.
		 * This convention allows hooks that really care to know what is
		 * happening.  It might be better to pass the nsitem, but we'd have to
		 * promote that struct to a full-fledged Node type so that callees
		 * could identify its type.)
		 */
		if (pstate->p_post_columnref_hook != NULL)
		{
			Node	   *node;

			node = pstate->p_post_columnref_hook(pstate, cref,
												 (Node *) (nsitem ? nsitem->p_rte : NULL));
			if (node != NULL)
			{
				/* both the hook and the namespace matched: ambiguous */
				if (nsitem != NULL)
					ereport(ERROR,
							(errcode(ERRCODE_AMBIGUOUS_COLUMN),
							 errmsg("column reference \"%s\" is ambiguous",
									NameListToString(cref->fields)),
							 parser_errposition(pstate, cref->location)));
				return ExpandRowReference(pstate, node, make_target_entry);
			}
		}

		/*
		 * Throw error if no translation found.
		 */
		if (nsitem == NULL)
		{
			switch (crserr)
			{
				case CRSERR_NO_RTE:
					errorMissingRTE(pstate, makeRangeVar(nspname, relname,
														 cref->location));
					break;
				case CRSERR_WRONG_DB:
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("cross-database references are not implemented: %s",
									NameListToString(cref->fields)),
							 parser_errposition(pstate, cref->location)));
					break;
				case CRSERR_TOO_MANY:
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("improper qualified name (too many dotted names): %s",
									NameListToString(cref->fields)),
							 parser_errposition(pstate, cref->location)));
					break;
			}
		}

		/*
		 * OK, expand the nsitem into fields.
		 */
		return ExpandSingleTable(pstate, nsitem, levels_up, cref->location,
								 make_target_entry);
	}
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExpandAllTables()
|
2006-06-26 19:24:41 +02:00
|
|
|
* Transforms '*' (in the target list) into a list of targetlist entries.
|
2000-09-12 23:07:18 +02:00
|
|
|
*
|
2012-08-08 22:41:04 +02:00
|
|
|
* tlist entries are generated for each relation visible for unqualified
|
2014-05-06 18:12:18 +02:00
|
|
|
* column name access. We do not consider qualified-name-only entries because
|
2012-08-08 22:41:04 +02:00
|
|
|
* that would include input tables of aliasless JOINs, NEW/OLD pseudo-entries,
|
|
|
|
* etc.
|
2009-01-22 21:16:10 +01:00
|
|
|
*
|
|
|
|
* The referenced relations/columns are marked as requiring SELECT access.
|
1997-11-25 23:07:18 +01:00
|
|
|
*/
|
1997-11-26 04:43:18 +01:00
|
|
|
static List *
|
2008-09-01 22:42:46 +02:00
|
|
|
ExpandAllTables(ParseState *pstate, int location)
|
1997-11-25 23:07:18 +01:00
|
|
|
{
|
|
|
|
List *target = NIL;
|
2012-08-08 22:41:04 +02:00
|
|
|
bool found_table = false;
|
2005-06-05 02:38:11 +02:00
|
|
|
ListCell *l;
|
1997-11-25 23:07:18 +01:00
|
|
|
|
2012-08-08 22:41:04 +02:00
|
|
|
foreach(l, pstate->p_namespace)
|
2005-06-05 02:38:11 +02:00
|
|
|
{
|
2012-08-08 01:02:54 +02:00
|
|
|
ParseNamespaceItem *nsitem = (ParseNamespaceItem *) lfirst(l);
|
2002-03-12 01:52:10 +01:00
|
|
|
|
2012-08-08 22:41:04 +02:00
|
|
|
/* Ignore table-only items */
|
|
|
|
if (!nsitem->p_cols_visible)
|
|
|
|
continue;
|
2012-08-08 01:02:54 +02:00
|
|
|
/* Should not have any lateral-only items when parsing targetlist */
|
|
|
|
Assert(!nsitem->p_lateral_only);
|
2012-08-08 22:41:04 +02:00
|
|
|
/* Remember we found a p_cols_visible item */
|
|
|
|
found_table = true;
|
2012-08-08 01:02:54 +02:00
|
|
|
|
2004-08-19 22:57:41 +02:00
|
|
|
target = list_concat(target,
|
2019-12-26 17:16:42 +01:00
|
|
|
expandNSItemAttrs(pstate,
|
|
|
|
nsitem,
|
|
|
|
0,
|
|
|
|
location));
|
1997-11-25 23:07:18 +01:00
|
|
|
}
|
2000-09-12 23:07:18 +02:00
|
|
|
|
2012-08-08 22:41:04 +02:00
|
|
|
/*
|
|
|
|
* Check for "SELECT *;". We do it this way, rather than checking for
|
|
|
|
* target == NIL, because we want to allow SELECT * FROM a zero_column
|
|
|
|
* table.
|
|
|
|
*/
|
|
|
|
if (!found_table)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("SELECT * with no tables specified is not valid"),
|
|
|
|
parser_errposition(pstate, location)));
|
|
|
|
|
1997-11-25 23:07:18 +01:00
|
|
|
return target;
|
|
|
|
}
|
|
|
|
|
2004-06-19 20:19:56 +02:00
|
|
|
/*
|
|
|
|
* ExpandIndirectionStar()
|
2006-06-26 19:24:41 +02:00
|
|
|
* Transforms foo.* into a list of expressions or targetlist entries.
|
2004-06-19 20:19:56 +02:00
|
|
|
*
|
|
|
|
* This handles the case where '*' appears as the last item in A_Indirection.
|
2006-06-26 19:24:41 +02:00
|
|
|
* The code is shared between the case of foo.* at the top level in a SELECT
|
|
|
|
* target list (where we want TargetEntry nodes in the result) and foo.* in
|
2006-08-02 03:59:48 +02:00
|
|
|
* a ROW() or VALUES() construct (where we want just bare expressions).
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
* For robustness, we use a separate "make_target_entry" flag to control
|
|
|
|
* this rather than relying on exprKind.
|
2004-06-19 20:19:56 +02:00
|
|
|
*/
|
2006-08-02 03:59:48 +02:00
|
|
|
static List *
|
2006-06-26 19:24:41 +02:00
|
|
|
ExpandIndirectionStar(ParseState *pstate, A_Indirection *ind,
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
bool make_target_entry, ParseExprKind exprKind)
|
2004-06-19 20:19:56 +02:00
|
|
|
{
|
|
|
|
Node *expr;
|
|
|
|
|
|
|
|
/* Strip off the '*' to create a reference to the rowtype object */
|
|
|
|
ind = copyObject(ind);
|
|
|
|
ind->indirection = list_truncate(ind->indirection,
|
|
|
|
list_length(ind->indirection) - 1);
|
|
|
|
|
|
|
|
/* And transform that */
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
expr = transformExpr(pstate, (Node *) ind, exprKind);
|
2004-06-19 20:19:56 +02:00
|
|
|
|
2009-10-31 02:41:31 +01:00
|
|
|
/* Expand the rowtype expression into individual fields */
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
return ExpandRowReference(pstate, expr, make_target_entry);
|
2009-10-31 02:41:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExpandSingleTable()
|
|
|
|
* Transforms foo.* into a list of expressions or targetlist entries.
|
|
|
|
*
|
|
|
|
* This handles the case where foo has been determined to be a simple
|
|
|
|
* reference to an RTE, so we can just generate Vars for the expressions.
|
|
|
|
*
|
|
|
|
* The referenced columns are marked as requiring SELECT access.
|
|
|
|
*/
|
|
|
|
static List *
|
2019-12-26 17:16:42 +01:00
|
|
|
ExpandSingleTable(ParseState *pstate, ParseNamespaceItem *nsitem,
|
|
|
|
int sublevels_up, int location, bool make_target_entry)
|
2009-10-31 02:41:31 +01:00
|
|
|
{
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
if (make_target_entry)
|
2009-10-31 02:41:31 +01:00
|
|
|
{
|
2019-12-26 17:16:42 +01:00
|
|
|
/* expandNSItemAttrs handles permissions marking */
|
|
|
|
return expandNSItemAttrs(pstate, nsitem, sublevels_up, location);
|
2009-10-31 02:41:31 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-12-26 17:16:42 +01:00
|
|
|
RangeTblEntry *rte = nsitem->p_rte;
|
2009-10-31 02:41:31 +01:00
|
|
|
List *vars;
|
|
|
|
ListCell *l;
|
|
|
|
|
Make parser rely more heavily on the ParseNamespaceItem data structure.
When I added the ParseNamespaceItem data structure (in commit 5ebaaa494),
it wasn't very tightly integrated into the parser's APIs. In the wake of
adding p_rtindex to that struct (commit b541e9acc), there is a good reason
to make more use of it: by passing around ParseNamespaceItem pointers
instead of bare RTE pointers, we can get rid of various messy methods for
passing back or deducing the rangetable index of an RTE during parsing.
Hence, refactor the addRangeTableEntryXXX functions to build and return
a ParseNamespaceItem struct, not just the RTE proper; and replace
addRTEtoQuery with addNSItemToQuery, which is passed a ParseNamespaceItem
rather than building one internally.
Also, add per-column data (a ParseNamespaceColumn array) to each
ParseNamespaceItem. These arrays are built during addRangeTableEntryXXX,
where we have column type data at hand so that it's nearly free to fill
the data structure. Later, when we need to build Vars referencing RTEs,
we can use the ParseNamespaceColumn info to avoid the rather expensive
operations done in get_rte_attribute_type() or expandRTE().
get_rte_attribute_type() is indeed dead code now, so I've removed it.
This makes for a useful improvement in parse analysis speed, around 20%
in one moderately-complex test query.
The ParseNamespaceColumn structs also include Var identity information
(varno/varattno). That info isn't actually being used in this patch,
except that p_varno == 0 is a handy test for a dropped column.
A follow-on patch will make more use of it.
Discussion: https://postgr.es/m/2461.1577764221@sss.pgh.pa.us
2020-01-02 17:29:01 +01:00
|
|
|
vars = expandNSItemVars(nsitem, sublevels_up, location, NULL);
|
2009-10-31 02:41:31 +01:00
|
|
|
|
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* Require read access to the table. This is normally redundant with
|
|
|
|
* the markVarForSelectPriv calls below, but not if the table has zero
|
Fix mishandling of column-level SELECT privileges for join aliases.
scanNSItemForColumn, expandNSItemAttrs, and ExpandSingleTable would
pass the wrong RTE to markVarForSelectPriv when dealing with a join
ParseNamespaceItem: they'd pass the join RTE, when what we need to
mark is the base table that the join column came from. The end
result was to not fill the base table's selectedCols bitmap correctly,
resulting in an understatement of the set of columns that are read
by the query. The executor would still insist on there being at
least one selectable column; but with a correctly crafted query,
a user having SELECT privilege on just one column of a table would
nonetheless be allowed to read all its columns.
To fix, make markRTEForSelectPriv fetch the correct RTE for itself,
ignoring the possibly-mismatched RTE passed by the caller. Later,
we'll get rid of some now-unused RTE arguments, but that risks
API breaks so we won't do it in released branches.
This problem was introduced by commit 9ce77d75c, so back-patch
to v13 where that came in. Thanks to Sven Klemm for reporting
the problem.
Security: CVE-2021-20229
2021-02-08 16:14:09 +01:00
|
|
|
* columns. We need not do anything if the nsitem is for a join: its
|
|
|
|
* component tables will have been marked ACL_SELECT when they were
|
|
|
|
* added to the rangetable. (This step changes things only for the
|
|
|
|
* target relation of UPDATE/DELETE, which cannot be under a join.)
|
2009-10-31 02:41:31 +01:00
|
|
|
*/
|
Fix mishandling of column-level SELECT privileges for join aliases.
scanNSItemForColumn, expandNSItemAttrs, and ExpandSingleTable would
pass the wrong RTE to markVarForSelectPriv when dealing with a join
ParseNamespaceItem: they'd pass the join RTE, when what we need to
mark is the base table that the join column came from. The end
result was to not fill the base table's selectedCols bitmap correctly,
resulting in an understatement of the set of columns that are read
by the query. The executor would still insist on there being at
least one selectable column; but with a correctly crafted query,
a user having SELECT privilege on just one column of a table would
nonetheless be allowed to read all its columns.
To fix, make markRTEForSelectPriv fetch the correct RTE for itself,
ignoring the possibly-mismatched RTE passed by the caller. Later,
we'll get rid of some now-unused RTE arguments, but that risks
API breaks so we won't do it in released branches.
This problem was introduced by commit 9ce77d75c, so back-patch
to v13 where that came in. Thanks to Sven Klemm for reporting
the problem.
Security: CVE-2021-20229
2021-02-08 16:14:09 +01:00
|
|
|
if (rte->rtekind == RTE_RELATION)
|
|
|
|
rte->requiredPerms |= ACL_SELECT;
|
2009-10-31 02:41:31 +01:00
|
|
|
|
|
|
|
/* Require read access to each column */
|
|
|
|
foreach(l, vars)
|
|
|
|
{
|
|
|
|
Var *var = (Var *) lfirst(l);
|
|
|
|
|
|
|
|
markVarForSelectPriv(pstate, var, rte);
|
|
|
|
}
|
|
|
|
|
|
|
|
return vars;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExpandRowReference()
|
|
|
|
* Transforms foo.* into a list of expressions or targetlist entries.
|
|
|
|
*
|
|
|
|
* This handles the case where foo is an arbitrary expression of composite
|
|
|
|
* type.
|
|
|
|
*/
|
|
|
|
static List *
|
|
|
|
ExpandRowReference(ParseState *pstate, Node *expr,
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
bool make_target_entry)
|
2009-10-31 02:41:31 +01:00
|
|
|
{
|
|
|
|
List *result = NIL;
|
|
|
|
TupleDesc tupleDesc;
|
|
|
|
int numAttrs;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the rowtype expression is a whole-row Var, we can expand the fields
|
2014-05-06 18:12:18 +02:00
|
|
|
* as simple Vars. Note: if the RTE is a relation, this case leaves us
|
2009-10-31 02:41:31 +01:00
|
|
|
* with the RTE's selectedCols bitmap showing the whole row as needing
|
|
|
|
* select permission, as well as the individual columns. However, we can
|
|
|
|
* only get here for weird notations like (table.*).*, so it's not worth
|
|
|
|
* trying to clean up --- arguably, the permissions marking is correct
|
|
|
|
* anyway for such cases.
|
|
|
|
*/
|
|
|
|
if (IsA(expr, Var) &&
|
|
|
|
((Var *) expr)->varattno == InvalidAttrNumber)
|
|
|
|
{
|
|
|
|
Var *var = (Var *) expr;
|
2019-12-26 17:16:42 +01:00
|
|
|
ParseNamespaceItem *nsitem;
|
2009-10-31 02:41:31 +01:00
|
|
|
|
2019-12-26 17:16:42 +01:00
|
|
|
nsitem = GetNSItemByRangeTablePosn(pstate, var->varno, var->varlevelsup);
|
|
|
|
return ExpandSingleTable(pstate, nsitem, var->varlevelsup, var->location, make_target_entry);
|
2009-10-31 02:41:31 +01:00
|
|
|
}
|
|
|
|
|
2005-04-25 23:03:25 +02:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* Otherwise we have to do it the hard way. Our current implementation is
|
|
|
|
* to generate multiple copies of the expression and do FieldSelects.
|
2009-10-31 02:41:31 +01:00
|
|
|
* (This can be pretty inefficient if the expression involves nontrivial
|
|
|
|
* computation :-(.)
|
|
|
|
*
|
2017-10-26 19:47:45 +02:00
|
|
|
* Verify it's a composite type, and get the tupdesc.
|
|
|
|
* get_expr_result_tupdesc() handles this conveniently.
|
2005-04-25 23:03:25 +02:00
|
|
|
*
|
2005-11-22 19:17:34 +01:00
|
|
|
* If it's a Var of type RECORD, we have to work even harder: we have to
|
2017-10-26 19:47:45 +02:00
|
|
|
* find what the Var refers to, and pass that to get_expr_result_tupdesc.
|
2005-11-22 19:17:34 +01:00
|
|
|
* That task is handled by expandRecordVariable().
|
2005-04-25 23:03:25 +02:00
|
|
|
*/
|
|
|
|
if (IsA(expr, Var) &&
|
|
|
|
((Var *) expr)->vartype == RECORDOID)
|
|
|
|
tupleDesc = expandRecordVariable(pstate, (Var *) expr, 0);
|
2017-10-26 19:47:45 +02:00
|
|
|
else
|
|
|
|
tupleDesc = get_expr_result_tupdesc(expr, false);
|
2005-04-25 23:03:25 +02:00
|
|
|
Assert(tupleDesc);
|
2004-06-19 20:19:56 +02:00
|
|
|
|
|
|
|
/* Generate a list of references to the individual fields */
|
|
|
|
numAttrs = tupleDesc->natts;
|
|
|
|
for (i = 0; i < numAttrs; i++)
|
|
|
|
{
|
2017-08-20 20:19:07 +02:00
|
|
|
Form_pg_attribute att = TupleDescAttr(tupleDesc, i);
|
2009-10-31 02:41:31 +01:00
|
|
|
FieldSelect *fselect;
|
2004-06-19 20:19:56 +02:00
|
|
|
|
|
|
|
if (att->attisdropped)
|
|
|
|
continue;
|
|
|
|
|
2009-10-31 02:41:31 +01:00
|
|
|
fselect = makeNode(FieldSelect);
|
|
|
|
fselect->arg = (Expr *) copyObject(expr);
|
|
|
|
fselect->fieldnum = i + 1;
|
|
|
|
fselect->resulttype = att->atttypid;
|
|
|
|
fselect->resulttypmod = att->atttypmod;
|
2011-04-09 20:40:09 +02:00
|
|
|
/* save attribute's collation for parse_collate.c */
|
2011-03-20 01:29:08 +01:00
|
|
|
fselect->resultcollid = att->attcollation;
|
2004-06-19 20:19:56 +02:00
|
|
|
|
Centralize the logic for detecting misplaced aggregates, window funcs, etc.
Formerly we relied on checking after-the-fact to see if an expression
contained aggregates, window functions, or sub-selects when it shouldn't.
This is grotty, easily forgotten (indeed, we had forgotten to teach
DefineIndex about rejecting window functions), and none too efficient
since it requires extra traversals of the parse tree. To improve matters,
define an enum type that classifies all SQL sub-expressions, store it in
ParseState to show what kind of expression we are currently parsing, and
make transformAggregateCall, transformWindowFuncCall, and transformSubLink
check the expression type and throw error if the type indicates the
construct is disallowed. This allows removal of a large number of ad-hoc
checks scattered around the code base. The enum type is sufficiently
fine-grained that we can still produce error messages of at least the
same specificity as before.
Bringing these error checks together revealed that we'd been none too
consistent about phrasing of the error messages, so standardize the wording
a bit.
Also, rewrite checking of aggregate arguments so that it requires only one
traversal of the arguments, rather than up to three as before.
In passing, clean up some more comments left over from add_missing_from
support, and annotate some tests that I think are dead code now that that's
gone. (I didn't risk actually removing said dead code, though.)
2012-08-10 17:35:33 +02:00
|
|
|
if (make_target_entry)
|
2006-06-26 19:24:41 +02:00
|
|
|
{
|
|
|
|
/* add TargetEntry decoration */
|
|
|
|
TargetEntry *te;
|
|
|
|
|
2009-10-31 02:41:31 +01:00
|
|
|
te = makeTargetEntry((Expr *) fselect,
|
2006-06-26 19:24:41 +02:00
|
|
|
(AttrNumber) pstate->p_next_resno++,
|
|
|
|
pstrdup(NameStr(att->attname)),
|
|
|
|
false);
|
|
|
|
result = lappend(result, te);
|
|
|
|
}
|
|
|
|
else
|
2009-10-31 02:41:31 +01:00
|
|
|
result = lappend(result, fselect);
|
2004-06-19 20:19:56 +02:00
|
|
|
}
|
|
|
|
|
2006-06-26 19:24:41 +02:00
|
|
|
return result;
|
2004-06-19 20:19:56 +02:00
|
|
|
}
|
|
|
|
|
2005-04-25 23:03:25 +02:00
|
|
|
/*
 * expandRecordVariable
 *		Get the tuple descriptor for a Var of type RECORD, if possible.
 *
 * Since no actual table or view column is allowed to have type RECORD, such
 * a Var must refer to a JOIN or FUNCTION RTE or to a subquery output.  We
 * drill down to find the ultimate defining expression and attempt to infer
 * the tupdesc from it.  We ereport if we can't determine the tupdesc.
 *
 * levelsup is an extra offset to interpret the Var's varlevelsup correctly.
 */
TupleDesc
expandRecordVariable(ParseState *pstate, Var *var, int levelsup)
{
	TupleDesc	tupleDesc;
	int			netlevelsup;
	RangeTblEntry *rte;
	AttrNumber	attnum;
	Node	   *expr;

	/* Check my caller didn't mess up */
	Assert(IsA(var, Var));
	Assert(var->vartype == RECORDOID);

	/*
	 * Note: it's tempting to use GetNSItemByRangeTablePosn here so that we
	 * can use expandNSItemVars instead of expandRTE; but that does not work
	 * for some of the recursion cases below, where we have consed up a
	 * ParseState that lacks p_namespace data.
	 */
	netlevelsup = var->varlevelsup + levelsup;
	rte = GetRTEByRangeTablePosn(pstate, var->varno, netlevelsup);
	attnum = var->varattno;

	if (attnum == InvalidAttrNumber)
	{
		/* Whole-row reference to an RTE, so expand the known fields */
		List	   *names,
				   *vars;
		ListCell   *lname,
				   *lvar;
		int			i;

		/* expandRTE gives parallel lists of column names and Vars */
		expandRTE(rte, var->varno, 0, var->location, false,
				  &names, &vars);

		/* Build a tupdesc describing one column per expanded field */
		tupleDesc = CreateTemplateTupleDesc(list_length(vars));
		i = 1;
		forboth(lname, names, lvar, vars)
		{
			char	   *label = strVal(lfirst(lname));
			Node	   *varnode = (Node *) lfirst(lvar);

			TupleDescInitEntry(tupleDesc, i,
							   label,
							   exprType(varnode),
							   exprTypmod(varnode),
							   0);
			TupleDescInitEntryCollation(tupleDesc, i,
										exprCollation(varnode));
			i++;
		}
		Assert(lname == NULL && lvar == NULL);	/* lists same length? */

		return tupleDesc;
	}

	expr = (Node *) var;		/* default if we can't drill down */

	switch (rte->rtekind)
	{
		case RTE_RELATION:
		case RTE_VALUES:
		case RTE_NAMEDTUPLESTORE:
		case RTE_RESULT:

			/*
			 * This case should not occur: a column of a table, values list,
			 * or ENR shouldn't have type RECORD.  Fall through and fail (most
			 * likely) at the bottom.
			 */
			break;
		case RTE_SUBQUERY:
			{
				/* Subselect-in-FROM: examine sub-select's output expr */
				TargetEntry *ste = get_tle_by_resno(rte->subquery->targetList,
													attnum);

				if (ste == NULL || ste->resjunk)
					elog(ERROR, "subquery %s does not have attribute %d",
						 rte->eref->aliasname, attnum);
				expr = (Node *) ste->expr;
				if (IsA(expr, Var))
				{
					/*
					 * Recurse into the sub-select to see what its Var refers
					 * to.  We have to build an additional level of ParseState
					 * to keep in step with varlevelsup in the subselect.
					 */
					ParseState	mypstate;

					MemSet(&mypstate, 0, sizeof(mypstate));
					mypstate.parentParseState = pstate;
					mypstate.p_rtable = rte->subquery->rtable;
					/* don't bother filling the rest of the fake pstate */

					return expandRecordVariable(&mypstate, (Var *) expr, 0);
				}
				/* else fall through to inspect the expression */
			}
			break;
		case RTE_JOIN:
			/* Join RTE --- recursively inspect the alias variable */
			Assert(attnum > 0 && attnum <= list_length(rte->joinaliasvars));
			expr = (Node *) list_nth(rte->joinaliasvars, attnum - 1);
			Assert(expr != NULL);
			/* We intentionally don't strip implicit coercions here */
			if (IsA(expr, Var))
				return expandRecordVariable(pstate, (Var *) expr, netlevelsup);
			/* else fall through to inspect the expression */
			break;
		case RTE_FUNCTION:

			/*
			 * We couldn't get here unless a function is declared with one of
			 * its result columns as RECORD, which is not allowed.
			 */
			break;
		case RTE_TABLEFUNC:

			/*
			 * Table function cannot have columns with RECORD type.
			 */
			break;
		case RTE_CTE:
			/* CTE reference: examine subquery's output expr */
			if (!rte->self_reference)
			{
				CommonTableExpr *cte = GetCTEForRTE(pstate, rte, netlevelsup);
				TargetEntry *ste;

				ste = get_tle_by_resno(GetCTETargetList(cte), attnum);
				if (ste == NULL || ste->resjunk)
					elog(ERROR, "CTE %s does not have attribute %d",
						 rte->eref->aliasname, attnum);
				expr = (Node *) ste->expr;
				if (IsA(expr, Var))
				{
					/*
					 * Recurse into the CTE to see what its Var refers to. We
					 * have to build an additional level of ParseState to keep
					 * in step with varlevelsup in the CTE; furthermore it
					 * could be an outer CTE.
					 */
					ParseState	mypstate;
					Index		levelsup;

					MemSet(&mypstate, 0, sizeof(mypstate));
					/* this loop must work, since GetCTEForRTE did */
					/* (note: deliberately re-binds pstate to the CTE's level) */
					for (levelsup = 0;
						 levelsup < rte->ctelevelsup + netlevelsup;
						 levelsup++)
						pstate = pstate->parentParseState;
					mypstate.parentParseState = pstate;
					mypstate.p_rtable = ((Query *) cte->ctequery)->rtable;
					/* don't bother filling the rest of the fake pstate */

					return expandRecordVariable(&mypstate, (Var *) expr, 0);
				}
				/* else fall through to inspect the expression */
			}
			break;
	}

	/*
	 * We now have an expression we can't expand any more, so see if
	 * get_expr_result_tupdesc() can do anything with it.
	 */
	return get_expr_result_tupdesc(expr, false);
}
|
|
|
|
|
|
|
|
|
1997-11-25 23:07:18 +01:00
|
|
|
/*
|
1998-07-08 16:04:11 +02:00
|
|
|
* FigureColname -
|
1997-11-25 23:07:18 +01:00
|
|
|
* if the name of the resulting column is not specified in the target
|
1999-07-19 02:26:20 +02:00
|
|
|
* list, we have to guess a suitable name. The SQL spec provides some
|
|
|
|
* guidance, but not much...
|
1997-11-25 23:07:18 +01:00
|
|
|
*
|
2001-09-17 03:06:36 +02:00
|
|
|
* Note that the argument is the *untransformed* parse tree for the target
|
|
|
|
* item. This is a shade easier to work with than the transformed tree.
|
1997-11-25 23:07:18 +01:00
|
|
|
*/
|
2004-09-30 02:24:27 +02:00
|
|
|
char *
|
2001-09-17 03:06:36 +02:00
|
|
|
FigureColname(Node *node)
|
1997-11-25 23:07:18 +01:00
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
char *name = NULL;
|
2001-10-08 23:48:51 +02:00
|
|
|
|
Adjust naming of indexes and their columns per recent discussion.
Index expression columns are now named after the FigureColname result for
their expressions, rather than always being "pg_expression_N". Digits are
appended to this name if needed to make the column name unique within the
index. (That happens for regular columns too, thus fixing the old problem
that CREATE INDEX fooi ON foo (f1, f1) fails. Before exclusion indexes
there was no real reason to do such a thing, but now maybe there is.)
Default names for indexes and associated constraints now include the column
names of all their columns, not only the first one as in previous practice.
(Of course, this will be truncated as needed to fit in NAMEDATALEN. Also,
pkey indexes retain the historical behavior of not naming specific columns
at all.)
An example of the results:
regression=# create table foo (f1 int, f2 text,
regression(# exclude (f1 with =, lower(f2) with =));
NOTICE: CREATE TABLE / EXCLUDE will create implicit index "foo_f1_lower_exclusion" for table "foo"
CREATE TABLE
regression=# \d foo_f1_lower_exclusion
Index "public.foo_f1_lower_exclusion"
Column | Type | Definition
--------+---------+------------
f1 | integer | f1
lower | text | lower(f2)
btree, for table "public.foo"
2009-12-23 03:35:25 +01:00
|
|
|
(void) FigureColnameInternal(node, &name);
|
2001-10-08 23:48:51 +02:00
|
|
|
if (name != NULL)
|
|
|
|
return name;
|
|
|
|
/* default result if we can't guess anything */
|
|
|
|
return "?column?";
|
|
|
|
}
|
|
|
|
|
Adjust naming of indexes and their columns per recent discussion.
Index expression columns are now named after the FigureColname result for
their expressions, rather than always being "pg_expression_N". Digits are
appended to this name if needed to make the column name unique within the
index. (That happens for regular columns too, thus fixing the old problem
that CREATE INDEX fooi ON foo (f1, f1) fails. Before exclusion indexes
there was no real reason to do such a thing, but now maybe there is.)
Default names for indexes and associated constraints now include the column
names of all their columns, not only the first one as in previous practice.
(Of course, this will be truncated as needed to fit in NAMEDATALEN. Also,
pkey indexes retain the historical behavior of not naming specific columns
at all.)
An example of the results:
regression=# create table foo (f1 int, f2 text,
regression(# exclude (f1 with =, lower(f2) with =));
NOTICE: CREATE TABLE / EXCLUDE will create implicit index "foo_f1_lower_exclusion" for table "foo"
CREATE TABLE
regression=# \d foo_f1_lower_exclusion
Index "public.foo_f1_lower_exclusion"
Column | Type | Definition
--------+---------+------------
f1 | integer | f1
lower | text | lower(f2)
btree, for table "public.foo"
2009-12-23 03:35:25 +01:00
|
|
|
/*
|
|
|
|
* FigureIndexColname -
|
|
|
|
* choose the name for an expression column in an index
|
|
|
|
*
|
|
|
|
* This is actually just like FigureColname, except we return NULL if
|
|
|
|
* we can't pick a good name.
|
|
|
|
*/
|
|
|
|
char *
|
|
|
|
FigureIndexColname(Node *node)
|
|
|
|
{
|
|
|
|
char *name = NULL;
|
|
|
|
|
|
|
|
(void) FigureColnameInternal(node, &name);
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * FigureColnameInternal -
 *	  internal workhorse for FigureColname
 *
 * Recursively descends through the raw (untransformed) expression tree,
 * looking for a node kind that suggests a usable column name.
 *
 * Return value indicates strength of confidence in result:
 *		0 - no information
 *		1 - second-best name choice
 *		2 - good name choice
 * The return value is actually only used internally.
 * If the result isn't zero, *name is set to the chosen name.
 */
static int
FigureColnameInternal(Node *node, char **name)
{
	int			strength = 0;

	if (node == NULL)
		return strength;

	switch (nodeTag(node))
	{
		case T_ColumnRef:
			{
				char	   *fname = NULL;
				ListCell   *l;

				/* find last field name, if any, ignoring "*" */
				foreach(l, ((ColumnRef *) node)->fields)
				{
					Node	   *i = lfirst(l);

					if (IsA(i, String))
						fname = strVal(i);
				}
				if (fname)
				{
					*name = fname;
					return 2;
				}
			}
			break;
		case T_A_Indirection:
			{
				A_Indirection *ind = (A_Indirection *) node;
				char	   *fname = NULL;
				ListCell   *l;

				/* find last field name, if any, ignoring "*" and subscripts */
				foreach(l, ind->indirection)
				{
					Node	   *i = lfirst(l);

					if (IsA(i, String))
						fname = strVal(i);
				}
				if (fname)
				{
					*name = fname;
					return 2;
				}
				/* else, try to name after whatever is being subscripted */
				return FigureColnameInternal(ind->arg, name);
			}
			break;
		case T_FuncCall:
			/* use the (unqualified) function name */
			*name = strVal(llast(((FuncCall *) node)->funcname));
			return 2;
		case T_A_Expr:
			if (((A_Expr *) node)->kind == AEXPR_NULLIF)
			{
				/* make nullif() act like a regular function */
				*name = "nullif";
				return 2;
			}
			break;
		case T_TypeCast:
			/* prefer a name found within the cast's argument ... */
			strength = FigureColnameInternal(((TypeCast *) node)->arg,
											 name);
			if (strength <= 1)
			{
				/* ... but fall back on the target type's name */
				if (((TypeCast *) node)->typeName != NULL)
				{
					*name = strVal(llast(((TypeCast *) node)->typeName->names));
					return 1;
				}
			}
			break;
		case T_CollateClause:
			/* COLLATE is transparent for naming purposes */
			return FigureColnameInternal(((CollateClause *) node)->arg, name);
		case T_GroupingFunc:
			/* make GROUPING() act like a regular function */
			*name = "grouping";
			return 2;
		case T_SubLink:
			switch (((SubLink *) node)->subLinkType)
			{
				case EXISTS_SUBLINK:
					*name = "exists";
					return 2;
				case ARRAY_SUBLINK:
					*name = "array";
					return 2;
				case EXPR_SUBLINK:
					{
						/* Get column name of the subquery's single target */
						SubLink    *sublink = (SubLink *) node;
						Query	   *query = (Query *) sublink->subselect;

						/*
						 * The subquery has probably already been transformed,
						 * but let's be careful and check that.  (The reason
						 * we can see a transformed subquery here is that
						 * transformSubLink is lazy and modifies the SubLink
						 * node in-place.)
						 */
						if (IsA(query, Query))
						{
							TargetEntry *te = (TargetEntry *) linitial(query->targetList);

							if (te->resname)
							{
								*name = te->resname;
								return 2;
							}
						}
					}
					break;
					/* As with other operator-like nodes, these have no names */
				case MULTIEXPR_SUBLINK:
				case ALL_SUBLINK:
				case ANY_SUBLINK:
				case ROWCOMPARE_SUBLINK:
				case CTE_SUBLINK:
					break;
			}
			break;
		case T_CaseExpr:
			/* prefer a name found within the ELSE expression ... */
			strength = FigureColnameInternal((Node *) ((CaseExpr *) node)->defresult,
											 name);
			if (strength <= 1)
			{
				/* ... but "case" will do as a second-best choice */
				*name = "case";
				return 1;
			}
			break;
		case T_A_ArrayExpr:
			/* make ARRAY[] act like a function */
			*name = "array";
			return 2;
		case T_RowExpr:
			/* make ROW() act like a function */
			*name = "row";
			return 2;
		case T_CoalesceExpr:
			/* make coalesce() act like a regular function */
			*name = "coalesce";
			return 2;
		case T_MinMaxExpr:
			/* make greatest/least act like a regular function */
			switch (((MinMaxExpr *) node)->op)
			{
				case IS_GREATEST:
					*name = "greatest";
					return 2;
				case IS_LEAST:
					*name = "least";
					return 2;
			}
			break;
		case T_SQLValueFunction:
			/* make these act like a function or variable */
			switch (((SQLValueFunction *) node)->op)
			{
				case SVFOP_CURRENT_DATE:
					*name = "current_date";
					return 2;
				case SVFOP_CURRENT_TIME:
				case SVFOP_CURRENT_TIME_N:
					*name = "current_time";
					return 2;
				case SVFOP_CURRENT_TIMESTAMP:
				case SVFOP_CURRENT_TIMESTAMP_N:
					*name = "current_timestamp";
					return 2;
				case SVFOP_LOCALTIME:
				case SVFOP_LOCALTIME_N:
					*name = "localtime";
					return 2;
				case SVFOP_LOCALTIMESTAMP:
				case SVFOP_LOCALTIMESTAMP_N:
					*name = "localtimestamp";
					return 2;
				case SVFOP_CURRENT_ROLE:
					*name = "current_role";
					return 2;
				case SVFOP_CURRENT_USER:
					*name = "current_user";
					return 2;
				case SVFOP_USER:
					*name = "user";
					return 2;
				case SVFOP_SESSION_USER:
					*name = "session_user";
					return 2;
				case SVFOP_CURRENT_CATALOG:
					*name = "current_catalog";
					return 2;
				case SVFOP_CURRENT_SCHEMA:
					*name = "current_schema";
					return 2;
			}
			break;
		case T_XmlExpr:
			/* make SQL/XML functions act like a regular function */
			switch (((XmlExpr *) node)->op)
			{
				case IS_XMLCONCAT:
					*name = "xmlconcat";
					return 2;
				case IS_XMLELEMENT:
					*name = "xmlelement";
					return 2;
				case IS_XMLFOREST:
					*name = "xmlforest";
					return 2;
				case IS_XMLPARSE:
					*name = "xmlparse";
					return 2;
				case IS_XMLPI:
					*name = "xmlpi";
					return 2;
				case IS_XMLROOT:
					*name = "xmlroot";
					return 2;
				case IS_XMLSERIALIZE:
					*name = "xmlserialize";
					return 2;
				case IS_DOCUMENT:
					/* nothing */
					break;
			}
			break;
		case T_XmlSerialize:
			*name = "xmlserialize";
			return 2;
		default:
			/* any other node type yields no name suggestion */
			break;
	}

	return strength;
}