postgresql/src/backend/parser/parse_oper.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1068 lines
30 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* parse_oper.c
* handle operator things for parser
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/parser/parse_oper.c
*
*-------------------------------------------------------------------------
*/
1997-11-26 02:14:33 +01:00
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "lib/stringinfo.h"
#include "nodes/nodeFuncs.h"
1999-07-16 07:00:38 +02:00
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
1997-11-26 02:14:33 +01:00
#include "parser/parse_oper.h"
#include "parser/parse_type.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
1997-11-26 02:14:33 +01:00
#include "utils/syscache.h"
#include "utils/typcache.h"
/*
* The lookup key for the operator lookaside hash table. Unused bits must be
* zeroes to ensure hashing works consistently --- in particular, oprname
* must be zero-padded and any unused entries in search_path must be zero.
*
* search_path contains the actual search_path with which the entry was
* derived (minus temp namespace if any), or else the single specified
* schema OID if we are looking up an explicitly-qualified operator name.
*
* search_path has to be fixed-length since the hashtable code insists on
* fixed-size keys. If your search path is longer than that, we just punt
* and don't cache anything.
*/
/*
 * Maximum number of search_path entries that fit in a cache key.  Lookups
 * made with a longer search path are simply not cached.
 */
#define MAX_CACHED_PATH_LEN 16
/*
 * Fixed-size hash key identifying one operator lookup.  Every byte takes part
 * in hashing, so unused space must be zero.
 */
typedef struct OprCacheKey
{
char oprname[NAMEDATALEN]; /* operator name, zero-padded */
Oid left_arg; /* Left input OID, or 0 if prefix op */
Oid right_arg; /* Right input OID */
Oid search_path[MAX_CACHED_PATH_LEN]; /* schema OIDs searched; unused entries are zero */
} OprCacheKey;
/*
 * One entry of the operator lookaside hash table: pairs a lookup key with the
 * OID of the operator that lookup resolved to.
 */
typedef struct OprCacheEntry
{
/* the hash lookup key MUST BE FIRST */
OprCacheKey key;
Oid opr_oid; /* OID of the resolved operator */
} OprCacheEntry;
/* Operator resolution helpers */
static Oid binary_oper_exact(List *opname, Oid arg1, Oid arg2);
static FuncDetailCode oper_select_candidate(int nargs,
Oid *input_typeids,
FuncCandidateList candidates,
Oid *operOid);
/* Error reporting helpers */
static const char *op_signature_string(List *op, char oprkind,
Oid arg1, Oid arg2);
static void op_error(ParseState *pstate, List *op, char oprkind,
Oid arg1, Oid arg2,
FuncDetailCode fdresult, int location);
/*
 * Operator lookaside cache helpers: build a key, probe the cache, and add an
 * entry.  InvalidateOprCacheCallBack is presumably the cache's invalidation
 * hook (its definition is not visible here -- confirm).
 */
static bool make_oper_cache_key(ParseState *pstate, OprCacheKey *key,
List *opname, Oid ltypeId, Oid rtypeId,
int location);
static Oid find_oper_cache_entry(OprCacheKey *key);
static void make_oper_cache_entry(OprCacheKey *key, Oid opr_oid);
static void InvalidateOprCacheCallBack(Datum arg, int cacheid, uint32 hashvalue);
/*
* LookupOperName
* Given a possibly-qualified operator name and exact input datatypes,
* look up the operator.
*
* Pass oprleft = InvalidOid for a prefix op.
*
* If the operator name is not schema-qualified, it is sought in the current
* namespace search path.
*
* If the operator is not found, we return InvalidOid if noError is true,
* else raise an error. pstate and location are used only to report the
* error position; pass NULL/-1 if not available.
*/
Oid
LookupOperName(ParseState *pstate, List *opername, Oid oprleft, Oid oprright,
			   bool noError, int location)
{
	Oid			found = OpernameGetOprid(opername, oprleft, oprright);
	char		kind;

	/* An exact match settles it. */
	if (OidIsValid(found))
		return found;

	/* Caller asked for a soft failure: report "nothing matched". */
	if (noError)
		return InvalidOid;

	/*
	 * A valid left type combined with no right type describes a postfix
	 * operator, which gets its own error.
	 */
	if (OidIsValid(oprleft) && !OidIsValid(oprright))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("postfix operators are not supported"),
				 parser_errposition(pstate, location)));

	/* No left type means a prefix ('l') operator, otherwise binary ('b'). */
	kind = OidIsValid(oprleft) ? 'b' : 'l';

	/* op_error() is not used here because only an exact match is wanted. */
	ereport(ERROR,
			(errcode(ERRCODE_UNDEFINED_FUNCTION),
			 errmsg("operator does not exist: %s",
					op_signature_string(opername, kind,
										oprleft, oprright)),
			 parser_errposition(pstate, location)));

	return InvalidOid;			/* keep compiler quiet */
}
/*
* LookupOperWithArgs
* Like LookupOperName, but the argument types are specified by
* a ObjectWithArgs node.
*/
Oid
LookupOperWithArgs(ObjectWithArgs *oper, bool noError)
{
	TypeName   *oprleft,
			   *oprright;
	Oid			leftoid,
				rightoid;

	/* The argument list must hold exactly a left and a right entry. */
	Assert(list_length(oper->objargs) == 2);
	oprleft = linitial_node(TypeName, oper->objargs);
	oprright = lsecond_node(TypeName, oper->objargs);

	/* A NULL TypeName means "no operand on this side": use InvalidOid. */
	if (oprleft == NULL)
		leftoid = InvalidOid;
	else
		leftoid = LookupTypeNameOid(NULL, oprleft, noError);

	if (oprright == NULL)
		rightoid = InvalidOid;
	else
		rightoid = LookupTypeNameOid(NULL, oprright, noError);

	/* No ParseState or error location is available at this level. */
	return LookupOperName(NULL, oper->objname, leftoid, rightoid,
						  noError, -1);
}
/*
* get_sort_group_operators - get default sorting/grouping operators for type
*
* We fetch the "<", "=", and ">" operators all at once to reduce lookup
* overhead (knowing that most callers will be interested in at least two).
* However, a given datatype might have only an "=" operator, if it is
* hashable but not sortable. (Other combinations of present and missing
* operators shouldn't happen, unless the system catalogs are messed up.)
*
* If an operator is missing and the corresponding needXX flag is true,
* throw a standard error message, else return InvalidOid.
*
* In addition to the operator OIDs themselves, this function can identify
* whether the "=" operator is hashable.
*
* Callers can pass NULL pointers for any results they don't care to get.
*
* Note: the results are guaranteed to be exact or binary-compatible matches,
* since most callers are not prepared to cope with adding any run-time type
* coercion steps.
*/
void
get_sort_group_operators(Oid argtype,
						 bool needLT, bool needEQ, bool needGT,
						 Oid *ltOpr, Oid *eqOpr, Oid *gtOpr,
						 bool *isHashable)
{
	int			wanted = TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR | TYPECACHE_GT_OPR;
	TypeCacheEntry *tce;
	Oid			less_op;
	Oid			equal_op;
	Oid			greater_op;
	bool		can_hash;
	bool		ordering_missing;

	/* Only ask the type cache for the hash function if the caller cares. */
	if (isHashable != NULL)
		wanted |= TYPECACHE_HASH_PROC;

	/*
	 * Fetch everything through the type cache.  The search algorithm used by
	 * typcache.c ensures that the results are consistent, ie all from
	 * matching opclasses.
	 */
	tce = lookup_type_cache(argtype, wanted);

	less_op = tce->lt_opr;
	equal_op = tce->eq_opr;
	greater_op = tce->gt_opr;
	can_hash = OidIsValid(tce->hash_proc);

	/* Complain about any operator the caller requires but the type lacks. */
	ordering_missing = (needLT && !OidIsValid(less_op)) ||
		(needGT && !OidIsValid(greater_op));
	if (ordering_missing)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("could not identify an ordering operator for type %s",
						format_type_be(argtype)),
				 errhint("Use an explicit ordering operator or modify the query.")));

	if (needEQ && !OidIsValid(equal_op))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("could not identify an equality operator for type %s",
						format_type_be(argtype))));

	/* Hand back only the outputs the caller supplied storage for. */
	if (ltOpr != NULL)
		*ltOpr = less_op;
	if (eqOpr != NULL)
		*eqOpr = equal_op;
	if (gtOpr != NULL)
		*gtOpr = greater_op;
	if (isHashable != NULL)
		*isHashable = can_hash;
}
/* given operator tuple, return the operator OID */
Oid
oprid(Operator op)
{
	/* Read the oid field of the pg_operator struct carried by the tuple. */
	return ((Form_pg_operator) GETSTRUCT(op))->oid;
}
/* given operator tuple, return the underlying function's OID */
Oid
oprfuncid(Operator op)
{
	/* Read the oprcode field of the pg_operator struct carried by the tuple. */
	return ((Form_pg_operator) GETSTRUCT(op))->oprcode;
}
/* binary_oper_exact()
* Check for an "exact" match to the specified operand types.
*
* If one operand is an unknown literal, assume it should be taken to be
* the same type as the other operand for this purpose. Also, consider
* the possibility that the other operand is a domain type that needs to
* be reduced to its base type to find an "exact" match.
*/
static Oid
binary_oper_exact(List *opname, Oid arg1, Oid arg2)
{
	bool		substituted = true;
	Oid			opid;
	Oid			base;

	/* If one side is an unknown literal, borrow the other side's type. */
	if (arg1 == UNKNOWNOID && arg2 != InvalidOid)
		arg1 = arg2;
	else if (arg2 == UNKNOWNOID && arg1 != InvalidOid)
		arg2 = arg1;
	else
		substituted = false;

	opid = OpernameGetOprid(opname, arg1, arg2);
	if (OidIsValid(opid))
		return opid;

	/* The base-type retry applies only when an unknown was replaced. */
	if (!substituted)
		return InvalidOid;

	/* Both operand types are identical here, so inspecting arg1 suffices. */
	base = getBaseType(arg1);
	if (base == arg1)
		return InvalidOid;

	/* Retry with the base type; a failed lookup is reported as InvalidOid. */
	opid = OpernameGetOprid(opname, base, base);
	return OidIsValid(opid) ? opid : InvalidOid;
}
/* oper_select_candidate()
* Given the input argtype array and one or more candidates
* for the operator, attempt to resolve the conflict.
*
* Returns FUNCDETAIL_NOTFOUND, FUNCDETAIL_MULTIPLE, or FUNCDETAIL_NORMAL.
* In the success case the Oid of the best candidate is stored in *operOid.
*
* Note that the caller has already determined that there is no candidate
* exactly matching the input argtype(s). Incompatible candidates are not yet
* pruned away, however.
*/
static FuncDetailCode
oper_select_candidate(int nargs,
					  Oid *input_typeids,
					  FuncCandidateList candidates,
					  Oid *operOid)	/* output argument */
{
	FuncCandidateList survivors = candidates;
	FuncCandidateList winner;
	int			nsurvivors;

	/*
	 * Discard candidates that cannot accept the given input types, whether
	 * directly or by coercion.
	 */
	nsurvivors = func_match_argtypes(nargs, input_typeids,
									 candidates, &survivors);

	/* With zero or one survivor there is nothing left to decide. */
	switch (nsurvivors)
	{
		case 0:
			*operOid = InvalidOid;
			return FUNCDETAIL_NOTFOUND;
		case 1:
			*operOid = survivors->oid;
			return FUNCDETAIL_NORMAL;
		default:
			break;
	}

	/* Several survivors: apply the ambiguous-function heuristics. */
	winner = func_select_candidate(nargs, input_typeids, survivors);
	if (winner == NULL)
	{
		/* failed to select a best candidate */
		*operOid = InvalidOid;
		return FUNCDETAIL_MULTIPLE;
	}

	*operOid = winner->oid;
	return FUNCDETAIL_NORMAL;
}
/* oper() -- search for a binary operator
* Given operator name, types of arg1 and arg2, return oper struct.
*
* IMPORTANT: the returned operator (if any) is only promised to be
* coercion-compatible with the input datatypes. Do not use this if
* you need an exact- or binary-compatible match; see compatible_oper.
*
* If no matching operator found, return NULL if noError is true,
* raise an error if it is false. pstate and location are used only to report
* the error position; pass NULL/-1 if not available.
*
* NOTE: on success, the returned object is a syscache entry. The caller
* must ReleaseSysCache() the entry when done with it.
*/
Operator
oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId,
	 bool noError, int location)
{
	OprCacheKey cachekey;
	bool		cacheable;
	Oid			opid;
	HeapTuple	optup = NULL;
	FuncDetailCode lookup_status = FUNCDETAIL_NOTFOUND;

	/*
	 * Step 1: consult the lookaside cache.  A hit is usable only if the
	 * cached OID still yields a syscache tuple; otherwise fall through and
	 * resolve from scratch.
	 */
	cacheable = make_oper_cache_key(pstate, &cachekey, opname,
									ltypeId, rtypeId, location);
	if (cacheable)
	{
		opid = find_oper_cache_entry(&cachekey);
		if (OidIsValid(opid))
		{
			optup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid));
			if (HeapTupleIsValid(optup))
				return (Operator) optup;
		}
	}

	/* Step 2: look for an "exact" match on the operand types. */
	opid = binary_oper_exact(opname, ltypeId, rtypeId);

	/* Step 3: failing that, pick the most suitable binary candidate. */
	if (!OidIsValid(opid))
	{
		FuncCandidateList binary_ops = OpernameGetCandidates(opname, 'b', false);

		/* With no operators of this name at all, the lookup simply fails. */
		if (binary_ops != NULL)
		{
			Oid			argtypes[2];

			/*
			 * If one operand type is unspecified, reuse the other one (XXX
			 * this is probably dead code?).  The adjusted types are also what
			 * a later error report will show.
			 */
			if (rtypeId == InvalidOid)
				rtypeId = ltypeId;
			else if (ltypeId == InvalidOid)
				ltypeId = rtypeId;

			argtypes[0] = ltypeId;
			argtypes[1] = rtypeId;
			lookup_status = oper_select_candidate(2, argtypes,
												  binary_ops, &opid);
		}
	}

	/* Step 4: fetch the tuple for whatever operator was chosen. */
	if (OidIsValid(opid))
		optup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid));

	if (!HeapTupleIsValid(optup))
	{
		/* Nothing usable: raise an error unless the caller wants NULL. */
		if (!noError)
			op_error(pstate, opname, 'b', ltypeId, rtypeId,
					 lookup_status, location);
	}
	else if (cacheable)
	{
		/* Remember the successful resolution for next time. */
		make_oper_cache_entry(&cachekey, opid);
	}

	return (Operator) optup;
}
/* compatible_oper()
* given an opname and input datatypes, find a compatible binary operator
*
* This is tighter than oper() because it will not return an operator that
* requires coercion of the input datatypes (but binary-compatible operators
* are accepted). Otherwise, the semantics are the same.
*/
Operator
compatible_oper(ParseState *pstate, List *op, Oid arg1, Oid arg2,
bool noError, int location)
Clean up two rather nasty bugs in operator selection code. 1. If there is exactly one pg_operator entry of the right name and oprkind, oper() and related routines would return that entry whether its input type had anything to do with the request or not. This is just premature optimization: we shouldn't return the single candidate until after we verify that it really is a valid candidate, ie, is at least coercion-compatible with the given types. 2. oper() and related routines only promise a coercion-compatible result. Unfortunately, there were quite a few callers that assumed the returned operator is binary-compatible with the given datatype; they would proceed to call it without making any datatype coercions. These callers include sorting, grouping, aggregation, and VACUUM ANALYZE. In general I think it is appropriate for these callers to require an exact or binary-compatible match, so I've added a new routine compatible_oper() that only succeeds if it can find an operator that doesn't require any run-time conversions. Callers now call oper() or compatible_oper() depending on whether they are prepared to deal with type conversion or not. The upshot of these bugs is revealed by the following silliness in PL/Tcl's selftest: it creates an operator @< on int4, and then tries to use it to sort a char(N) column. The system would let it do that :-( (and evidently has done so since 6.3 :-( :-(). The result in this case was just a silly sort order, but the reverse combination would've provoked coredump from trying to dereference integers. With this fix you get more reasonable behavior: pltcl_test=# select * from T_pkey1 order by key1, key2 using @<; ERROR: Unable to identify an operator '@<' for types 'bpchar' and 'bpchar' You will have to retype this query using an explicit cast
2001-02-16 04:16:58 +01:00
{
Operator optup;
Form_pg_operator opform;
/* oper() will find the best available match */
optup = oper(pstate, op, arg1, arg2, noError, location);
Clean up two rather nasty bugs in operator selection code. 1. If there is exactly one pg_operator entry of the right name and oprkind, oper() and related routines would return that entry whether its input type had anything to do with the request or not. This is just premature optimization: we shouldn't return the single candidate until after we verify that it really is a valid candidate, ie, is at least coercion-compatible with the given types. 2. oper() and related routines only promise a coercion-compatible result. Unfortunately, there were quite a few callers that assumed the returned operator is binary-compatible with the given datatype; they would proceed to call it without making any datatype coercions. These callers include sorting, grouping, aggregation, and VACUUM ANALYZE. In general I think it is appropriate for these callers to require an exact or binary-compatible match, so I've added a new routine compatible_oper() that only succeeds if it can find an operator that doesn't require any run-time conversions. Callers now call oper() or compatible_oper() depending on whether they are prepared to deal with type conversion or not. The upshot of these bugs is revealed by the following silliness in PL/Tcl's selftest: it creates an operator @< on int4, and then tries to use it to sort a char(N) column. The system would let it do that :-( (and evidently has done so since 6.3 :-( :-(). The result in this case was just a silly sort order, but the reverse combination would've provoked coredump from trying to dereference integers. With this fix you get more reasonable behavior: pltcl_test=# select * from T_pkey1 order by key1, key2 using @<; ERROR: Unable to identify an operator '@<' for types 'bpchar' and 'bpchar' You will have to retype this query using an explicit cast
2001-02-16 04:16:58 +01:00
if (optup == (Operator) NULL)
return (Operator) NULL; /* must be noError case */
/* but is it good enough? */
opform = (Form_pg_operator) GETSTRUCT(optup);
if (IsBinaryCoercible(arg1, opform->oprleft) &&
IsBinaryCoercible(arg2, opform->oprright))
Clean up two rather nasty bugs in operator selection code. 1. If there is exactly one pg_operator entry of the right name and oprkind, oper() and related routines would return that entry whether its input type had anything to do with the request or not. This is just premature optimization: we shouldn't return the single candidate until after we verify that it really is a valid candidate, ie, is at least coercion-compatible with the given types. 2. oper() and related routines only promise a coercion-compatible result. Unfortunately, there were quite a few callers that assumed the returned operator is binary-compatible with the given datatype; they would proceed to call it without making any datatype coercions. These callers include sorting, grouping, aggregation, and VACUUM ANALYZE. In general I think it is appropriate for these callers to require an exact or binary-compatible match, so I've added a new routine compatible_oper() that only succeeds if it can find an operator that doesn't require any run-time conversions. Callers now call oper() or compatible_oper() depending on whether they are prepared to deal with type conversion or not. The upshot of these bugs is revealed by the following silliness in PL/Tcl's selftest: it creates an operator @< on int4, and then tries to use it to sort a char(N) column. The system would let it do that :-( (and evidently has done so since 6.3 :-( :-(). The result in this case was just a silly sort order, but the reverse combination would've provoked coredump from trying to dereference integers. With this fix you get more reasonable behavior: pltcl_test=# select * from T_pkey1 order by key1, key2 using @<; ERROR: Unable to identify an operator '@<' for types 'bpchar' and 'bpchar' You will have to retype this query using an explicit cast
2001-02-16 04:16:58 +01:00
return optup;
/* nope... */
ReleaseSysCache(optup);
Clean up two rather nasty bugs in operator selection code. 1. If there is exactly one pg_operator entry of the right name and oprkind, oper() and related routines would return that entry whether its input type had anything to do with the request or not. This is just premature optimization: we shouldn't return the single candidate until after we verify that it really is a valid candidate, ie, is at least coercion-compatible with the given types. 2. oper() and related routines only promise a coercion-compatible result. Unfortunately, there were quite a few callers that assumed the returned operator is binary-compatible with the given datatype; they would proceed to call it without making any datatype coercions. These callers include sorting, grouping, aggregation, and VACUUM ANALYZE. In general I think it is appropriate for these callers to require an exact or binary-compatible match, so I've added a new routine compatible_oper() that only succeeds if it can find an operator that doesn't require any run-time conversions. Callers now call oper() or compatible_oper() depending on whether they are prepared to deal with type conversion or not. The upshot of these bugs is revealed by the following silliness in PL/Tcl's selftest: it creates an operator @< on int4, and then tries to use it to sort a char(N) column. The system would let it do that :-( (and evidently has done so since 6.3 :-( :-(). The result in this case was just a silly sort order, but the reverse combination would've provoked coredump from trying to dereference integers. With this fix you get more reasonable behavior: pltcl_test=# select * from T_pkey1 order by key1, key2 using @<; ERROR: Unable to identify an operator '@<' for types 'bpchar' and 'bpchar' You will have to retype this query using an explicit cast
2001-02-16 04:16:58 +01:00
if (!noError)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("operator requires run-time type coercion: %s",
op_signature_string(op, 'b', arg1, arg2)),
parser_errposition(pstate, location)));
Clean up two rather nasty bugs in operator selection code. 1. If there is exactly one pg_operator entry of the right name and oprkind, oper() and related routines would return that entry whether its input type had anything to do with the request or not. This is just premature optimization: we shouldn't return the single candidate until after we verify that it really is a valid candidate, ie, is at least coercion-compatible with the given types. 2. oper() and related routines only promise a coercion-compatible result. Unfortunately, there were quite a few callers that assumed the returned operator is binary-compatible with the given datatype; they would proceed to call it without making any datatype coercions. These callers include sorting, grouping, aggregation, and VACUUM ANALYZE. In general I think it is appropriate for these callers to require an exact or binary-compatible match, so I've added a new routine compatible_oper() that only succeeds if it can find an operator that doesn't require any run-time conversions. Callers now call oper() or compatible_oper() depending on whether they are prepared to deal with type conversion or not. The upshot of these bugs is revealed by the following silliness in PL/Tcl's selftest: it creates an operator @< on int4, and then tries to use it to sort a char(N) column. The system would let it do that :-( (and evidently has done so since 6.3 :-( :-(). The result in this case was just a silly sort order, but the reverse combination would've provoked coredump from trying to dereference integers. With this fix you get more reasonable behavior: pltcl_test=# select * from T_pkey1 order by key1, key2 using @<; ERROR: Unable to identify an operator '@<' for types 'bpchar' and 'bpchar' You will have to retype this query using an explicit cast
2001-02-16 04:16:58 +01:00
return (Operator) NULL;
}
/* compatible_oper_opid() -- get OID of a binary operator
 *
 * This is a convenience routine that extracts only the operator OID
 * from the result of compatible_oper().  InvalidOid is returned if the
 * lookup fails and noError is true.
 *
 * No ParseState or error location is available here, so NULL and -1 are
 * passed down to compatible_oper().
 */
Oid
compatible_oper_opid(List *op, Oid arg1, Oid arg2, bool noError)
{
	Operator	optup;
	Oid			result;

	optup = compatible_oper(NULL, op, arg1, arg2, noError, -1);
	if (optup != NULL)
	{
		/* copy the OID out before letting go of the tuple */
		result = oprid(optup);
		ReleaseSysCache(optup);
		return result;
	}
	return InvalidOid;
}
/* left_oper() -- search for a unary left operator (prefix operator)
 * Given operator name and type of arg, return oper struct.
 *
 * IMPORTANT: the returned operator (if any) is only promised to be
 * coercion-compatible with the input datatype.  Do not use this if
 * you need an exact- or binary-compatible match.
 *
 * If no matching operator found, return NULL if noError is true,
 * raise an error if it is false.  pstate and location are used only to report
 * the error position; pass NULL/-1 if not available.
 *
 * NOTE: on success, the returned object is a syscache entry.  The caller
 * must ReleaseSysCache() the entry when done with it.
 */
Operator
left_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
{
	Oid			operOid;
	OprCacheKey key;
	bool		key_ok;
	FuncDetailCode fdresult = FUNCDETAIL_NOTFOUND;
	HeapTuple	tup = NULL;

	/*
	 * Try to find the mapping in the lookaside cache.
	 */
	key_ok = make_oper_cache_key(pstate, &key, op, InvalidOid, arg, location);

	if (key_ok)
	{
		operOid = find_oper_cache_entry(&key);
		if (OidIsValid(operOid))
		{
			tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operOid));
			if (HeapTupleIsValid(tup))
				return (Operator) tup;
			/* otherwise fall through and do the full lookup */
		}
	}

	/*
	 * First try for an "exact" match.
	 */
	operOid = OpernameGetOprid(op, InvalidOid, arg);
	if (!OidIsValid(operOid))
	{
		/*
		 * Otherwise, search for the most suitable candidate.
		 */
		FuncCandidateList clist;

		/* Get prefix operators of given name */
		clist = OpernameGetCandidates(op, 'l', false);

		/* No operators found? Then fail... */
		if (clist != NULL)
		{
			/*
			 * The returned list has args in the form (0, oprright). Move the
			 * useful data into args[0] to keep oper_select_candidate simple.
			 * XXX we are assuming here that we may scribble on the list!
			 */
			FuncCandidateList clisti;

			for (clisti = clist; clisti != NULL; clisti = clisti->next)
			{
				clisti->args[0] = clisti->args[1];
			}

			/*
			 * We must run oper_select_candidate even if only one candidate,
			 * otherwise we may falsely return a non-type-compatible operator.
			 */
			fdresult = oper_select_candidate(1, &arg, clist, &operOid);
		}
	}

	if (OidIsValid(operOid))
		tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operOid));

	if (HeapTupleIsValid(tup))
	{
		/* remember the result in the lookaside cache if we built a key */
		if (key_ok)
			make_oper_cache_entry(&key, operOid);
	}
	else if (!noError)
		op_error(pstate, op, 'l', InvalidOid, arg, fdresult, location);

	return (Operator) tup;
}
/*
 * op_signature_string
 *		Render an operator together with its argument type name(s) as text,
 *		e.g. "integer + integer".
 *
 * For oprkind 'l' only the operator name and arg2 are printed; for any other
 * oprkind arg1 is printed first.  Mostly useful when composing
 * operator-not-found style error messages.
 */
static const char *
op_signature_string(List *op, char oprkind, Oid arg1, Oid arg2)
{
	StringInfoData sig;
	const bool	show_left_arg = (oprkind != 'l');

	initStringInfo(&sig);

	/* left operand type, unless this is a prefix operator */
	if (show_left_arg)
		appendStringInfo(&sig, "%s ", format_type_be(arg1));

	/* operator name, then the right operand type */
	appendStringInfoString(&sig, NameListToString(op));
	appendStringInfo(&sig, " %s", format_type_be(arg2));

	/* hand back the palloc'd buffer owned by the StringInfo */
	return sig.data;
}
/*
 * op_error - report that an operator could not be resolved
 *
 * Raises ERRCODE_AMBIGUOUS_FUNCTION when fdresult is FUNCDETAIL_MULTIPLE,
 * and ERRCODE_UNDEFINED_FUNCTION for every other fdresult.  Both reports are
 * raised at ERROR level and carry the operator signature and the parse
 * location.
 */
static void
op_error(ParseState *pstate, List *op, char oprkind,
		 Oid arg1, Oid arg2,
		 FuncDetailCode fdresult, int location)
{
	const char *signature = op_signature_string(op, oprkind, arg1, arg2);

	if (fdresult != FUNCDETAIL_MULTIPLE)
	{
		/* plural hint wording only when both argument types are nonzero */
		const bool	both_args = (arg1 && arg2);

		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("operator does not exist: %s",
						signature),
				 both_args ?
				 errhint("No operator matches the given name and argument types. "
						 "You might need to add explicit type casts.") :
				 errhint("No operator matches the given name and argument type. "
						 "You might need to add an explicit type cast."),
				 parser_errposition(pstate, location)));
	}
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
				 errmsg("operator is not unique: %s",
						signature),
				 errhint("Could not choose a best candidate operator. "
						 "You might need to add explicit type casts."),
				 parser_errposition(pstate, location)));
	}
}
/*
 * make_op()
 *		Operator expression construction.
 *
 * Transform operator expression ensuring type compatibility.
 * This is where some type conversion happens.
 *
 * ltree is NULL for a prefix operator; rtree must not be NULL (postfix
 * operators are rejected).  The operator is looked up with left_oper() or
 * oper(), the arguments are coerced to its declared input types, and an
 * OpExpr node is returned.
 *
 * last_srf should be a copy of pstate->p_last_srf from just before we
 * started transforming the operator's arguments; this is used for nested-SRF
 * detection.  If the caller will throw an error anyway for a set-returning
 * expression, it's okay to cheat and just pass pstate->p_last_srf.
 */
Expr *
make_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree,
		Node *last_srf, int location)
{
	Oid			ltypeId,
				rtypeId;
	Operator	tup;
	Form_pg_operator opform;
	Oid			actual_arg_types[2];
	Oid			declared_arg_types[2];
	int			nargs;
	List	   *args;
	Oid			rettype;
	OpExpr	   *result;

	/* Check it's not a postfix operator */
	if (rtree == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("postfix operators are not supported")));

	/* Select the operator */
	if (ltree == NULL)
	{
		/* prefix operator */
		rtypeId = exprType(rtree);
		ltypeId = InvalidOid;
		tup = left_oper(pstate, opname, rtypeId, false, location);
	}
	else
	{
		/* otherwise, binary operator */
		ltypeId = exprType(ltree);
		rtypeId = exprType(rtree);
		tup = oper(pstate, opname, ltypeId, rtypeId, false, location);
	}

	opform = (Form_pg_operator) GETSTRUCT(tup);

	/* Check it's not a shell */
	if (!RegProcedureIsValid(opform->oprcode))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("operator is only a shell: %s",
						op_signature_string(opname,
											opform->oprkind,
											opform->oprleft,
											opform->oprright)),
				 parser_errposition(pstate, location)));

	/* Do typecasting and build the expression tree */
	if (ltree == NULL)
	{
		/* prefix operator */
		args = list_make1(rtree);
		actual_arg_types[0] = rtypeId;
		declared_arg_types[0] = opform->oprright;
		nargs = 1;
	}
	else
	{
		/* otherwise, binary operator */
		args = list_make2(ltree, rtree);
		actual_arg_types[0] = ltypeId;
		actual_arg_types[1] = rtypeId;
		declared_arg_types[0] = opform->oprleft;
		declared_arg_types[1] = opform->oprright;
		nargs = 2;
	}

	/*
	 * enforce consistency with polymorphic argument and return types,
	 * possibly adjusting return type or declared_arg_types (which will be
	 * used as the cast destination by make_fn_arguments)
	 */
	rettype = enforce_generic_type_consistency(actual_arg_types,
											   declared_arg_types,
											   nargs,
											   opform->oprresult,
											   false);

	/* perform the necessary typecasting of arguments */
	make_fn_arguments(pstate, args, actual_arg_types, declared_arg_types);

	/* and build the expression node */
	result = makeNode(OpExpr);
	result->opno = oprid(tup);
	result->opfuncid = opform->oprcode;
	result->opresulttype = rettype;
	result->opretset = get_func_retset(opform->oprcode);
	/* opcollid and inputcollid will be set by parse_collate.c */
	result->args = args;
	result->location = location;

	/* if it returns a set, check that's OK */
	if (result->opretset)
	{
		check_srf_call_placement(pstate, last_srf, location);
		/* ... and remember it for error checks at higher levels */
		pstate->p_last_srf = (Node *) result;
	}

	/* all needed fields have been copied out of the operator tuple */
	ReleaseSysCache(tup);

	return (Expr *) result;
}
/*
 * make_scalar_array_op()
 *		Build expression tree for "scalar op ANY/ALL (array)" construct.
 *
 * pstate:   parse state; handed to oper(), make_fn_arguments() and
 *           parser_errposition().
 * opname:   (possibly qualified) operator name, as a List.
 * useOr:    copied verbatim into the result node's useOr field
 *           (presumably true for ANY and false for ALL -- confirm against
 *           the ScalarArrayOpExpr definition).
 * ltree:    expression for the scalar on the left-hand side.
 * rtree:    expression for the array on the right-hand side.
 * location: parse location, used for error cursor positions and stored in
 *           the result node.
 *
 * The operator is resolved against the left input type and the *element*
 * type of the right input; afterwards the right-hand argument is coerced to
 * the array type matching the operator's declared right input type.
 *
 * Returns a ScalarArrayOpExpr node (cast to Expr *).  Errors are reported
 * via ereport(ERROR) if the right side is not an array, the operator is
 * only a shell, the operator does not yield boolean, the operator's
 * function returns a set, or no array type exists for the operator's
 * right-hand input type.
 */
Expr *
make_scalar_array_op(ParseState *pstate, List *opname,
					 bool useOr,
					 Node *ltree, Node *rtree,
					 int location)
{
	Oid			ltypeId,
				rtypeId,
				atypeId,
				res_atypeId;
	Operator	tup;
	Form_pg_operator opform;
	Oid			actual_arg_types[2];
	Oid			declared_arg_types[2];
	List	   *args;
	Oid			rettype;
	ScalarArrayOpExpr *result;

	ltypeId = exprType(ltree);
	atypeId = exprType(rtree);

	/*
	 * The right-hand input of the operator will be the element type of the
	 * array.  However, if we currently have just an untyped literal on the
	 * right, stay with that and hope we can resolve the operator.
	 */
	if (atypeId == UNKNOWNOID)
		rtypeId = UNKNOWNOID;
	else
	{
		rtypeId = get_base_element_type(atypeId);
		if (!OidIsValid(rtypeId))
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("op ANY/ALL (array) requires array on right side"),
					 parser_errposition(pstate, location)));
	}

	/* Now resolve the operator */
	tup = oper(pstate, opname, ltypeId, rtypeId, false, location);
	opform = (Form_pg_operator) GETSTRUCT(tup);

	/* Check it's not a shell */
	if (!RegProcedureIsValid(opform->oprcode))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("operator is only a shell: %s",
						op_signature_string(opname,
											opform->oprkind,
											opform->oprleft,
											opform->oprright)),
				 parser_errposition(pstate, location)));

	/*
	 * At this point the "actual" right-hand type is still the element type,
	 * since that is what the operator was resolved against.
	 */
	args = list_make2(ltree, rtree);
	actual_arg_types[0] = ltypeId;
	actual_arg_types[1] = rtypeId;
	declared_arg_types[0] = opform->oprleft;
	declared_arg_types[1] = opform->oprright;

	/*
	 * enforce consistency with polymorphic argument and return types,
	 * possibly adjusting return type or declared_arg_types (which will be
	 * used as the cast destination by make_fn_arguments)
	 */
	rettype = enforce_generic_type_consistency(actual_arg_types,
											   declared_arg_types,
											   2,
											   opform->oprresult,
											   false);

	/*
	 * Check that operator result is boolean
	 */
	if (rettype != BOOLOID)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("op ANY/ALL (array) requires operator to yield boolean"),
				 parser_errposition(pstate, location)));
	if (get_func_retset(opform->oprcode))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("op ANY/ALL (array) requires operator not to return a set"),
				 parser_errposition(pstate, location)));

	/*
	 * Now switch back to the array type on the right, arranging for any
	 * needed cast to be applied.  Beware of polymorphic operators here;
	 * enforce_generic_type_consistency may or may not have replaced a
	 * polymorphic type with a real one.
	 */
	if (IsPolymorphicType(declared_arg_types[1]))
	{
		/* assume the actual array type is OK */
		res_atypeId = atypeId;
	}
	else
	{
		res_atypeId = get_array_type(declared_arg_types[1]);
		if (!OidIsValid(res_atypeId))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("could not find array type for data type %s",
							format_type_be(declared_arg_types[1])),
					 parser_errposition(pstate, location)));
	}
	actual_arg_types[1] = atypeId;
	declared_arg_types[1] = res_atypeId;

	/* perform the necessary typecasting of arguments */
	make_fn_arguments(pstate, args, actual_arg_types, declared_arg_types);

	/* and build the expression node */
	result = makeNode(ScalarArrayOpExpr);
	result->opno = oprid(tup);
	result->opfuncid = opform->oprcode;
	/* hash/negator function OIDs are left unset here */
	result->hashfuncid = InvalidOid;
	result->negfuncid = InvalidOid;
	result->useOr = useOr;
	/* inputcollid will be set by parse_collate.c */
	result->args = args;
	result->location = location;

	/* done with the operator tuple obtained from oper() */
	ReleaseSysCache(tup);

	return (Expr *) result;
}
/*
* Lookaside cache to speed operator lookup. Possibly this should be in
* a separate module under utils/cache/ ?
*
* The idea here is that the mapping from operator name and given argument
* types is constant for a given search path (or single specified schema OID)
* so long as the contents of pg_operator and pg_cast don't change. And that
* mapping is pretty expensive to compute, especially for ambiguous operators;
* this is mainly because there are a *lot* of instances of popular operator
* names such as "=", and we have to check each one to see which is the
* best match. So once we have identified the correct mapping, we save it
* in a cache that need only be flushed on pg_operator or pg_cast change.
* (pg_cast must be considered because changes in the set of implicit casts
* affect the set of applicable operators for any given input datatype.)
*
* XXX in principle, ALTER TABLE ... INHERIT could affect the mapping as
* well, but we disregard that since there's no convenient way to find out
* about it, and it seems a pretty far-fetched corner-case anyway.
*
* Note: at some point it might be worth doing a similar cache for function
* lookups. However, the potential gain is a lot less since (a) function
* names are generally not overloaded as heavily as operator names, and
* (b) we'd have to flush on pg_proc updates, which are probably a good
* deal more common than pg_operator updates.
*/
/*
 * The operator cache hashtable.  Starts out NULL; find_oper_cache_entry()
 * creates it (and registers the invalidation callbacks) on first use.
 */
static HTAB *OprCacheHash = NULL;
/*
 * make_oper_cache_key
 *		Populate *key from an operator name and its argument types.
 *
 * The whole key is zeroed first so that padding and unused search_path
 * slots hash consistently.  For a schema-qualified name the only
 * search_path entry is that schema's OID; otherwise the active search path
 * is copied in.
 *
 * Returns true on success, or false when the active search path has more
 * than MAX_CACHED_PATH_LEN entries (in which case the lookup cannot be
 * cached).
 *
 * pstate/location serve only to position an error cursor during the
 * explicit schema lookup; pass NULL/-1 if not available.
 */
static bool
make_oper_cache_key(ParseState *pstate, OprCacheKey *key, List *opname,
					Oid ltypeId, Oid rtypeId, int location)
{
	char	   *nspname;
	char	   *oprname;
	ParseCallbackState errcb;

	/* split the name list into optional schema and bare operator name */
	DeconstructQualifiedName(opname, &nspname, &oprname);

	/* start from an all-zeroes key for stable hashing */
	MemSet(key, 0, sizeof(*key));

	/* record argument types and the operator name */
	key->left_arg = ltypeId;
	key->right_arg = rtypeId;
	strlcpy(key->oprname, oprname, sizeof(key->oprname));

	if (nspname == NULL)
	{
		/* unqualified: use the active search path, if it fits */
		return fetch_search_path_array(key->search_path,
									   MAX_CACHED_PATH_LEN) <= MAX_CACHED_PATH_LEN;
	}

	/* qualified: look only in the schema that was named */
	setup_parser_errposition_callback(&errcb, pstate, location);
	key->search_path[0] = LookupExplicitNamespace(nspname, false);
	cancel_parser_errposition_callback(&errcb);

	return true;
}
/*
 * find_oper_cache_entry
 *
 * Look for a cache entry matching the given key.  If found, return the
 * contained operator OID, else return InvalidOid.
 *
 * The hash table is built lazily: the first call creates it and registers
 * InvalidateOprCacheCallBack for both the OPERNAMENSP and CASTSOURCETARGET
 * syscaches.
 */
static Oid
find_oper_cache_entry(OprCacheKey *key)
{
	OprCacheEntry *oprentry;

	if (OprCacheHash == NULL)
	{
		/* First time through: initialize the hash table */
		HASHCTL		ctl;

		/* keys are compared as raw bytes (HASH_BLOBS) */
		ctl.keysize = sizeof(OprCacheKey);
		ctl.entrysize = sizeof(OprCacheEntry);
		OprCacheHash = hash_create("Operator lookup cache", 256,
								   &ctl, HASH_ELEM | HASH_BLOBS);

		/* Arrange to flush cache on pg_operator and pg_cast changes */
		CacheRegisterSyscacheCallback(OPERNAMENSP,
									  InvalidateOprCacheCallBack,
									  (Datum) 0);
		CacheRegisterSyscacheCallback(CASTSOURCETARGET,
									  InvalidateOprCacheCallBack,
									  (Datum) 0);
	}

	/* Look for an existing entry */
	oprentry = (OprCacheEntry *) hash_search(OprCacheHash,
											 key,
											 HASH_FIND, NULL);
	if (oprentry == NULL)
		return InvalidOid;

	return oprentry->opr_oid;
}
/*
 * make_oper_cache_entry
 *
 * Record opr_oid in the cache under the given key, creating the entry if
 * it does not exist yet.  The hash table must already have been created.
 */
static void
make_oper_cache_entry(OprCacheKey *key, Oid opr_oid)
{
	OprCacheEntry *slot;

	Assert(OprCacheHash != NULL);

	/* find-or-create the slot for this key, then store the result in it */
	slot = (OprCacheEntry *) hash_search(OprCacheHash, key, HASH_ENTER, NULL);
	slot->opr_oid = opr_oid;
}
/*
 * Callback for pg_operator and pg_cast inval events
 *
 * None of the arguments are consulted: every entry in the operator cache
 * is discarded regardless of which catalog row changed.
 */
static void
InvalidateOprCacheCallBack(Datum arg, int cacheid, uint32 hashvalue)
{
	HASH_SEQ_STATUS scan;
	OprCacheEntry *victim;

	Assert(OprCacheHash != NULL);

	/* Currently we just flush all entries; hard to be smarter ... */
	hash_seq_init(&scan, OprCacheHash);

	for (;;)
	{
		victim = (OprCacheEntry *) hash_seq_search(&scan);
		if (victim == NULL)
			break;				/* scan finished */

		/* drop the entry the scan just handed us */
		if (hash_search(OprCacheHash, &victim->key, HASH_REMOVE, NULL) != NULL)
			continue;

		elog(ERROR, "hash table corrupted");
	}
}