Introduce parse_ident()
SQL-layer function to split qualified identifier into array parts. Author: Pavel Stehule with minor editorization by me and Jim Nasby
This commit is contained in:
parent
992b5ba30d
commit
3187d6de0e
|
@ -1818,6 +1818,32 @@
|
|||
<entry><literal>900150983cd24fb0 d6963f7d28e17f72</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<indexterm>
|
||||
<primary>parse_ident</primary>
|
||||
</indexterm>
|
||||
<literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
|
||||
[ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
|
||||
</entry>
|
||||
<entry><type>text[]</type></entry>
|
||||
<entry>Split <parameter>qualified identifier</parameter> into array
|
||||
<parameter>parts</parameter>. When <parameter>strictmode</parameter> is
|
||||
false, extra characters after the identifier are ignored. This is useful
|
||||
for parsing identifiers for objects like functions and arrays that may
|
||||
have trailing characters. By default, extra characters after the last
|
||||
identifier are considered an error, but if second parameter is false,
|
||||
then chararacters after last identifier are ignored. Note that this
|
||||
function does not truncate quoted identifiers. If you care about that
|
||||
you should cast the result of this function to name[]. A non-printable
|
||||
chararacters (like 0 to 31) are displayed as hexadecimal codes always,
|
||||
what can be different from PostgreSQL internal SQL identifiers
|
||||
processing, when the original escaped value is displayed.
|
||||
</entry>
|
||||
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
|
||||
<entry><literal>"SomeSchema,sometable"</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<indexterm>
|
||||
|
|
|
@ -990,3 +990,10 @@ RETURNS jsonb
|
|||
LANGUAGE INTERNAL
|
||||
STRICT IMMUTABLE
|
||||
AS 'jsonb_set';
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
parse_ident(str text, strict boolean DEFAULT true)
|
||||
RETURNS text[]
|
||||
LANGUAGE INTERNAL
|
||||
STRICT IMMUTABLE
|
||||
AS 'parse_ident';
|
||||
|
|
|
@ -129,6 +129,15 @@ scanstr(const char *s)
|
|||
*/
|
||||
char *
|
||||
downcase_truncate_identifier(const char *ident, int len, bool warn)
|
||||
{
|
||||
return downcase_identifier(ident, len, warn, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* a workhorse for downcase_truncate_identifier
|
||||
*/
|
||||
char *
|
||||
downcase_identifier(const char *ident, int len, bool warn, bool truncate)
|
||||
{
|
||||
char *result;
|
||||
int i;
|
||||
|
@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
|
|||
}
|
||||
result[i] = '\0';
|
||||
|
||||
if (i >= NAMEDATALEN)
|
||||
if (i >= NAMEDATALEN && truncate)
|
||||
truncate_identifier(result, i, warn);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
|
||||
*
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "commands/dbcommands.h"
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
#include "parser/scansup.h"
|
||||
#include "parser/keywords.h"
|
||||
#include "postmaster/syslogger.h"
|
||||
#include "rewrite/rewriteHandler.h"
|
||||
|
@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
|
|||
|
||||
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This simple parser utility are compatible with lexer implementation,
|
||||
* used only in parse_ident function
|
||||
*/
|
||||
static bool
|
||||
is_ident_start(unsigned char c)
|
||||
{
|
||||
if (c == '_')
|
||||
return true;
|
||||
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
|
||||
return true;
|
||||
|
||||
if (c >= 0200 && c <= 0377)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_ident_cont(unsigned char c)
|
||||
{
|
||||
if (c >= '0' && c <= '9')
|
||||
return true;
|
||||
|
||||
return is_ident_start(c);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sanitize SQL string for using in error message.
|
||||
*/
|
||||
static char *
|
||||
sanitize_text(text *t)
|
||||
{
|
||||
int len = VARSIZE_ANY_EXHDR(t);
|
||||
const char *p = VARDATA_ANY(t);
|
||||
StringInfo dstr;
|
||||
|
||||
dstr = makeStringInfo();
|
||||
|
||||
appendStringInfoChar(dstr, '"');
|
||||
|
||||
while (len--)
|
||||
{
|
||||
switch (*p)
|
||||
{
|
||||
case '\b':
|
||||
appendStringInfoString(dstr, "\\b");
|
||||
break;
|
||||
case '\f':
|
||||
appendStringInfoString(dstr, "\\f");
|
||||
break;
|
||||
case '\n':
|
||||
appendStringInfoString(dstr, "\\n");
|
||||
break;
|
||||
case '\r':
|
||||
appendStringInfoString(dstr, "\\r");
|
||||
break;
|
||||
case '\t':
|
||||
appendStringInfoString(dstr, "\\t");
|
||||
break;
|
||||
case '\'':
|
||||
appendStringInfoString(dstr, "''");
|
||||
break;
|
||||
case '\\':
|
||||
appendStringInfoString(dstr, "\\\\");
|
||||
break;
|
||||
default:
|
||||
if ((unsigned char) *p < ' ')
|
||||
appendStringInfo(dstr, "\\u%04x", (int) *p);
|
||||
else
|
||||
appendStringInfoCharMacro(dstr, *p);
|
||||
break;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
|
||||
appendStringInfoChar(dstr, '"');
|
||||
|
||||
return dstr->data;
|
||||
}
|
||||
|
||||
/*
|
||||
* parse_ident - parse SQL composed identifier to separate identifiers.
|
||||
* When strict mode is active (second parameter), then any chars after
|
||||
* last identifiers are disallowed.
|
||||
*/
|
||||
Datum
|
||||
parse_ident(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *qualname;
|
||||
char *qualname_str;
|
||||
bool strict;
|
||||
char *nextp;
|
||||
bool after_dot = false;
|
||||
ArrayBuildState *astate = NULL;
|
||||
|
||||
qualname = PG_GETARG_TEXT_PP(0);
|
||||
qualname_str = text_to_cstring(qualname);
|
||||
strict = PG_GETARG_BOOL(1);
|
||||
|
||||
nextp = qualname_str;
|
||||
|
||||
/* skip leading whitespace */
|
||||
while (isspace((unsigned char) *nextp))
|
||||
nextp++;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
char *curname;
|
||||
char *endp;
|
||||
bool missing_ident;
|
||||
|
||||
missing_ident = true;
|
||||
|
||||
if (*nextp == '\"')
|
||||
{
|
||||
curname = nextp + 1;
|
||||
for (;;)
|
||||
{
|
||||
endp = strchr(nextp + 1, '\"');
|
||||
if (endp == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unclosed double quotes"),
|
||||
errdetail("string %s is not valid identifier",
|
||||
sanitize_text(qualname))));
|
||||
if (endp[1] != '\"')
|
||||
break;
|
||||
memmove(endp, endp + 1, strlen(endp));
|
||||
nextp = endp;
|
||||
}
|
||||
nextp = endp + 1;
|
||||
*endp = '\0';
|
||||
|
||||
/* Show complete input string in this case. */
|
||||
if (endp - curname == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("identifier should not be empty: %s",
|
||||
sanitize_text(qualname))));
|
||||
|
||||
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
|
||||
false, TEXTOID, CurrentMemoryContext);
|
||||
missing_ident = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_ident_start((unsigned char) *nextp))
|
||||
{
|
||||
char *downname;
|
||||
int len;
|
||||
text *part;
|
||||
|
||||
curname = nextp++;
|
||||
while (is_ident_cont((unsigned char) *nextp))
|
||||
nextp++;
|
||||
|
||||
len = nextp - curname;
|
||||
|
||||
/*
|
||||
* Unlike name, we don't implicitly truncate identifiers. This
|
||||
* is useful for allowing the user to check for specific parts
|
||||
* of the identifier being too long. It's easy enough for the
|
||||
* user to get the truncated names by casting our output to
|
||||
* name[].
|
||||
*/
|
||||
downname = downcase_identifier(curname, len, false, false);
|
||||
part = cstring_to_text_with_len(downname, len);
|
||||
astate = accumArrayResult(astate, PointerGetDatum(part), false,
|
||||
TEXTOID, CurrentMemoryContext);
|
||||
missing_ident = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (missing_ident)
|
||||
{
|
||||
/* Different error messages based on where we failed. */
|
||||
if (*nextp == '.')
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("missing valid identifier before \".\" symbol: %s",
|
||||
sanitize_text(qualname))));
|
||||
else if (after_dot)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("missing valid identifier after \".\" symbol: %s",
|
||||
sanitize_text(qualname))));
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("missing valid identifier: %s",
|
||||
sanitize_text(qualname))));
|
||||
}
|
||||
|
||||
while (isspace((unsigned char) *nextp))
|
||||
nextp++;
|
||||
|
||||
if (*nextp == '.')
|
||||
{
|
||||
after_dot = true;
|
||||
nextp++;
|
||||
while (isspace((unsigned char) *nextp))
|
||||
nextp++;
|
||||
}
|
||||
else if (*nextp == '\0')
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (strict)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("identifier contains disallowed characters: %s",
|
||||
sanitize_text(qualname))));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
|
||||
}
|
||||
|
|
|
@ -53,6 +53,6 @@
|
|||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 201603151
|
||||
#define CATALOG_VERSION_NO 201603181
|
||||
|
||||
#endif
|
||||
|
|
|
@ -3463,6 +3463,9 @@ DESCR("I/O");
|
|||
DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
|
||||
DESCR("convert namespace name to regnamespace");
|
||||
|
||||
DATA(insert OID = 1268 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strict}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
|
||||
DESCR("parse qualified identifier to array of identifiers");
|
||||
|
||||
DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
|
||||
DESCR("(internal)");
|
||||
DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));
|
||||
|
|
|
@ -20,6 +20,9 @@ extern char *scanstr(const char *s);
|
|||
extern char *downcase_truncate_identifier(const char *ident, int len,
|
||||
bool warn);
|
||||
|
||||
extern char *downcase_identifier(const char *ident, int len,
|
||||
bool warn, bool truncate);
|
||||
|
||||
extern void truncate_identifier(char *ident, int len, bool warn);
|
||||
|
||||
extern bool scanner_isspace(char ch);
|
||||
|
|
|
@ -510,6 +510,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
|
|||
extern Datum pg_collation_for(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
|
||||
extern Datum parse_ident(PG_FUNCTION_ARGS);
|
||||
|
||||
/* oid.c */
|
||||
extern Datum oidin(PG_FUNCTION_ARGS);
|
||||
|
|
|
@ -124,3 +124,69 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
|
|||
(2 rows)
|
||||
|
||||
DROP TABLE NAME_TBL;
|
||||
DO $$
|
||||
DECLARE r text[];
|
||||
BEGIN
|
||||
r := parse_ident('Schemax.Tabley');
|
||||
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
|
||||
r := parse_ident('"SchemaX"."TableY"');
|
||||
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
|
||||
END;
|
||||
$$;
|
||||
NOTICE: schemax.tabley
|
||||
NOTICE: "SchemaX"."TableY"
|
||||
SELECT parse_ident('foo.boo');
|
||||
parse_ident
|
||||
-------------
|
||||
{foo,boo}
|
||||
(1 row)
|
||||
|
||||
SELECT parse_ident('foo.boo[]'); -- should fail
|
||||
ERROR: identifier contains disallowed characters: "foo.boo[]"
|
||||
SELECT parse_ident('foo.boo[]', strict => false); -- ok
|
||||
parse_ident
|
||||
-------------
|
||||
{foo,boo}
|
||||
(1 row)
|
||||
|
||||
-- should fail
|
||||
SELECT parse_ident(' ');
|
||||
ERROR: missing valid identifier: " "
|
||||
SELECT parse_ident(' .aaa');
|
||||
ERROR: missing valid identifier before "." symbol: " .aaa"
|
||||
SELECT parse_ident(' aaa . ');
|
||||
ERROR: missing valid identifier after "." symbol: " aaa . "
|
||||
SELECT parse_ident('aaa.a%b');
|
||||
ERROR: identifier contains disallowed characters: "aaa.a%b"
|
||||
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
|
||||
ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
|
||||
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
|
||||
length | length
|
||||
--------+--------
|
||||
414 | 289
|
||||
(1 row)
|
||||
|
||||
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
|
||||
parse_ident
|
||||
-----------------------------------------------------------------------------------------------------------
|
||||
{first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
|
||||
(1 row)
|
||||
|
||||
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
|
||||
parse_ident
|
||||
------------------------------------------------------------------------------------------------------
|
||||
{first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
|
||||
(1 row)
|
||||
|
||||
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
|
||||
ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
|
||||
SELECT parse_ident('1020');
|
||||
ERROR: missing valid identifier: "1020"
|
||||
SELECT parse_ident('10.20');
|
||||
ERROR: missing valid identifier: "10.20"
|
||||
SELECT parse_ident('.');
|
||||
ERROR: missing valid identifier before "." symbol: "."
|
||||
SELECT parse_ident('.1020');
|
||||
ERROR: missing valid identifier before "." symbol: ".1020"
|
||||
SELECT parse_ident('xxx.1020');
|
||||
ERROR: missing valid identifier after "." symbol: "xxx.1020"
|
||||
|
|
|
@ -52,3 +52,36 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '[0-9]';
|
|||
SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
|
||||
|
||||
DROP TABLE NAME_TBL;
|
||||
|
||||
DO $$
|
||||
DECLARE r text[];
|
||||
BEGIN
|
||||
r := parse_ident('Schemax.Tabley');
|
||||
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
|
||||
r := parse_ident('"SchemaX"."TableY"');
|
||||
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
|
||||
END;
|
||||
$$;
|
||||
|
||||
SELECT parse_ident('foo.boo');
|
||||
SELECT parse_ident('foo.boo[]'); -- should fail
|
||||
SELECT parse_ident('foo.boo[]', strict => false); -- ok
|
||||
|
||||
-- should fail
|
||||
SELECT parse_ident(' ');
|
||||
SELECT parse_ident(' .aaa');
|
||||
SELECT parse_ident(' aaa . ');
|
||||
SELECT parse_ident('aaa.a%b');
|
||||
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
|
||||
|
||||
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
|
||||
|
||||
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
|
||||
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
|
||||
|
||||
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
|
||||
SELECT parse_ident('1020');
|
||||
SELECT parse_ident('10.20');
|
||||
SELECT parse_ident('.');
|
||||
SELECT parse_ident('.1020');
|
||||
SELECT parse_ident('xxx.1020');
|
||||
|
|
Loading…
Reference in New Issue