Introduce parse_ident()

SQL-layer function to split qualified identifier into array parts.

Author: Pavel Stehule with minor editorization by me and Jim Nasby
This commit is contained in:
Teodor Sigaev 2016-03-18 18:16:14 +03:00
parent 992b5ba30d
commit 3187d6de0e
10 changed files with 375 additions and 2 deletions

View File

@ -1818,6 +1818,32 @@
<entry><literal>900150983cd24fb0 d6963f7d28e17f72</literal></entry>
</row>
<row>
<entry>
<indexterm>
<primary>parse_ident</primary>
</indexterm>
<literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
[ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
</entry>
<entry><type>text[]</type></entry>
<entry>Split <parameter>qualified identifier</parameter> into array
<parameter>parts</parameter>. When <parameter>strictmode</parameter> is
false, extra characters after the identifier are ignored. This is useful
for parsing identifiers for objects like functions and arrays that may
have trailing characters. By default, extra characters after the last
identifier are considered an error, but if second parameter is false,
then chararacters after last identifier are ignored. Note that this
function does not truncate quoted identifiers. If you care about that
you should cast the result of this function to name[]. A non-printable
chararacters (like 0 to 31) are displayed as hexadecimal codes always,
what can be different from PostgreSQL internal SQL identifiers
processing, when the original escaped value is displayed.
</entry>
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
<entry><literal>"SomeSchema,sometable"</literal></entry>
</row>
<row>
<entry>
<indexterm>

View File

@ -990,3 +990,10 @@ RETURNS jsonb
LANGUAGE INTERNAL
STRICT IMMUTABLE
AS 'jsonb_set';
CREATE OR REPLACE FUNCTION
parse_ident(str text, strict boolean DEFAULT true)
RETURNS text[]
LANGUAGE INTERNAL
STRICT IMMUTABLE
AS 'parse_ident';

View File

@ -129,6 +129,15 @@ scanstr(const char *s)
*/
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
return downcase_identifier(ident, len, warn, true);
}
/*
* a workhorse for downcase_truncate_identifier
*/
char *
downcase_identifier(const char *ident, int len, bool warn, bool truncate)
{
char *result;
int i;
@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
}
result[i] = '\0';
if (i >= NAMEDATALEN)
if (i >= NAMEDATALEN && truncate)
truncate_identifier(result, i, warn);
return result;
}
/*
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
*

View File

@ -27,6 +27,7 @@
#include "commands/dbcommands.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "parser/scansup.h"
#include "parser/keywords.h"
#include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h"
@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
}
/*
* This simple parser utility are compatible with lexer implementation,
* used only in parse_ident function
*/
static bool
is_ident_start(unsigned char c)
{
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
if (c >= 0200 && c <= 0377)
return true;
return false;
}
static bool
is_ident_cont(unsigned char c)
{
if (c >= '0' && c <= '9')
return true;
return is_ident_start(c);
}
/*
* Sanitize SQL string for using in error message.
*/
static char *
sanitize_text(text *t)
{
int len = VARSIZE_ANY_EXHDR(t);
const char *p = VARDATA_ANY(t);
StringInfo dstr;
dstr = makeStringInfo();
appendStringInfoChar(dstr, '"');
while (len--)
{
switch (*p)
{
case '\b':
appendStringInfoString(dstr, "\\b");
break;
case '\f':
appendStringInfoString(dstr, "\\f");
break;
case '\n':
appendStringInfoString(dstr, "\\n");
break;
case '\r':
appendStringInfoString(dstr, "\\r");
break;
case '\t':
appendStringInfoString(dstr, "\\t");
break;
case '\'':
appendStringInfoString(dstr, "''");
break;
case '\\':
appendStringInfoString(dstr, "\\\\");
break;
default:
if ((unsigned char) *p < ' ')
appendStringInfo(dstr, "\\u%04x", (int) *p);
else
appendStringInfoCharMacro(dstr, *p);
break;
}
p++;
}
appendStringInfoChar(dstr, '"');
return dstr->data;
}
/*
* parse_ident - parse SQL composed identifier to separate identifiers.
* When strict mode is active (second parameter), then any chars after
* last identifiers are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
text *qualname;
char *qualname_str;
bool strict;
char *nextp;
bool after_dot = false;
ArrayBuildState *astate = NULL;
qualname = PG_GETARG_TEXT_PP(0);
qualname_str = text_to_cstring(qualname);
strict = PG_GETARG_BOOL(1);
nextp = qualname_str;
/* skip leading whitespace */
while (isspace((unsigned char) *nextp))
nextp++;
for (;;)
{
char *curname;
char *endp;
bool missing_ident;
missing_ident = true;
if (*nextp == '\"')
{
curname = nextp + 1;
for (;;)
{
endp = strchr(nextp + 1, '\"');
if (endp == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unclosed double quotes"),
errdetail("string %s is not valid identifier",
sanitize_text(qualname))));
if (endp[1] != '\"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
}
nextp = endp + 1;
*endp = '\0';
/* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier should not be empty: %s",
sanitize_text(qualname))));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
else
{
if (is_ident_start((unsigned char) *nextp))
{
char *downname;
int len;
text *part;
curname = nextp++;
while (is_ident_cont((unsigned char) *nextp))
nextp++;
len = nextp - curname;
/*
* Unlike name, we don't implicitly truncate identifiers. This
* is useful for allowing the user to check for specific parts
* of the identifier being too long. It's easy enough for the
* user to get the truncated names by casting our output to
* name[].
*/
downname = downcase_identifier(curname, len, false, false);
part = cstring_to_text_with_len(downname, len);
astate = accumArrayResult(astate, PointerGetDatum(part), false,
TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
}
if (missing_ident)
{
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier before \".\" symbol: %s",
sanitize_text(qualname))));
else if (after_dot)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier after \".\" symbol: %s",
sanitize_text(qualname))));
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier: %s",
sanitize_text(qualname))));
}
while (isspace((unsigned char) *nextp))
nextp++;
if (*nextp == '.')
{
after_dot = true;
nextp++;
while (isspace((unsigned char) *nextp))
nextp++;
}
else if (*nextp == '\0')
{
break;
}
else
{
if (strict)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier contains disallowed characters: %s",
sanitize_text(qualname))));
break;
}
}
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
}

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201603151
#define CATALOG_VERSION_NO 201603181
#endif

View File

@ -3463,6 +3463,9 @@ DESCR("I/O");
DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
DESCR("convert namespace name to regnamespace");
DATA(insert OID = 1268 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strict}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
DESCR("parse qualified identifier to array of identifiers");
DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
DESCR("(internal)");
DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));

View File

@ -20,6 +20,9 @@ extern char *scanstr(const char *s);
extern char *downcase_truncate_identifier(const char *ident, int len,
bool warn);
extern char *downcase_identifier(const char *ident, int len,
bool warn, bool truncate);
extern void truncate_identifier(char *ident, int len, bool warn);
extern bool scanner_isspace(char ch);

View File

@ -510,6 +510,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
extern Datum pg_collation_for(PG_FUNCTION_ARGS);
extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
extern Datum parse_ident(PG_FUNCTION_ARGS);
/* oid.c */
extern Datum oidin(PG_FUNCTION_ARGS);

View File

@ -124,3 +124,69 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
(2 rows)
DROP TABLE NAME_TBL;
DO $$
DECLARE r text[];
BEGIN
r := parse_ident('Schemax.Tabley');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
r := parse_ident('"SchemaX"."TableY"');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
END;
$$;
NOTICE: schemax.tabley
NOTICE: "SchemaX"."TableY"
SELECT parse_ident('foo.boo');
parse_ident
-------------
{foo,boo}
(1 row)
SELECT parse_ident('foo.boo[]'); -- should fail
ERROR: identifier contains disallowed characters: "foo.boo[]"
SELECT parse_ident('foo.boo[]', strict => false); -- ok
parse_ident
-------------
{foo,boo}
(1 row)
-- should fail
SELECT parse_ident(' ');
ERROR: missing valid identifier: " "
SELECT parse_ident(' .aaa');
ERROR: missing valid identifier before "." symbol: " .aaa"
SELECT parse_ident(' aaa . ');
ERROR: missing valid identifier after "." symbol: " aaa . "
SELECT parse_ident('aaa.a%b');
ERROR: identifier contains disallowed characters: "aaa.a%b"
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
length | length
--------+--------
414 | 289
(1 row)
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
parse_ident
-----------------------------------------------------------------------------------------------------------
{first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
(1 row)
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
parse_ident
------------------------------------------------------------------------------------------------------
{first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
(1 row)
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
SELECT parse_ident('1020');
ERROR: missing valid identifier: "1020"
SELECT parse_ident('10.20');
ERROR: missing valid identifier: "10.20"
SELECT parse_ident('.');
ERROR: missing valid identifier before "." symbol: "."
SELECT parse_ident('.1020');
ERROR: missing valid identifier before "." symbol: ".1020"
SELECT parse_ident('xxx.1020');
ERROR: missing valid identifier after "." symbol: "xxx.1020"

View File

@ -52,3 +52,36 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '[0-9]';
SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
DROP TABLE NAME_TBL;
DO $$
DECLARE r text[];
BEGIN
r := parse_ident('Schemax.Tabley');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
r := parse_ident('"SchemaX"."TableY"');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
END;
$$;
SELECT parse_ident('foo.boo');
SELECT parse_ident('foo.boo[]'); -- should fail
SELECT parse_ident('foo.boo[]', strict => false); -- ok
-- should fail
SELECT parse_ident(' ');
SELECT parse_ident(' .aaa');
SELECT parse_ident(' aaa . ');
SELECT parse_ident('aaa.a%b');
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
SELECT parse_ident('1020');
SELECT parse_ident('10.20');
SELECT parse_ident('.');
SELECT parse_ident('.1020');
SELECT parse_ident('xxx.1020');