From 750487077802861a2accd94164166747ec311fa4 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Sat, 20 Nov 2010 00:21:17 -0500 Subject: [PATCH] Add new SQL function, format(text). Currently, three conversion format specifiers are supported: %s for a string, %L for an SQL literal, and %I for an SQL identifier. The latter two are deliberately designed not to overlap with what sprintf() already supports, in case we want to add more of sprintf()'s functionality here later. Patch by Pavel Stehule, heavily revised by me. Reviewed by Jeff Janes and, in earlier versions, by Itagaki Takahiro and Tom Lane. --- doc/src/sgml/func.sgml | 25 ++++ doc/src/sgml/plpgsql.sgml | 23 ++++ src/backend/utils/adt/varlena.c | 195 +++++++++++++++++++++++++++++ src/include/catalog/pg_proc.h | 4 + src/include/utils/builtins.h | 2 + src/test/regress/expected/text.out | 108 ++++++++++++++-- src/test/regress/sql/text.sql | 32 ++++- 7 files changed, 374 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 11dd3bc6f9..e4d00b2403 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1271,6 +1271,9 @@ encode + + format + initcap @@ -1496,6 +1499,28 @@ MTIzAAE= + + + format(formatstr text + [, str "any" [, ...] ]) + + text + + Format a string. This function is similar to the C function + sprintf; but only the following conversions + are recognized: %s interpolates the corresponding + argument as a string; %I escapes its argument as + an SQL identifier; %L escapes its argument as an + SQL literal; %% outputs a literal %. + A conversion can reference an explicit parameter position by preceding + the conversion specifier with n$, where + n is the argument position. + See also . + + format('Hello %s, %1$s', 'World') + Hello World, World + + initcap(string) text diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml index 934bea6c37..527eaecc9b 100644 --- a/doc/src/sgml/plpgsql.sgml +++ b/doc/src/sgml/plpgsql.sgml @@ -1152,6 +1152,11 @@ EXECUTE 'SELECT count(*) FROM ' use in PL/pgSQL + + format + use in PL/pgSQL + + When working with dynamic commands you will often have to handle escaping of single quotes. The recommended method for quoting fixed text in your @@ -1250,6 +1255,24 @@ EXECUTE 'UPDATE tbl SET ' must use quote_literal, quote_nullable, or quote_ident, as appropriate. + + + Dynamic SQL statements can also be safely constructed using the + format function (see ). For example: + +EXECUTE format('UPDATE tbl SET %I = %L WHERE key = %L', colname, newvalue, keyvalue); + + The format function can be used in conjunction with + the USING clause: + +EXECUTE format('UPDATE tbl SET %I = $1 WHERE key = $2', colname) + USING newvalue, keyvalue; + + This form is more efficient, because the parameters + newvalue and keyvalue are not + converted to text. + diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 363fd3ce49..15b6d7c4fd 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -15,6 +15,7 @@ #include "postgres.h" #include +#include #include "access/tuptoaster.h" #include "catalog/pg_type.h" @@ -74,6 +75,8 @@ static bytea *bytea_substring(Datum str, bool length_not_specified); static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); static StringInfo makeStringAggState(FunctionCallInfo fcinfo); +void text_format_string_conversion(StringInfo buf, char conversion, + Oid typid, Datum value, bool isNull); static Datum text_to_array_internal(PG_FUNCTION_ARGS); static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, @@ -3702,3 +3705,195 @@ text_reverse(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(result); } + +/* + * Returns a formated string + */ +Datum +text_format(PG_FUNCTION_ARGS) +{ + text *fmt; + StringInfoData str; + const char *cp; + const char *start_ptr; + const char *end_ptr; + text *result; + int arg = 0; + + /* When format string is null, returns null */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + /* Setup for main loop. */ + fmt = PG_GETARG_TEXT_PP(0); + start_ptr = VARDATA_ANY(fmt); + end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt); + initStringInfo(&str); + + /* Scan format string, looking for conversion specifiers. */ + for (cp = start_ptr; cp < end_ptr; cp++) + { + Datum value; + bool isNull; + Oid typid; + + /* + * If it's not the start of a conversion specifier, just copy it to + * the output buffer. + */ + if (*cp != '%') + { + appendStringInfoCharMacro(&str, *cp); + continue; + } + + /* Did we run off the end of the string? */ + if (++cp >= end_ptr) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unterminated conversion specifier"))); + + /* Easy case: %% outputs a single % */ + if (*cp == '%') + { + appendStringInfoCharMacro(&str, *cp); + continue; + } + + /* + * If the user hasn't specified an argument position, we just advance + * to the next one. If they have, we must parse it. + */ + if (*cp < '0' || *cp > '9') + ++arg; + else + { + bool unterminated = false; + + /* Parse digit string. */ + arg = 0; + do { + /* Treat overflowing arg position as unterminated. */ + if (arg > INT_MAX / 10) + break; + arg = arg * 10 + (*cp - '0'); + ++cp; + } while (cp < end_ptr && *cp >= '0' && *cp <= '9'); + + /* + * If we ran off the end, or if there's not a $ next, or if the $ + * is the last character, the conversion specifier is improperly + * terminated. + */ + if (cp == end_ptr || *cp != '$') + unterminated = true; + else + { + ++cp; + if (cp == end_ptr) + unterminated = true; + } + if (unterminated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unterminated conversion specifier"))); + + /* There's no argument 0. */ + if (arg == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("conversion specifies argument 0, but arguments are numbered from 1"))); + } + + /* Not enough arguments? Deduct 1 to avoid counting format string. */ + if (arg > PG_NARGS() - 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("too few arguments for format conversion"))); + + /* + * At this point, we should see the main conversion specifier. + * Whether or not an argument position was present, it's known + * that at least one character remains in the string at this point. + */ + value = PG_GETARG_DATUM(arg); + isNull = PG_ARGISNULL(arg); + typid = get_fn_expr_argtype(fcinfo->flinfo, arg); + + switch (*cp) + { + case 's': + case 'I': + case 'L': + text_format_string_conversion(&str, *cp, typid, value, isNull); + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized conversion specifier: %c", + *cp))); + } + } + + /* Generate results. */ + result = cstring_to_text_with_len(str.data, str.len); + pfree(str.data); + + PG_RETURN_TEXT_P(result); +} + +/* Format a %s, %I, or %L conversion. */ +void +text_format_string_conversion(StringInfo buf, char conversion, + Oid typid, Datum value, bool isNull) +{ + Oid typOutput; + bool typIsVarlena; + char *str; + + /* Handle NULL arguments before trying to stringify the value. */ + if (isNull) + { + if (conversion == 'L') + appendStringInfoString(buf, "NULL"); + else if (conversion == 'I') + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("NULL cannot be escaped as an SQL identifier"))); + return; + } + + /* Stringify. */ + getTypeOutputInfo(typid, &typOutput, &typIsVarlena); + str = OidOutputFunctionCall(typOutput, value); + + /* Escape. */ + if (conversion == 'I') + { + /* quote_identifier may or may not allocate a new string. */ + appendStringInfoString(buf, quote_identifier(str)); + } + else if (conversion == 'L') + { + char *qstr = quote_literal_cstr(str); + appendStringInfoString(buf, qstr); + /* quote_literal_cstr() always allocates a new string */ + pfree(qstr); + } + else + appendStringInfoString(buf, str); + + /* Cleanup. */ + pfree(str); +} + +/* + * text_format_nv - nonvariadic wrapper for text_format function. + * + * note: this wrapper is necessary to be sanity_checks test ok + */ +Datum +text_format_nv(PG_FUNCTION_ARGS) +{ + return text_format(fcinfo); +} diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 3e17eff0bf..25a391282a 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2744,6 +2744,10 @@ DATA(insert OID = 3061 ( right PGNSP PGUID 12 1 0 0 f f f t f i 2 0 25 "25 23" DESCR("return the last n characters"); DATA(insert OID = 3062 ( reverse PGNSP PGUID 12 1 0 0 f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ text_reverse _null_ _null_ _null_ )); DESCR("reverse text"); +DATA(insert OID = 3539 ( format PGNSP PGUID 12 1 0 2276 f f f f f s 2 0 25 "25 2276" "{25,2276}" "{i,v}" _null_ _null_ text_format _null_ _null_ _null_ )); +DESCR("format text message"); +DATA(insert OID = 3540 ( format PGNSP PGUID 12 1 0 0 f f f f f s 1 0 25 "25" _null_ _null_ _null_ _null_ text_format_nv _null_ _null_ _null_ )); +DESCR("format text message"); DATA(insert OID = 1810 ( bit_length PGNSP PGUID 14 1 0 0 f f f t f i 1 0 23 "17" _null_ _null_ _null_ _null_ "select pg_catalog.octet_length($1) * 8" _null_ _null_ _null_ )); DESCR("length in bits"); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index ae267ab17a..bad0a8ebdf 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -743,6 +743,8 @@ extern Datum text_concat_ws(PG_FUNCTION_ARGS); extern Datum text_left(PG_FUNCTION_ARGS); extern Datum text_right(PG_FUNCTION_ARGS); extern Datum text_reverse(PG_FUNCTION_ARGS); +extern Datum text_format(PG_FUNCTION_ARGS); +extern Datum text_format_nv(PG_FUNCTION_ARGS); /* version.c */ extern Datum pgsql_version(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/text.out b/src/test/regress/expected/text.out index 0b0014a6e2..70818389e4 100644 --- a/src/test/regress/expected/text.out +++ b/src/test/regress/expected/text.out @@ -118,21 +118,105 @@ select i, left('ahoj', i), right('ahoj', i) from generate_series(-5, 5) t(i) ord 5 | ahoj | ahoj (11 rows) -select quote_literal(''); - quote_literal ---------------- - '' +select format(NULL); + format +-------- + (1 row) -select quote_literal('abc'''); - quote_literal ---------------- - 'abc''' +select format('Hello'); + format +-------- + Hello (1 row) -select quote_literal(e'\\'); - quote_literal ---------------- - E'\\' +select format('Hello %s', 'World'); + format +------------- + Hello World +(1 row) + +select format('Hello %%'); + format +--------- + Hello % +(1 row) + +select format('Hello %%%%'); + format +---------- + Hello %% +(1 row) + +-- should fail +select format('Hello %s %s', 'World'); +ERROR: too few arguments for format conversion +select format('Hello %s'); +ERROR: too few arguments for format conversion +select format('Hello %x', 20); +ERROR: unrecognized conversion specifier: x +-- check literal and sql identifiers +select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', 10, 'Hello'); + format +---------------------------------------- + INSERT INTO mytab VALUES('10','Hello') +(1 row) + +select format('%s%s%s','Hello', NULL,'World'); + format +------------ + HelloWorld +(1 row) + +select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', 10, NULL); + format +------------------------------------- + INSERT INTO mytab VALUES('10',NULL) +(1 row) + +select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', NULL, 'Hello'); + format +---------------------------------------- + INSERT INTO mytab VALUES(NULL,'Hello') +(1 row) + +-- should fail, sql identifier cannot be NULL +select format('INSERT INTO %I VALUES(%L,%L)', NULL, 10, 'Hello'); +ERROR: NULL cannot be escaped as an SQL identifier +-- check positional placeholders +select format('%1$s %3$s', 1, 2, 3); + format +-------- + 1 3 +(1 row) + +select format('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); + format +-------- + 1 12 +(1 row) + +-- should fail +select format('%1$s %4$s', 1, 2, 3); +ERROR: too few arguments for format conversion +select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); +ERROR: too few arguments for format conversion +select format('%1s', 1); +ERROR: unterminated conversion specifier +select format('%1$', 1); +ERROR: unterminated conversion specifier +select format('%1$1', 1); +ERROR: unrecognized conversion specifier: 1 +--checkk mix of positional and ordered placeholders +select format('Hello %s %1$s %s', 'World', 'Hello again'); + format +------------------------------- + Hello World World Hello again +(1 row) + +select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again'); + format +-------------------------------------------------- + Hello World Hello again, Hello again Hello again (1 row) diff --git a/src/test/regress/sql/text.sql b/src/test/regress/sql/text.sql index 50c3033d9e..563e2589ad 100644 --- a/src/test/regress/sql/text.sql +++ b/src/test/regress/sql/text.sql @@ -41,6 +41,32 @@ select concat_ws('',10,20,null,30); select concat_ws(NULL,10,20,null,30) is null; select reverse('abcde'); select i, left('ahoj', i), right('ahoj', i) from generate_series(-5, 5) t(i) order by i; -select quote_literal(''); -select quote_literal('abc'''); -select quote_literal(e'\\'); + +select format(NULL); +select format('Hello'); +select format('Hello %s', 'World'); +select format('Hello %%'); +select format('Hello %%%%'); +-- should fail +select format('Hello %s %s', 'World'); +select format('Hello %s'); +select format('Hello %x', 20); +-- check literal and sql identifiers +select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', 10, 'Hello'); +select format('%s%s%s','Hello', NULL,'World'); +select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', 10, NULL); +select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', NULL, 'Hello'); +-- should fail, sql identifier cannot be NULL +select format('INSERT INTO %I VALUES(%L,%L)', NULL, 10, 'Hello'); +-- check positional placeholders +select format('%1$s %3$s', 1, 2, 3); +select format('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); +-- should fail +select format('%1$s %4$s', 1, 2, 3); +select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); +select format('%1s', 1); +select format('%1$', 1); +select format('%1$1', 1); +--checkk mix of positional and ordered placeholders +select format('Hello %s %1$s %s', 'World', 'Hello again'); +select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again');