From 00f941356e56bd80427765a6a8261eb71e4eb3cc Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 18 Jan 2024 16:10:57 -0500 Subject: [PATCH] Fix plpgsql to allow new-style SQL CREATE FUNCTION as a SQL command. plpgsql fails on new-style CREATE FUNCTION/PROCEDURE commands within a routine or DO block, because make_execsql_stmt believes that a semicolon token always terminates a SQL command. Now, that's actually been wrong since the day it was written, because CREATE RULE has long allowed multiple rule actions separated by semicolons. But there are few enough people using multi-action rules that there was never an attempt to fix it. New-style SQL functions, though, are popular. psql has this same problem of "does this semicolon really terminate the command?". It deals with CREATE RULE by counting parenthesis nesting depth: a semicolon within parens doesn't end a command. Commits e717a9a18 and 029c5ac03 created a similar heuristic to count matching BEGIN/END pairs (but only within CREATEs, so as not to be fooled by plain BEGIN). That's survived several releases now without trouble reports, so let's just absorb those heuristics into plpgsql. Per report from Samuel Dussault. Back-patch to v14 where new-style SQL function syntax came in. Discussion: https://postgr.es/m/YT2PR01MB88552C3E9AD40A6C038774A781722@YT2PR01MB8855.CANPRD01.PROD.OUTLOOK.COM --- src/pl/plpgsql/src/Makefile | 5 +- src/pl/plpgsql/src/expected/plpgsql_misc.out | 31 +++++++++ src/pl/plpgsql/src/meson.build | 3 +- src/pl/plpgsql/src/pl_gram.y | 72 +++++++++++++++++--- src/pl/plpgsql/src/sql/plpgsql_misc.sql | 22 ++++++ 5 files changed, 122 insertions(+), 11 deletions(-) create mode 100644 src/pl/plpgsql/src/expected/plpgsql_misc.out create mode 100644 src/pl/plpgsql/src/sql/plpgsql_misc.sql diff --git a/src/pl/plpgsql/src/Makefile b/src/pl/plpgsql/src/Makefile index f7eb42d54f..7cd890022c 100644 --- a/src/pl/plpgsql/src/Makefile +++ b/src/pl/plpgsql/src/Makefile @@ -32,8 +32,9 @@ DATA = plpgsql.control plpgsql--1.0.sql REGRESS_OPTS = --dbname=$(PL_TESTDB) -REGRESS = plpgsql_array plpgsql_call plpgsql_control plpgsql_copy plpgsql_domain \ - plpgsql_record plpgsql_cache plpgsql_simple plpgsql_transaction \ +REGRESS = plpgsql_array plpgsql_cache plpgsql_call plpgsql_control \ + plpgsql_copy plpgsql_domain plpgsql_misc \ + plpgsql_record plpgsql_simple plpgsql_transaction \ plpgsql_trap plpgsql_trigger plpgsql_varprops # where to find gen_keywordlist.pl and subsidiary files diff --git a/src/pl/plpgsql/src/expected/plpgsql_misc.out b/src/pl/plpgsql/src/expected/plpgsql_misc.out new file mode 100644 index 0000000000..faddba2511 --- /dev/null +++ b/src/pl/plpgsql/src/expected/plpgsql_misc.out @@ -0,0 +1,31 @@ +-- +-- Miscellaneous topics +-- +-- Verify that we can parse new-style CREATE FUNCTION/PROCEDURE +do +$$ + declare procedure int; -- check we still recognize non-keywords as vars + begin + create function test1() returns int + begin atomic + select 2 + 2; + end; + create or replace procedure test2(x int) + begin atomic + select x + 2; + end; + end +$$; +\sf test1 +CREATE OR REPLACE FUNCTION public.test1() + RETURNS integer + LANGUAGE sql +BEGIN ATOMIC + SELECT (2 + 2); +END +\sf test2 +CREATE OR REPLACE PROCEDURE public.test2(IN x integer) + LANGUAGE sql +BEGIN ATOMIC + SELECT (x + 2); +END diff --git a/src/pl/plpgsql/src/meson.build b/src/pl/plpgsql/src/meson.build index 85e7293b37..bc63e35612 100644 --- a/src/pl/plpgsql/src/meson.build +++ b/src/pl/plpgsql/src/meson.build @@ -76,12 +76,13 @@ tests += { 'regress': { 'sql': [ 'plpgsql_array', + 'plpgsql_cache', 'plpgsql_call', 'plpgsql_control', 'plpgsql_copy', 'plpgsql_domain', + 'plpgsql_misc', 'plpgsql_record', - 'plpgsql_cache', 'plpgsql_simple', 'plpgsql_transaction', 'plpgsql_trap', diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 6a09bfdd67..1d9182f270 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -76,7 +76,8 @@ static PLpgSQL_expr *read_sql_expression2(int until, int until2, int *endtoken); static PLpgSQL_expr *read_sql_stmt(void); static PLpgSQL_type *read_datatype(int tok); -static PLpgSQL_stmt *make_execsql_stmt(int firsttoken, int location); +static PLpgSQL_stmt *make_execsql_stmt(int firsttoken, int location, + PLword *word); static PLpgSQL_stmt_fetch *read_fetch_direction(void); static void complete_direction(PLpgSQL_stmt_fetch *fetch, bool *check_FROM); @@ -1971,15 +1972,15 @@ loop_body : proc_sect K_END K_LOOP opt_label ';' */ stmt_execsql : K_IMPORT { - $$ = make_execsql_stmt(K_IMPORT, @1); + $$ = make_execsql_stmt(K_IMPORT, @1, NULL); } | K_INSERT { - $$ = make_execsql_stmt(K_INSERT, @1); + $$ = make_execsql_stmt(K_INSERT, @1, NULL); } | K_MERGE { - $$ = make_execsql_stmt(K_MERGE, @1); + $$ = make_execsql_stmt(K_MERGE, @1, NULL); } | T_WORD { @@ -1990,7 +1991,7 @@ stmt_execsql : K_IMPORT if (tok == '=' || tok == COLON_EQUALS || tok == '[' || tok == '.') word_is_not_variable(&($1), @1); - $$ = make_execsql_stmt(T_WORD, @1); + $$ = make_execsql_stmt(T_WORD, @1, &($1)); } | T_CWORD { @@ -2001,7 +2002,7 @@ stmt_execsql : K_IMPORT if (tok == '=' || tok == COLON_EQUALS || tok == '[' || tok == '.') cword_is_not_variable(&($1), @1); - $$ = make_execsql_stmt(T_CWORD, @1); + $$ = make_execsql_stmt(T_CWORD, @1, NULL); } ; @@ -2919,8 +2920,13 @@ read_datatype(int tok) return result; } +/* + * Read a generic SQL statement. We have already read its first token; + * firsttoken is that token's code and location its starting location. + * If firsttoken == T_WORD, pass its yylval value as "word", else pass NULL. + */ static PLpgSQL_stmt * -make_execsql_stmt(int firsttoken, int location) +make_execsql_stmt(int firsttoken, int location, PLword *word) { StringInfoData ds; IdentifierLookup save_IdentifierLookup; @@ -2933,9 +2939,16 @@ make_execsql_stmt(int firsttoken, int location) bool have_strict = false; int into_start_loc = -1; int into_end_loc = -1; + int paren_depth = 0; + int begin_depth = 0; + bool in_routine_definition = false; + int token_count = 0; + char tokens[4]; /* records the first few tokens */ initStringInfo(&ds); + memset(tokens, 0, sizeof(tokens)); + /* special lookup mode for identifiers within the SQL text */ save_IdentifierLookup = plpgsql_IdentifierLookup; plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_EXPR; @@ -2944,6 +2957,12 @@ make_execsql_stmt(int firsttoken, int location) * Scan to the end of the SQL command. Identify any INTO-variables * clause lurking within it, and parse that via read_into_target(). * + * The end of the statement is defined by a semicolon ... except that + * semicolons within parentheses or BEGIN/END blocks don't terminate a + * statement. We follow psql's lead in not recognizing BEGIN/END except + * after CREATE [OR REPLACE] {FUNCTION|PROCEDURE}. END can also appear + * within a CASE construct, so we treat CASE/END like BEGIN/END. + * * Because INTO is sometimes used in the main SQL grammar, we have to be * careful not to take any such usage of INTO as a PL/pgSQL INTO clause. * There are currently three such cases: @@ -2969,13 +2988,50 @@ make_execsql_stmt(int firsttoken, int location) * break this logic again ... beware! */ tok = firsttoken; + if (tok == T_WORD && strcmp(word->ident, "create") == 0) + tokens[token_count] = 'c'; + token_count++; + for (;;) { prev_tok = tok; tok = yylex(); if (have_into && into_end_loc < 0) into_end_loc = yylloc; /* token after the INTO part */ - if (tok == ';') + /* Detect CREATE [OR REPLACE] {FUNCTION|PROCEDURE} */ + if (tokens[0] == 'c' && token_count < sizeof(tokens)) + { + if (tok == K_OR) + tokens[token_count] = 'o'; + else if (tok == T_WORD && + strcmp(yylval.word.ident, "replace") == 0) + tokens[token_count] = 'r'; + else if (tok == T_WORD && + strcmp(yylval.word.ident, "function") == 0) + tokens[token_count] = 'f'; + else if (tok == T_WORD && + strcmp(yylval.word.ident, "procedure") == 0) + tokens[token_count] = 'f'; /* treat same as "function" */ + if (tokens[1] == 'f' || + (tokens[1] == 'o' && tokens[2] == 'r' && tokens[3] == 'f')) + in_routine_definition = true; + token_count++; + } + /* Track paren nesting (needed for CREATE RULE syntax) */ + if (tok == '(') + paren_depth++; + else if (tok == ')' && paren_depth > 0) + paren_depth--; + /* We need track BEGIN/END nesting only in a routine definition */ + if (in_routine_definition && paren_depth == 0) + { + if (tok == K_BEGIN || tok == K_CASE) + begin_depth++; + else if (tok == K_END && begin_depth > 0) + begin_depth--; + } + /* Command-ending semicolon? */ + if (tok == ';' && paren_depth == 0 && begin_depth == 0) break; if (tok == 0) yyerror("unexpected end of function definition"); diff --git a/src/pl/plpgsql/src/sql/plpgsql_misc.sql b/src/pl/plpgsql/src/sql/plpgsql_misc.sql new file mode 100644 index 0000000000..71a8fc232e --- /dev/null +++ b/src/pl/plpgsql/src/sql/plpgsql_misc.sql @@ -0,0 +1,22 @@ +-- +-- Miscellaneous topics +-- + +-- Verify that we can parse new-style CREATE FUNCTION/PROCEDURE +do +$$ + declare procedure int; -- check we still recognize non-keywords as vars + begin + create function test1() returns int + begin atomic + select 2 + 2; + end; + create or replace procedure test2(x int) + begin atomic + select x + 2; + end; + end +$$; + +\sf test1 +\sf test2