From 6f0cef9353fcef50e74c31bb06c96c40207fd535 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 16 Apr 2024 12:31:32 -0400 Subject: [PATCH] Fix assorted bugs in ecpg's macro mechanism. The code associated with EXEC SQL DEFINE was unreadable and full of bugs, notably: * It'd attempt to free a non-malloced string if the ecpg program tries to redefine a macro that was defined on the command line. * Possible memory stomp if user writes "-D=foo". * Undef'ing or redefining a macro defined on the command line would change the state visible to the next file, when multiple files are specified on the command line. (While possibly that could have been an intentional choice, the code clearly intends to revert to the original macro state; it's just failing to consider this interaction.) * Missing "break" in defining a new macro meant that redefinition of an existing name would cause an extra entry to be added to the definition list. While not immediately harmful, a subsequent undef would result in the prior entry becoming visible again. * The interactions with input buffering are subtle and were entirely undocumented. It's not that surprising that we hadn't noticed these bugs, because there was no test coverage at all of either the -D command line switch or multiple input files. This patch adds such coverage (in a rather hacky way I guess). In addition to the code bugs, the user documentation was confused about whether the -D switch defines a C macro or an ecpg one, and it failed to mention that you can write "-Dsymbol=value". These problems are old, so back-patch to all supported branches. Discussion: https://postgr.es/m/998011.1713217712@sss.pgh.pa.us --- doc/src/sgml/ecpg.sgml | 8 ++ doc/src/sgml/ref/ecpg-ref.sgml | 6 +- src/interfaces/ecpg/preproc/ecpg.c | 79 ++++++----- src/interfaces/ecpg/preproc/pgc.l | 125 ++++++++++++------ src/interfaces/ecpg/preproc/type.h | 22 ++- .../ecpg/test/expected/sql-define.c | 67 +++++++++- .../ecpg/test/expected/sql-define.stderr | 24 ++++ .../ecpg/test/expected/sql-define.stdout | 3 + src/interfaces/ecpg/test/sql/Makefile | 3 + src/interfaces/ecpg/test/sql/define.pgc | 25 ++++ .../ecpg/test/sql/define_prelim.pgc | 6 + src/interfaces/ecpg/test/sql/meson.build | 1 + 12 files changed, 289 insertions(+), 80 deletions(-) create mode 100644 src/interfaces/ecpg/test/sql/define_prelim.pgc diff --git a/doc/src/sgml/ecpg.sgml b/doc/src/sgml/ecpg.sgml index b332aa435d..e7a53f3c9d 100644 --- a/doc/src/sgml/ecpg.sgml +++ b/doc/src/sgml/ecpg.sgml @@ -5793,6 +5793,14 @@ EXEC SQL UPDATE Tbl SET col = MYNUMBER; embedded SQL query because in this case the embedded SQL precompiler is not able to see this declaration. + + + If multiple input files are named on the ecpg + preprocessor's command line, the effects of EXEC SQL + DEFINE and EXEC SQL UNDEF do not carry + across files: each file starts with only the symbols defined + by switches on the command line. + diff --git a/doc/src/sgml/ref/ecpg-ref.sgml b/doc/src/sgml/ref/ecpg-ref.sgml index f3b6034f42..43f2d8bdaa 100644 --- a/doc/src/sgml/ref/ecpg-ref.sgml +++ b/doc/src/sgml/ref/ecpg-ref.sgml @@ -93,10 +93,12 @@ PostgreSQL documentation - + - Define a C preprocessor symbol. + Define a preprocessor symbol, equivalently to the EXEC SQL + DEFINE directive. If no value is + specified, the symbol is defined with the value 1. diff --git a/src/interfaces/ecpg/preproc/ecpg.c b/src/interfaces/ecpg/preproc/ecpg.c index 93e66fc60f..73c37631ac 100644 --- a/src/interfaces/ecpg/preproc/ecpg.c +++ b/src/interfaces/ecpg/preproc/ecpg.c @@ -82,35 +82,46 @@ add_include_path(char *path) } } +/* + * Process a command line -D switch + */ static void add_preprocessor_define(char *define) { - struct _defines *pd = defines; - char *ptr, - *define_copy = mm_strdup(define); + /* copy the argument to avoid relying on argv storage */ + char *define_copy = mm_strdup(define); + char *ptr; + struct _defines *newdef; - defines = mm_alloc(sizeof(struct _defines)); + newdef = mm_alloc(sizeof(struct _defines)); /* look for = sign */ ptr = strchr(define_copy, '='); if (ptr != NULL) { + /* symbol has a value */ char *tmp; - /* symbol has a value */ - for (tmp = ptr - 1; *tmp == ' '; tmp--); + /* strip any spaces between name and '=' */ + for (tmp = ptr - 1; tmp >= define_copy && *tmp == ' '; tmp--); tmp[1] = '\0'; - defines->olddef = define_copy; - defines->newdef = ptr + 1; + + /* + * Note we don't bother to separately malloc cmdvalue; it will never + * be freed so that's not necessary. + */ + newdef->cmdvalue = ptr + 1; } else { - defines->olddef = define_copy; - defines->newdef = mm_strdup("1"); + /* define it as "1"; again no need to malloc it */ + newdef->cmdvalue = "1"; } - defines->pertinent = true; - defines->used = NULL; - defines->next = pd; + newdef->name = define_copy; + newdef->value = mm_strdup(newdef->cmdvalue); + newdef->used = NULL; + newdef->next = defines; + defines = newdef; } #define ECPG_GETOPT_LONG_REGRESSION 1 @@ -348,6 +359,8 @@ main(int argc, char *const argv[]) { struct cursor *ptr; struct _defines *defptr; + struct _defines *prevdefptr; + struct _defines *nextdefptr; struct typedefs *typeptr; struct declared_list *list; @@ -385,28 +398,28 @@ main(int argc, char *const argv[]) free(this); } - /* remove non-pertinent old defines as well */ - while (defines && !defines->pertinent) + /* restore defines to their command-line state */ + prevdefptr = NULL; + for (defptr = defines; defptr != NULL; defptr = nextdefptr) { - defptr = defines; - defines = defines->next; - - free(defptr->newdef); - free(defptr->olddef); - free(defptr); - } - - for (defptr = defines; defptr != NULL; defptr = defptr->next) - { - struct _defines *this = defptr->next; - - if (this && !this->pertinent) + nextdefptr = defptr->next; + if (defptr->cmdvalue != NULL) { - defptr->next = this->next; - - free(this->newdef); - free(this->olddef); - free(this); + /* keep it, resetting the value */ + free(defptr->value); + defptr->value = mm_strdup(defptr->cmdvalue); + prevdefptr = defptr; + } + else + { + /* remove it */ + if (prevdefptr != NULL) + prevdefptr->next = nextdefptr; + else + defines = nextdefptr; + free(defptr->name); + free(defptr->value); + free(defptr); } } diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index e56cd31d27..bcfbd0978b 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -69,7 +69,14 @@ char *token_start; static int state_before_str_start; static int state_before_str_stop; -struct _yy_buffer +/* + * State for handling include files and macro expansion. We use a new + * flex input buffer for each level of include or macro, and create a + * struct _yy_buffer to remember the previous level. There is not a struct + * for the currently active input source; that state is kept in the global + * variables YY_CURRENT_BUFFER, yylineno, and input_filename. + */ +static struct _yy_buffer { YY_BUFFER_STATE buffer; long lineno; @@ -77,8 +84,6 @@ struct _yy_buffer struct _yy_buffer *next; } *yy_buffer = NULL; -static char *old; - /* * Vars for handling ifdef/elif/endif constructs. preproc_tos is the current * nesting depth of such constructs, and stacked_if_value[preproc_tos] is the @@ -444,6 +449,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ %{ /* code to execute during start of each call of yylex() */ + char *newdefsymbol = NULL; + token_start = NULL; %} @@ -1010,6 +1017,7 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ } {identifier} { + /* First check to see if it's a define symbol to expand */ if (!isdefine()) { int kwvalue; @@ -1198,17 +1206,23 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ yytext[i+1] = '\0'; - for (ptr = defines; ptr != NULL; ptr2 = ptr, ptr = ptr->next) + /* Find and unset any matching define; should be only 1 */ + for (ptr = defines; ptr; ptr2 = ptr, ptr = ptr->next) { - if (strcmp(yytext, ptr->olddef) == 0) + if (strcmp(yytext, ptr->name) == 0) { - if (ptr2 == NULL) - defines = ptr->next; - else - ptr2->next = ptr->next; - free(ptr->newdef); - free(ptr->olddef); - free(ptr); + free(ptr->value); + ptr->value = NULL; + /* We cannot forget it if there's a cmdvalue */ + if (ptr->cmdvalue == NULL) + { + if (ptr2 == NULL) + defines = ptr->next; + else + ptr2->next = ptr->next; + free(ptr->name); + free(ptr); + } break; } } @@ -1413,11 +1427,17 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ ; yytext[i+1] = '\0'; - for (defptr = defines; - defptr != NULL && - strcmp(yytext, defptr->olddef) != 0; - defptr = defptr->next) - /* skip */ ; + /* Does a definition exist? */ + for (defptr = defines; defptr; defptr = defptr->next) + { + if (strcmp(yytext, defptr->name) == 0) + { + /* Found it, but is it currently undefined? */ + if (defptr->value == NULL) + defptr = NULL; /* pretend it's not found */ + break; + } + } this_active = (defptr ? ifcond : !ifcond); stacked_if_value[preproc_tos].active = @@ -1438,7 +1458,7 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ yyterminate(); } {identifier} { - old = mm_strdup(yytext); + newdefsymbol = mm_strdup(yytext); BEGIN(def); startlit(); } @@ -1447,26 +1467,31 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ yyterminate(); } {space}*";" { - struct _defines *ptr, *this; + struct _defines *ptr; + /* Does it already exist? */ for (ptr = defines; ptr != NULL; ptr = ptr->next) { - if (strcmp(old, ptr->olddef) == 0) - { - free(ptr->newdef); - ptr->newdef = mm_strdup(literalbuf); - } + if (strcmp(newdefsymbol, ptr->name) == 0) + { + free(ptr->value); + ptr->value = mm_strdup(literalbuf); + /* Don't leak newdefsymbol */ + free(newdefsymbol); + break; + } } if (ptr == NULL) { - this = (struct _defines *) mm_alloc(sizeof(struct _defines)); + /* Not present, make a new entry */ + ptr = (struct _defines *) mm_alloc(sizeof(struct _defines)); - /* initial definition */ - this->olddef = old; - this->newdef = mm_strdup(literalbuf); - this->next = defines; - this->used = NULL; - defines = this; + ptr->name = newdefsymbol; + ptr->value = mm_strdup(literalbuf); + ptr->cmdvalue = NULL; + ptr->used = NULL; + ptr->next = defines; + defines = ptr; } BEGIN(C); @@ -1483,6 +1508,7 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ <> { if (yy_buffer == NULL) { + /* No more input */ if (preproc_tos > 0) { preproc_tos = 0; @@ -1492,16 +1518,20 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ } else { + /* Revert to previous input source */ struct _yy_buffer *yb = yy_buffer; int i; struct _defines *ptr; + /* Check to see if we are exiting a macro value */ for (ptr = defines; ptr; ptr = ptr->next) + { if (ptr->used == yy_buffer) { ptr->used = NULL; - break; + break; /* there can't be multiple matches */ } + } if (yyin != NULL) fclose(yyin); @@ -1727,15 +1757,24 @@ ecpg_isspace(char ch) return false; } -static bool isdefine(void) +/* + * If yytext matches a define symbol, begin scanning the symbol's value + * and return true + */ +static bool +isdefine(void) { struct _defines *ptr; /* is it a define? */ for (ptr = defines; ptr; ptr = ptr->next) { - if (strcmp(yytext, ptr->olddef) == 0 && ptr->used == NULL) + /* notice we do not match anything being actively expanded */ + if (strcmp(yytext, ptr->name) == 0 && + ptr->value != NULL && + ptr->used == NULL) { + /* Save state associated with the current buffer */ struct _yy_buffer *yb; yb = mm_alloc(sizeof(struct _yy_buffer)); @@ -1744,10 +1783,17 @@ static bool isdefine(void) yb->lineno = yylineno; yb->filename = mm_strdup(input_filename); yb->next = yy_buffer; + yy_buffer = yb; - ptr->used = yy_buffer = yb; + /* Mark symbol as being actively expanded */ + ptr->used = yb; - yy_scan_string(ptr->newdef); + /* + * We use yy_scan_string which will copy the value, so there's + * no need to worry about a possible undef happening while we + * are still scanning it. + */ + yy_scan_string(ptr->value); return true; } } @@ -1755,7 +1801,12 @@ static bool isdefine(void) return false; } -static bool isinformixdefine(void) +/* + * Handle replacement of INFORMIX built-in defines. This works just + * like isdefine() except for the source of the string to scan. + */ +static bool +isinformixdefine(void) { const char *new = NULL; diff --git a/src/interfaces/ecpg/preproc/type.h b/src/interfaces/ecpg/preproc/type.h index 5935cd7473..ce2124361f 100644 --- a/src/interfaces/ecpg/preproc/type.h +++ b/src/interfaces/ecpg/preproc/type.h @@ -163,13 +163,25 @@ struct typedefs struct typedefs *next; }; +/* + * Info about a defined symbol (macro), coming from a -D command line switch + * or a define command in the program. These are stored in a simple list. + * Because ecpg supports compiling multiple files per run, we have to remember + * the command-line definitions and be able to revert to those; this motivates + * storing cmdvalue separately from value. + * name and value are separately-malloc'd strings; cmdvalue typically isn't. + * used is NULL unless we are currently expanding the macro, in which case + * it points to the buffer before the one scanning the macro; we reset it + * to NULL upon returning to that buffer. This is used to prevent recursive + * expansion of the macro. + */ struct _defines { - char *olddef; - char *newdef; - int pertinent; - void *used; - struct _defines *next; + char *name; /* symbol's name */ + char *value; /* current value, or NULL if undefined */ + const char *cmdvalue; /* value set on command line, or NULL */ + void *used; /* buffer pointer, or NULL */ + struct _defines *next; /* list link */ }; /* This is a linked list of the variable names and types. */ diff --git a/src/interfaces/ecpg/test/expected/sql-define.c b/src/interfaces/ecpg/test/expected/sql-define.c index 29583ecd74..e97caec5b0 100644 --- a/src/interfaces/ecpg/test/expected/sql-define.c +++ b/src/interfaces/ecpg/test/expected/sql-define.c @@ -6,6 +6,21 @@ /* End of automatic include section */ #define ECPGdebug(X,Y) ECPGdebug((X)+100,(Y)) +#line 1 "define_prelim.pgc" +/* + * Test that the effects of these commands don't carry over to the next + * file named on the ecpg command line. + */ + + +/* Processed by ecpg (regression mode) */ +/* These include files are added by the preprocessor */ +#include +#include +#include +/* End of automatic include section */ +#define ECPGdebug(X,Y) ECPGdebug((X)+100,(Y)) + #line 1 "define.pgc" #line 1 "sqlca.h" @@ -195,11 +210,57 @@ if (sqlca.sqlcode < 0) sqlprint ( );} - { ECPGdisconnect(__LINE__, "CURRENT"); -#line 56 "define.pgc" + /* test handling of a macro defined on the command line */ + { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "select 123", ECPGt_EOIT, + ECPGt_int,&(i),(long)1,(long)1,sizeof(int), + ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT); +#line 57 "define.pgc" if (sqlca.sqlcode < 0) sqlprint ( );} -#line 56 "define.pgc" +#line 57 "define.pgc" + + printf("original CMDLINESYM: %d\n", i); + + + + { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "select 42", ECPGt_EOIT, + ECPGt_int,&(i),(long)1,(long)1,sizeof(int), + ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT); +#line 62 "define.pgc" + +if (sqlca.sqlcode < 0) sqlprint ( );} +#line 62 "define.pgc" + + printf("redefined CMDLINESYM: %d\n", i); + + + + { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "select 43", ECPGt_EOIT, + ECPGt_int,&(i),(long)1,(long)1,sizeof(int), + ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT); +#line 67 "define.pgc" + +if (sqlca.sqlcode < 0) sqlprint ( );} +#line 67 "define.pgc" + + printf("redefined CMDLINESYM: %d\n", i); + + + + + + + + /* this macro should not have carried over from define_prelim.pgc */ + + + + + { ECPGdisconnect(__LINE__, "CURRENT"); +#line 81 "define.pgc" + +if (sqlca.sqlcode < 0) sqlprint ( );} +#line 81 "define.pgc" return 0; } diff --git a/src/interfaces/ecpg/test/expected/sql-define.stderr b/src/interfaces/ecpg/test/expected/sql-define.stderr index 20601b63cf..c4da9927e8 100644 --- a/src/interfaces/ecpg/test/expected/sql-define.stderr +++ b/src/interfaces/ecpg/test/expected/sql-define.stderr @@ -48,5 +48,29 @@ [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_process_output on line 53: OK: SET [NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 57: query: select 123; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 57: using PQexec +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_process_output on line 57: correctly got 1 tuples with 1 fields +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_get_data on line 57: RESULT: 123 offset: -1; array: no +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 62: query: select 42; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 62: using PQexec +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_process_output on line 62: correctly got 1 tuples with 1 fields +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_get_data on line 62: RESULT: 42 offset: -1; array: no +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 67: query: select 43; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 67: using PQexec +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_process_output on line 67: correctly got 1 tuples with 1 fields +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_get_data on line 67: RESULT: 43 offset: -1; array: no +[NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_finish: connection ecpg1_regression closed [NO_PID]: sqlca: code: 0, state: 00000 diff --git a/src/interfaces/ecpg/test/expected/sql-define.stdout b/src/interfaces/ecpg/test/expected/sql-define.stdout index 864cd850bf..eaddc7f8c3 100644 --- a/src/interfaces/ecpg/test/expected/sql-define.stdout +++ b/src/interfaces/ecpg/test/expected/sql-define.stdout @@ -1 +1,4 @@ i: 1, s: 29-abcdef +original CMDLINESYM: 123 +redefined CMDLINESYM: 42 +redefined CMDLINESYM: 43 diff --git a/src/interfaces/ecpg/test/sql/Makefile b/src/interfaces/ecpg/test/sql/Makefile index 7f032659b9..3ff190a523 100644 --- a/src/interfaces/ecpg/test/sql/Makefile +++ b/src/interfaces/ecpg/test/sql/Makefile @@ -34,5 +34,8 @@ TESTS = array array.c \ all: $(TESTS) +define.c: define.pgc define_prelim.pgc $(ECPG_TEST_DEPENDENCIES) + $(ECPG) -DCMDLINESYM=123 -o $@ $(srcdir)/define_prelim.pgc $< + oldexec.c: oldexec.pgc $(ECPG_TEST_DEPENDENCIES) $(ECPG) -r questionmarks -o $@ $< diff --git a/src/interfaces/ecpg/test/sql/define.pgc b/src/interfaces/ecpg/test/sql/define.pgc index ed58a4bde0..83f328df46 100644 --- a/src/interfaces/ecpg/test/sql/define.pgc +++ b/src/interfaces/ecpg/test/sql/define.pgc @@ -53,6 +53,31 @@ int main(void) exec sql SET TIMEZONE TO TZVAR; exec sql endif; + /* test handling of a macro defined on the command line */ + exec sql select CMDLINESYM INTO :i; + printf("original CMDLINESYM: %d\n", i); + + exec sql define CMDLINESYM 42; + + exec sql select CMDLINESYM INTO :i; + printf("redefined CMDLINESYM: %d\n", i); + + exec sql define CMDLINESYM 43; + + exec sql select CMDLINESYM INTO :i; + printf("redefined CMDLINESYM: %d\n", i); + + exec sql undef CMDLINESYM; + + exec sql ifdef CMDLINESYM; + exec sql insert into test values (NUMBER, 'no string'); + exec sql endif; + + /* this macro should not have carried over from define_prelim.pgc */ + exec sql ifdef NONCMDLINESYM; + exec sql insert into test values (NUMBER, 'no string'); + exec sql endif; + exec sql disconnect; return 0; } diff --git a/src/interfaces/ecpg/test/sql/define_prelim.pgc b/src/interfaces/ecpg/test/sql/define_prelim.pgc new file mode 100644 index 0000000000..7a984f74c8 --- /dev/null +++ b/src/interfaces/ecpg/test/sql/define_prelim.pgc @@ -0,0 +1,6 @@ +/* + * Test that the effects of these commands don't carry over to the next + * file named on the ecpg command line. + */ +exec sql define CMDLINESYM 999; +exec sql define NONCMDLINESYM 1234; diff --git a/src/interfaces/ecpg/test/sql/meson.build b/src/interfaces/ecpg/test/sql/meson.build index 88a3acb9af..4da6e19f5f 100644 --- a/src/interfaces/ecpg/test/sql/meson.build +++ b/src/interfaces/ecpg/test/sql/meson.build @@ -31,6 +31,7 @@ pgc_files = [ ] pgc_extra_flags = { + 'define': ['-DCMDLINESYM=123', files('define_prelim.pgc')], 'oldexec': ['-r', 'questionmarks'], }