diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index 7936f3a580..c8f5f2c0e9 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -1,23 +1,22 @@ /*------------------------------------------------------------------------- * * keywords.c - * lexical token lookup for reserved words in postgres SQL + * lexical token lookup for reserved words in PostgreSQL * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.88 2001/01/24 19:43:01 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.89 2001/02/21 18:53:46 tgl Exp $ * *------------------------------------------------------------------------- */ -#include - #include "postgres.h" +#include + #include "nodes/parsenodes.h" -#include "nodes/pg_list.h" #include "parser/keywords.h" #include "parser/parse.h" @@ -286,18 +285,62 @@ static ScanKeyword ScanKeywords[] = { {"zone", ZONE}, }; +/* + * ScanKeywordLookup - see if a given word is a keyword + * + * Returns a pointer to the ScanKeyword table entry, or NULL if no match. + * + * The match is done case-insensitively. Note that we deliberately use a + * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', + * even if we are in a locale where tolower() would produce more or different + * translations. This is to conform to the SQL99 spec, which says that + * keywords are to be matched in this way even though non-keyword identifiers + * receive a different case-normalization mapping. + */ ScanKeyword * ScanKeywordLookup(char *text) { - ScanKeyword *low = &ScanKeywords[0]; - ScanKeyword *high = endof(ScanKeywords) - 1; - ScanKeyword *middle; - int difference; + int len, + i; + char word[NAMEDATALEN]; + ScanKeyword *low; + ScanKeyword *high; + len = strlen(text); + /* We assume all keywords are shorter than NAMEDATALEN. */ + if (len >= NAMEDATALEN) + return NULL; + + /* + * Apply an ASCII-only downcasing. We must not use tolower() since + * it may produce the wrong translation in some locales (eg, Turkish), + * and we don't trust isupper() very much either. In an ASCII-based + * encoding the tests against A and Z are sufficient, but we also check + * isupper() so that we will work correctly under EBCDIC. The actual + * case conversion step should work for either ASCII or EBCDIC. + */ + for (i = 0; i < len; i++) + { + char ch = text[i]; + + if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch)) + ch += 'a' - 'A'; + word[i] = ch; + } + word[len] = '\0'; + + /* + * Now do a binary search using plain strcmp() comparison. + */ + low = &ScanKeywords[0]; + high = endof(ScanKeywords) - 1; while (low <= high) { + ScanKeyword *middle; + int difference; + middle = low + (high - low) / 2; - difference = strcmp(middle->name, text); + difference = strcmp(middle->name, word); if (difference == 0) return middle; else if (difference < 0) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index f0f4626b95..f913584c1a 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -2,14 +2,14 @@ /*------------------------------------------------------------------------- * * scan.l - * lexical scanner for POSTGRES + * lexical scanner for PostgreSQL * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.86 2001/02/03 20:13:05 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.87 2001/02/21 18:53:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -477,12 +477,27 @@ other . {identifier} { - int i; - ScanKeyword *keyword; + ScanKeyword *keyword; + int i; - for(i = 0; yytext[i]; i++) + /* Is it a keyword? */ + keyword = ScanKeywordLookup((char*) yytext); + if (keyword != NULL) + return keyword->value; + + /* + * No. Convert the identifier to lower case, and truncate + * if necessary. + * + * Note: here we use a locale-dependent case conversion, + * which seems appropriate under SQL99 rules, whereas + * the keyword comparison was NOT locale-dependent. + */ + for (i = 0; yytext[i]; i++) + { if (isupper((unsigned char) yytext[i])) yytext[i] = tolower((unsigned char) yytext[i]); + } if (i >= NAMEDATALEN) { #ifdef MULTIBYTE @@ -497,15 +512,8 @@ other . yytext[NAMEDATALEN-1] = '\0'; #endif } - keyword = ScanKeywordLookup((char*)yytext); - if (keyword != NULL) { - return keyword->value; - } - else - { - yylval.str = pstrdup((char*)yytext); - return IDENT; - } + yylval.str = pstrdup((char*) yytext); + return IDENT; } {other} { return yytext[0]; } diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 872b607e87..2dd460a442 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -3,7 +3,7 @@ * back to source text * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.72 2001/02/14 21:35:05 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.73 2001/02/21 18:53:47 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. * @@ -2563,8 +2563,8 @@ quote_identifier(char *ident) * but the parser doesn't provide any easy way to test for whether * an identifier is safe or not... so be safe not sorry. * - * Note: ScanKeywordLookup() expects an all-lower-case input, but - * we've already checked we have that. + * Note: ScanKeywordLookup() does case-insensitive comparison, + * but that's fine, since we already know we have all-lower-case. */ if (ScanKeywordLookup(ident) != NULL) safe = false; diff --git a/src/interfaces/ecpg/preproc/ecpg_keywords.c b/src/interfaces/ecpg/preproc/ecpg_keywords.c index 740b7d9cd3..c65730d9a3 100644 --- a/src/interfaces/ecpg/preproc/ecpg_keywords.c +++ b/src/interfaces/ecpg/preproc/ecpg_keywords.c @@ -1,8 +1,11 @@ /*------------------------------------------------------------------------- * - * keywords.c + * ecpg_keywords.c * lexical token lookup for reserved words in postgres embedded SQL * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/ecpg_keywords.c,v 1.22 2001/02/21 18:53:47 tgl Exp $ + * *------------------------------------------------------------------------- */ #include "postgres_fe.h" @@ -12,6 +15,7 @@ #include "extern.h" #include "preproc.h" + /* * List of (keyword-name, keyword-token-value) pairs. * @@ -73,18 +77,62 @@ static ScanKeyword ScanKeywords[] = { {"whenever", SQL_WHENEVER}, }; +/* + * ScanECPGKeywordLookup - see if a given word is a keyword + * + * Returns a pointer to the ScanKeyword table entry, or NULL if no match. + * + * The match is done case-insensitively. Note that we deliberately use a + * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', + * even if we are in a locale where tolower() would produce more or different + * translations. This is to conform to the SQL99 spec, which says that + * keywords are to be matched in this way even though non-keyword identifiers + * receive a different case-normalization mapping. + */ ScanKeyword * ScanECPGKeywordLookup(char *text) { - ScanKeyword *low = &ScanKeywords[0]; - ScanKeyword *high = endof(ScanKeywords) - 1; - ScanKeyword *middle; - int difference; + int len, + i; + char word[NAMEDATALEN]; + ScanKeyword *low; + ScanKeyword *high; + len = strlen(text); + /* We assume all keywords are shorter than NAMEDATALEN. */ + if (len >= NAMEDATALEN) + return NULL; + + /* + * Apply an ASCII-only downcasing. We must not use tolower() since + * it may produce the wrong translation in some locales (eg, Turkish), + * and we don't trust isupper() very much either. In an ASCII-based + * encoding the tests against A and Z are sufficient, but we also check + * isupper() so that we will work correctly under EBCDIC. The actual + * case conversion step should work for either ASCII or EBCDIC. + */ + for (i = 0; i < len; i++) + { + char ch = text[i]; + + if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch)) + ch += 'a' - 'A'; + word[i] = ch; + } + word[len] = '\0'; + + /* + * Now do a binary search using plain strcmp() comparison. + */ + low = &ScanKeywords[0]; + high = endof(ScanKeywords) - 1; while (low <= high) { + ScanKeyword *middle; + int difference; + middle = low + (high - low) / 2; - difference = strcmp(middle->name, text); + difference = strcmp(middle->name, word); if (difference == 0) return middle; else if (difference < 0) diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c index ed7c418d73..2decc2b853 100644 --- a/src/interfaces/ecpg/preproc/keywords.c +++ b/src/interfaces/ecpg/preproc/keywords.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * keywords.c - * lexical token lookup for reserved words in postgres SQL + * lexical token lookup for reserved words in PostgreSQL * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.37 2001/02/10 02:31:29 tgl Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.38 2001/02/21 18:53:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include "extern.h" #include "preproc.h" + /* * List of (keyword-name, keyword-token-value) pairs. * @@ -36,7 +37,7 @@ static ScanKeyword ScanKeywords[] = { {"aggregate", AGGREGATE}, {"all", ALL}, {"alter", ALTER}, - {"analyse", ANALYSE}, + {"analyse", ANALYSE}, /* British spelling */ {"analyze", ANALYZE}, {"and", AND}, {"any", ANY}, @@ -58,7 +59,7 @@ static ScanKeyword ScanKeywords[] = { {"chain", CHAIN}, {"char", CHAR}, {"character", CHARACTER}, - {"characteristics", CHARACTERISTICS}, + {"characteristics", CHARACTERISTICS}, {"check", CHECK}, {"checkpoint", CHECKPOINT}, {"close", CLOSE}, @@ -133,7 +134,7 @@ static ScanKeyword ScanKeywords[] = { {"inherits", INHERITS}, {"initially", INITIALLY}, {"inner", INNER_P}, - {"inout", INOUT}, + {"inout", INOUT}, {"insensitive", INSENSITIVE}, {"insert", INSERT}, {"instead", INSTEAD}, @@ -182,7 +183,7 @@ static ScanKeyword ScanKeywords[] = { {"nullif", NULLIF}, {"numeric", NUMERIC}, {"of", OF}, - {"off", OFF}, + {"off", OFF}, {"offset", OFFSET}, {"oids", OIDS}, {"old", OLD}, @@ -192,13 +193,13 @@ static ScanKeyword ScanKeywords[] = { {"option", OPTION}, {"or", OR}, {"order", ORDER}, - {"out", OUT}, + {"out", OUT}, {"outer", OUTER_P}, {"overlaps", OVERLAPS}, {"owner", OWNER}, {"partial", PARTIAL}, {"password", PASSWORD}, - {"path", PATH_P}, + {"path", PATH_P}, {"pendant", PENDANT}, {"position", POSITION}, {"precision", PRECISION}, @@ -221,14 +222,14 @@ static ScanKeyword ScanKeywords[] = { {"rollback", ROLLBACK}, {"row", ROW}, {"rule", RULE}, - {"schema", SCHEMA}, + {"schema", SCHEMA}, {"scroll", SCROLL}, {"second", SECOND_P}, {"select", SELECT}, {"sequence", SEQUENCE}, {"serial", SERIAL}, {"serializable", SERIALIZABLE}, - {"session", SESSION}, + {"session", SESSION}, {"session_user", SESSION_USER}, {"set", SET}, {"setof", SETOF}, @@ -251,7 +252,7 @@ static ScanKeyword ScanKeywords[] = { {"timezone_hour", TIMEZONE_HOUR}, {"timezone_minute", TIMEZONE_MINUTE}, {"to", TO}, - {"toast", TOAST}, + {"toast", TOAST}, {"trailing", TRAILING}, {"transaction", TRANSACTION}, {"trigger", TRIGGER}, @@ -284,18 +285,62 @@ static ScanKeyword ScanKeywords[] = { {"zone", ZONE}, }; +/* + * ScanKeywordLookup - see if a given word is a keyword + * + * Returns a pointer to the ScanKeyword table entry, or NULL if no match. + * + * The match is done case-insensitively. Note that we deliberately use a + * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', + * even if we are in a locale where tolower() would produce more or different + * translations. This is to conform to the SQL99 spec, which says that + * keywords are to be matched in this way even though non-keyword identifiers + * receive a different case-normalization mapping. + */ ScanKeyword * ScanKeywordLookup(char *text) { - ScanKeyword *low = &ScanKeywords[0]; - ScanKeyword *high = endof(ScanKeywords) - 1; - ScanKeyword *middle; - int difference; + int len, + i; + char word[NAMEDATALEN]; + ScanKeyword *low; + ScanKeyword *high; + len = strlen(text); + /* We assume all keywords are shorter than NAMEDATALEN. */ + if (len >= NAMEDATALEN) + return NULL; + + /* + * Apply an ASCII-only downcasing. We must not use tolower() since + * it may produce the wrong translation in some locales (eg, Turkish), + * and we don't trust isupper() very much either. In an ASCII-based + * encoding the tests against A and Z are sufficient, but we also check + * isupper() so that we will work correctly under EBCDIC. The actual + * case conversion step should work for either ASCII or EBCDIC. + */ + for (i = 0; i < len; i++) + { + char ch = text[i]; + + if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch)) + ch += 'a' - 'A'; + word[i] = ch; + } + word[len] = '\0'; + + /* + * Now do a binary search using plain strcmp() comparison. + */ + low = &ScanKeywords[0]; + high = endof(ScanKeywords) - 1; while (low <= high) { + ScanKeyword *middle; + int difference; + middle = low + (high - low) / 2; - difference = strcmp(middle->name, text); + difference = strcmp(middle->name, word); if (difference == 0) return middle; else if (difference < 0) diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index 5f4be6e4be..e8896e3cc6 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.76 2001/02/10 02:31:29 tgl Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.77 2001/02/21 18:53:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -527,74 +527,53 @@ cppline {space}*#(.*\\{line_end})*.* return(CVARIABLE); } {identifier} { - int i; - ScanKeyword *keyword; - char lower_text[NAMEDATALEN]; + ScanKeyword *keyword; + struct _defines *ptr; - /* this should leave the last byte set to '\0' */ - strncpy(lower_text, yytext, NAMEDATALEN-1); - for(i = 0; lower_text[i]; i++) - if (isupper((unsigned char) lower_text[i])) - lower_text[i] = tolower((unsigned char) lower_text[i]); - - if (i >= NAMEDATALEN) - { -#ifdef MULTIBYTE_NOTUSED - int len; - - len = pg_mbcliplen(lower_text,strlen(lower_text),NAMEDATALEN-1); - sprintf(errortext, "identifier \"%s\" will be truncated to \"%.*s\"", - lower_text, len, lower_text); - lower_text[len] = '\0'; -#else - sprintf(errortext, "identifier \"%s\" will be truncated to \"%.*s\"", - lower_text, NAMEDATALEN-1, lower_text); - lower_text[NAMEDATALEN-1] = '\0'; -#endif - mmerror(ET_NOTICE, errortext); - yytext[NAMEDATALEN-1] = '\0'; - } - - keyword = ScanKeywordLookup((char*)lower_text); - if (keyword != NULL) { + /* Is it an SQL keyword? */ + keyword = ScanKeywordLookup((char*) yytext); + if (keyword != NULL) return keyword->value; - } - else + + /* Is it an ECPG keyword? */ + keyword = ScanECPGKeywordLookup((char*) yytext); + if (keyword != NULL) + return keyword->value; + + /* How about a DEFINE? */ + for (ptr = defines; ptr; ptr = ptr->next) { - keyword = ScanECPGKeywordLookup((char*)lower_text); - if (keyword != NULL) { - return keyword->value; - } - else + if (strcmp(yytext, ptr->old) == 0) { - struct _defines *ptr; + struct _yy_buffer *yb; - for (ptr = defines; ptr; ptr = ptr->next) - { - if (strcmp(yytext, ptr->old) == 0) - { - struct _yy_buffer *yb; + yb = mm_alloc(sizeof(struct _yy_buffer)); - yb = mm_alloc(sizeof(struct _yy_buffer)); + yb->buffer = YY_CURRENT_BUFFER; + yb->lineno = yylineno; + yb->filename = mm_strdup(input_filename); + yb->next = yy_buffer; - yb->buffer = YY_CURRENT_BUFFER; - yb->lineno = yylineno; - yb->filename = mm_strdup(input_filename); - yb->next = yy_buffer; + yy_buffer = yb; - yy_buffer = yb; - - yy_scan_string(ptr->new); - break; - } - } - if (ptr == NULL) - { - yylval.str = mm_strdup((char*)yytext); - return IDENT; - } + yy_scan_string(ptr->new); + break; } } + + /* + * None of the above. Return it as an identifier. + * + * The backend would attempt to truncate and case-fold + * the identifier, but I see no good reason for ecpg + * to do so; that's just another way that ecpg could get + * out of step with the backend. + */ + if (ptr == NULL) + { + yylval.str = mm_strdup((char*) yytext); + return IDENT; + } } {other} { return yytext[0]; } {exec_sql} { BEGIN SQL; return SQL_START; }