Tweak the core scanner so that it can be used by plpgsql too.

Changes:

Pass in the keyword lookup array instead of having it be hardwired.
(This incidentally allows elimination of some duplicate coding in ecpg.)

Re-order the token declarations in gram.y so that non-keyword tokens have
numbers that won't change when keywords are added or removed.

Add ".." and ":=" to the set of tokens recognized by scan.l.  (Since these
combinations are nowhere legal in core SQL, this does not change anything
except the precise wording of the error you get when you write one of them.)
This commit is contained in:
Tom Lane 2009-07-14 20:24:10 +00:00
parent 0d4899e448
commit 1aa58d3a83
15 changed files with 139 additions and 116 deletions

View File

@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.668 2009/07/13 02:02:20 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.669 2009/07/14 20:24:10 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@ -421,10 +421,23 @@ static TypeName *TableFuncTypeName(List *columns);
/*
* If you make any token changes, update the keyword table in
* src/include/parser/kwlist.h and add new keywords to the appropriate one of
* the reserved-or-not-so-reserved keyword lists, below; search
* this file for "Name classification hierarchy".
* Non-keyword token types. These are hard-wired into the "flex" lexer.
* They must be listed first so that their numeric codes do not depend on
* the set of keywords. PL/pgsql depends on this so that it can share the
* same lexer. If you add/change tokens here, fix PL/pgsql to match!
*
* DOT_DOT and COLON_EQUALS are unused in the core SQL grammar, and so will
* always provoke parse errors. They are needed by PL/pgsql.
*/
%token <str> IDENT FCONST SCONST BCONST XCONST Op
%token <ival> ICONST PARAM
%token TYPECAST DOT_DOT COLON_EQUALS
/*
* If you want to make any keyword changes, update the keyword table in
* src/include/parser/kwlist.h and add new keywords to the appropriate one
* of the reserved-or-not-so-reserved keyword lists, below; search
* this file for "Keyword category lists".
*/
/* ordinary key words in alphabetical order */
@ -515,17 +528,15 @@ static TypeName *TableFuncTypeName(List *columns);
ZONE
/* The grammar thinks these are keywords, but they are not in the kwlist.h
/*
* The grammar thinks these are keywords, but they are not in the kwlist.h
* list and so can never be entered directly. The filter in parser.c
* creates these tokens when required.
*/
%token NULLS_FIRST NULLS_LAST WITH_TIME
/* Special token types, not actually keywords - see the "lex" file */
%token <str> IDENT FCONST SCONST BCONST XCONST Op
%token <ival> ICONST PARAM
/* precedence: lowest to highest */
/* Precedence: lowest to highest */
%nonassoc SET /* see relation_expr_opt_alias */
%left UNION EXCEPT
%left INTERSECT

View File

@ -9,14 +9,13 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.213 2009/07/12 17:12:33 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.214 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "parser/gramparse.h"
#include "parser/keywords.h"
#define PG_KEYWORD(a,b,c) {a,b,c},
@ -25,5 +24,4 @@ const ScanKeyword ScanKeywords[] = {
#include "parser/kwlist.h"
};
/* End of ScanKeywords, for use in kwlookup.c and elsewhere */
const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
const int NumScanKeywords = lengthof(ScanKeywords);

View File

@ -6,15 +6,12 @@
* NB - this file is also used by ECPG and several frontend programs in
* src/bin/ including pg_dump and psql
*
* Note that this file expects that the ScanKeywords array is defined
* and that LastScanKeyword points to its element one past the last.
*
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/kwlookup.c,v 2.2 2009/03/08 16:53:30 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/parser/kwlookup.c,v 2.3 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -39,7 +36,9 @@
* receive a different case-normalization mapping.
*/
const ScanKeyword *
ScanKeywordLookup(const char *text)
ScanKeywordLookup(const char *text,
const ScanKeyword *keywords,
int num_keywords)
{
int len,
i;
@ -69,8 +68,8 @@ ScanKeywordLookup(const char *text)
/*
* Now do a binary search using plain strcmp() comparison.
*/
low = &ScanKeywords[0];
high = LastScanKeyword - 1;
low = keywords;
high = keywords + (num_keywords - 1);
while (low <= high)
{
const ScanKeyword *middle;

View File

@ -14,7 +14,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.80 2009/07/13 02:02:20 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.81 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -39,7 +39,7 @@ raw_parser(const char *str)
int yyresult;
/* initialize the flex scanner */
yyscanner = scanner_init(str, &yyextra);
yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords);
/* filtered_base_yylex() only needs this much initialization */
yyextra.have_lookahead = false;
@ -79,7 +79,7 @@ pg_parse_string_token(const char *token)
YYSTYPE yylval;
YYLTYPE yylloc;
yyscanner = scanner_init(token, &yyextra);
yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords);
ctoken = base_yylex(&yylval, &yylloc, yyscanner);

View File

@ -24,7 +24,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.156 2009/07/13 03:11:12 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.157 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -304,6 +304,10 @@ identifier {ident_start}{ident_cont}*
typecast "::"
/* these two token types are used by PL/pgsql, though not in core SQL */
dot_dot \.\.
colon_equals ":="
/*
* "self" is the set of chars that should be returned as single-character
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
@ -450,11 +454,21 @@ other .
SET_YYLLOC();
yyless(1); /* eat only 'n' this time */
/* nchar had better be a keyword! */
keyword = ScanKeywordLookup("nchar");
Assert(keyword != NULL);
yylval->keyword = keyword->name;
return keyword->value;
keyword = ScanKeywordLookup("nchar",
yyextra->keywords,
yyextra->num_keywords);
if (keyword != NULL)
{
yylval->keyword = keyword->name;
return keyword->value;
}
else
{
/* If NCHAR isn't a keyword, just return "n" */
yylval->str = pstrdup("n");
return IDENT;
}
}
{xqstart} {
@ -680,6 +694,16 @@ other .
return TYPECAST;
}
{dot_dot} {
SET_YYLLOC();
return DOT_DOT;
}
{colon_equals} {
SET_YYLLOC();
return COLON_EQUALS;
}
{self} {
SET_YYLLOC();
return yytext[0];
@ -830,7 +854,9 @@ other .
SET_YYLLOC();
/* Is it a keyword? */
keyword = ScanKeywordLookup(yytext);
keyword = ScanKeywordLookup(yytext,
yyextra->keywords,
yyextra->num_keywords);
if (keyword != NULL)
{
yylval->keyword = keyword->name;
@ -939,7 +965,10 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner)
* Called before any actual parsing is done
*/
base_yyscan_t
scanner_init(const char *str, base_yy_extra_type *yyext)
scanner_init(const char *str,
base_yy_extra_type *yyext,
const ScanKeyword *keywords,
int num_keywords)
{
Size slen = strlen(str);
yyscan_t scanner;
@ -949,6 +978,9 @@ scanner_init(const char *str, base_yy_extra_type *yyext)
base_yyset_extra(yyext, scanner);
yyext->keywords = keywords;
yyext->num_keywords = num_keywords;
/*
* Make a scan buffer with special termination needed by flex.
*/

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/misc.c,v 1.71 2009/06/11 14:49:03 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/misc.c,v 1.72 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -334,7 +334,7 @@ pg_get_keywords(PG_FUNCTION_ARGS)
funcctx = SRF_PERCALL_SETUP();
if (&ScanKeywords[funcctx->call_cntr] < LastScanKeyword)
if (funcctx->call_cntr < NumScanKeywords)
{
char *values[3];
HeapTuple tuple;

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.301 2009/07/12 17:12:34 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.302 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -6219,7 +6219,9 @@ quote_identifier(const char *ident)
* Note: ScanKeywordLookup() does case-insensitive comparison, but
* that's fine, since we already know we have all-lower-case.
*/
const ScanKeyword *keyword = ScanKeywordLookup(ident);
const ScanKeyword *keyword = ScanKeywordLookup(ident,
ScanKeywords,
NumScanKeywords);
if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
safe = false;

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/bin/pg_dump/dumputils.c,v 1.46 2009/06/11 14:49:07 momjian Exp $
* $PostgreSQL: pgsql/src/bin/pg_dump/dumputils.c,v 1.47 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -130,7 +130,9 @@ fmtId(const char *rawid)
* Note: ScanKeywordLookup() does case-insensitive comparison, but
* that's fine, since we already know we have all-lower-case.
*/
const ScanKeyword *keyword = ScanKeywordLookup(rawid);
const ScanKeyword *keyword = ScanKeywordLookup(rawid,
ScanKeywords,
NumScanKeywords);
if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
need_quotes = true;

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/bin/pg_dump/keywords.c,v 1.3 2009/06/11 14:49:07 momjian Exp $
* $PostgreSQL: pgsql/src/bin/pg_dump/keywords.c,v 1.4 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -27,5 +27,4 @@ const ScanKeyword ScanKeywords[] = {
#include "parser/kwlist.h"
};
/* End of ScanKeywords, for use in kwlookup.c */
const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
const int NumScanKeywords = lengthof(ScanKeywords);

View File

@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.46 2009/07/13 02:02:20 tgl Exp $
* $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.47 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -20,6 +20,7 @@
#define GRAMPARSE_H
#include "nodes/parsenodes.h"
#include "parser/keywords.h"
/*
* We track token locations in terms of byte offsets from the start of the
@ -49,6 +50,12 @@ typedef struct base_yy_extra_type
char *scanbuf;
Size scanbuflen;
/*
* The keyword list to use.
*/
const ScanKeyword *keywords;
int num_keywords;
/*
* literalbuf is used to accumulate literal values when multiple rules
* are needed to parse a single literal. Call startlit() to reset buffer
@ -106,7 +113,10 @@ extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
base_yyscan_t yyscanner);
/* from scan.l */
extern base_yyscan_t scanner_init(const char *str, base_yy_extra_type *yyext);
extern base_yyscan_t scanner_init(const char *str,
base_yy_extra_type *yyext,
const ScanKeyword *keywords,
int num_keywords);
extern void scanner_finish(base_yyscan_t yyscanner);
extern int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
base_yyscan_t yyscanner);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/parser/keywords.h,v 1.26 2009/01/01 17:24:00 momjian Exp $
* $PostgreSQL: pgsql/src/include/parser/keywords.h,v 1.27 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -29,8 +29,10 @@ typedef struct ScanKeyword
} ScanKeyword;
extern const ScanKeyword ScanKeywords[];
extern const ScanKeyword *LastScanKeyword;
extern const int NumScanKeywords;
extern const ScanKeyword *ScanKeywordLookup(const char *text);
extern const ScanKeyword *ScanKeywordLookup(const char *text,
const ScanKeyword *keywords,
int num_keywords);
#endif /* KEYWORDS_H */

View File

@ -1,10 +1,10 @@
/*-------------------------------------------------------------------------
*
* keywords.c
* c_keywords.c
* lexical token lookup for reserved words in postgres embedded SQL
*
* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/c_keywords.c,v 1.23 2009/06/11 14:49:13 momjian Exp $
* §
* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/c_keywords.c,v 1.24 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
@ -55,8 +55,31 @@ static const ScanKeyword ScanCKeywords[] = {
{"year", YEAR_P, 0},
};
/*
* Do a binary search using plain strcmp() comparison. This is much like
* ScanKeywordLookup(), except we want case-sensitive matching.
*/
const ScanKeyword *
ScanCKeywordLookup(const char *text)
{
return DoLookup(text, &ScanCKeywords[0], endof(ScanCKeywords) - 1);
const ScanKeyword *low = &ScanCKeywords[0];
const ScanKeyword *high = &ScanCKeywords[lengthof(ScanCKeywords) - 1];
while (low <= high)
{
const ScanKeyword *middle;
int difference;
middle = low + (high - low) / 2;
difference = strcmp(middle->name, text);
if (difference == 0)
return middle;
else if (difference < 0)
low = middle + 1;
else
high = middle - 1;
}
return NULL;
}

View File

@ -4,7 +4,7 @@
* lexical token lookup for reserved words in postgres embedded SQL
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/ecpg_keywords.c,v 1.40 2009/06/11 14:49:13 momjian Exp $
* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/ecpg_keywords.c,v 1.41 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -75,79 +75,26 @@ static const ScanKeyword ScanECPGKeywords[] = {
{"whenever", SQL_WHENEVER, 0},
};
/* This is all taken from src/backend/parser/keyword.c and adjusted for our needs. */
/*
* Do a binary search using plain strcmp() comparison.
*/
const ScanKeyword *
DoLookup(const char *word, const ScanKeyword *low, const ScanKeyword *high)
{
while (low <= high)
{
const ScanKeyword *middle;
int difference;
middle = low + (high - low) / 2;
difference = strcmp(middle->name, word);
if (difference == 0)
return middle;
else if (difference < 0)
low = middle + 1;
else
high = middle - 1;
}
return NULL;
}
/*
* ScanECPGKeywordLookup - see if a given word is a keyword
*
* Returns a pointer to the ScanKeyword table entry, or NULL if no match.
*
* The match is done case-insensitively. Note that we deliberately use a
* dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
* even if we are in a locale where tolower() would produce more or different
* translations. This is to conform to the SQL99 spec, which says that
* keywords are to be matched in this way even though non-keyword identifiers
* receive a different case-normalization mapping.
* Keywords are matched using the same case-folding rules as in the backend.
*/
const ScanKeyword *
ScanECPGKeywordLookup(const char *text)
{
int len,
i;
char word[NAMEDATALEN];
const ScanKeyword *res;
/* First check SQL symbols defined by the backend. */
res = ScanKeywordLookup(text);
res = ScanKeywordLookup(text, ScanKeywords, NumScanKeywords);
if (res)
return res;
len = strlen(text);
/* We assume all keywords are shorter than NAMEDATALEN. */
if (len >= NAMEDATALEN)
return NULL;
/* Try ECPG-specific keywords. */
res = ScanKeywordLookup(text, ScanECPGKeywords, lengthof(ScanECPGKeywords));
if (res)
return res;
/*
* Apply an ASCII-only downcasing. We must not use tolower() since it may
* produce the wrong translation in some locales (eg, Turkish).
*/
for (i = 0; i < len; i++)
{
char ch = text[i];
if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
word[i] = ch;
}
word[len] = '\0';
/*
* Now do a binary search using plain strcmp() comparison.
*/
return DoLookup(word, &ScanECPGKeywords[0], endof(ScanECPGKeywords) - 1);
return NULL;
}

View File

@ -1,4 +1,4 @@
/* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/extern.h,v 1.73 2009/06/11 14:49:13 momjian Exp $ */
/* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/extern.h,v 1.74 2009/07/14 20:24:10 tgl Exp $ */
#ifndef _ECPG_PREPROC_EXTERN_H
#define _ECPG_PREPROC_EXTERN_H
@ -101,7 +101,6 @@ extern void remove_variables(int);
extern struct variable *new_variable(const char *, struct ECPGtype *, int);
extern const ScanKeyword *ScanCKeywordLookup(const char *);
extern const ScanKeyword *ScanECPGKeywordLookup(const char *text);
extern const ScanKeyword *DoLookup(const char *, const ScanKeyword *, const ScanKeyword *);
extern void scanner_init(const char *);
extern void parser_init(void);
extern void scanner_finish(void);

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.88 2009/03/08 16:53:30 alvherre Exp $
* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.89 2009/07/14 20:24:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -26,5 +26,4 @@ const ScanKeyword ScanKeywords[] = {
#include "parser/kwlist.h"
};
/* End of ScanKeywords, for use in kwlookup.c */
const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
const int NumScanKeywords = lengthof(ScanKeywords);