/*-------------------------------------------------------------------------
 *
 * parser.c
 *		Main entry point/driver for PostgreSQL grammar
 *
 * Note that the grammar is not allowed to perform any table access
 * (since we need to be able to do basic parsing even while inside an
 * aborted transaction).  Therefore, the data structures returned by
 * the grammar are "raw" parsetrees that still need to be analyzed by
 * analyze.c and related files.
 *
 *
 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/parser/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "parser/gramparse.h"
#include "parser/parser.h"

/*
|
2003-04-30 00:13:11 +02:00
|
|
|
* raw_parser
|
|
|
|
* Given a query in string form, do lexical and grammatical analysis.
|
2000-10-07 02:58:23 +02:00
|
|
|
*
|
|
|
|
* Returns a list of raw (un-analyzed) parse trees.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1999-05-13 09:29:22 +02:00
|
|
|
List *
|
2003-04-30 00:13:11 +02:00
|
|
|
raw_parser(const char *str)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2009-11-09 19:38:48 +01:00
|
|
|
core_yyscan_t yyscanner;
|
2009-07-13 04:02:20 +02:00
|
|
|
base_yy_extra_type yyextra;
|
1997-09-08 04:41:22 +02:00
|
|
|
int yyresult;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2009-07-13 04:02:20 +02:00
|
|
|
/* initialize the flex scanner */
|
2009-11-09 19:38:48 +01:00
|
|
|
yyscanner = scanner_init(str, &yyextra.core_yy_extra,
|
|
|
|
ScanKeywords, NumScanKeywords);
|
2009-07-13 04:02:20 +02:00
|
|
|
|
2009-11-09 19:38:48 +01:00
|
|
|
/* base_yylex() only needs this much initialization */
|
2009-07-13 04:02:20 +02:00
|
|
|
yyextra.have_lookahead = false;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2009-07-13 04:02:20 +02:00
|
|
|
/* initialize the bison parser */
|
|
|
|
parser_init(&yyextra);
|
2000-03-17 06:29:07 +01:00
|
|
|
|
2009-07-13 04:02:20 +02:00
|
|
|
/* Parse! */
|
|
|
|
yyresult = base_yyparse(yyscanner);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2009-07-13 04:02:20 +02:00
|
|
|
/* Clean up (release memory) */
|
|
|
|
scanner_finish(yyscanner);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1998-02-26 05:46:47 +01:00
|
|
|
if (yyresult) /* error */
|
2000-10-07 02:58:23 +02:00
|
|
|
return NIL;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2009-07-13 04:02:20 +02:00
|
|
|
return yyextra.parsetree;
|
1996-07-09 08:22:35 +02:00
|
|
|
}


/*
|
2009-11-09 19:38:48 +01:00
|
|
|
* Intermediate filter between parser and core lexer (core_yylex in scan.l).
|
2006-05-27 19:38:46 +02:00
|
|
|
*
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
* This filter is needed because in some cases the standard SQL grammar
|
2014-05-06 18:12:18 +02:00
|
|
|
* requires more than one token lookahead. We reduce these cases to one-token
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
* lookahead by replacing tokens here, in order to keep the grammar LALR(1).
|
2006-05-27 19:38:46 +02:00
|
|
|
*
|
|
|
|
* Using a filter is simpler than trying to recognize multiword tokens
|
|
|
|
* directly in scan.l, because we'd have to allow for comments between the
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
* words. Furthermore it's not clear how to do that without re-introducing
|
2006-05-27 19:38:46 +02:00
|
|
|
* scanner backtrack, which would cost more performance than this filter
|
|
|
|
* layer does.
|
2009-11-09 19:38:48 +01:00
|
|
|
*
|
|
|
|
* The filter also provides a convenient place to translate between
|
|
|
|
* the core_YYSTYPE and YYSTYPE representations (which are really the
|
|
|
|
* same thing anyway, but notationally they're different).
|
2006-05-27 19:38:46 +02:00
|
|
|
*/
|
|
|
|
int
|
2009-11-09 19:38:48 +01:00
|
|
|
base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
|
2006-05-27 19:38:46 +02:00
|
|
|
{
|
2009-07-13 04:02:20 +02:00
|
|
|
base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
|
2006-05-27 19:38:46 +02:00
|
|
|
int cur_token;
|
2007-01-06 20:14:17 +01:00
|
|
|
int next_token;
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
int cur_token_length;
|
2007-01-06 20:14:17 +01:00
|
|
|
YYLTYPE cur_yylloc;
|
2006-05-27 19:38:46 +02:00
|
|
|
|
|
|
|
/* Get next token --- we might already have it */
|
2009-07-13 04:02:20 +02:00
|
|
|
if (yyextra->have_lookahead)
|
2006-05-27 19:38:46 +02:00
|
|
|
{
|
2009-07-13 04:02:20 +02:00
|
|
|
cur_token = yyextra->lookahead_token;
|
2009-11-09 19:38:48 +01:00
|
|
|
lvalp->core_yystype = yyextra->lookahead_yylval;
|
2009-07-13 04:02:20 +02:00
|
|
|
*llocp = yyextra->lookahead_yylloc;
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
*(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
|
2009-07-13 04:02:20 +02:00
|
|
|
yyextra->have_lookahead = false;
|
2006-05-27 19:38:46 +02:00
|
|
|
}
|
|
|
|
else
|
2009-11-09 19:38:48 +01:00
|
|
|
cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
|
2006-05-27 19:38:46 +02:00
|
|
|
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
/*
|
|
|
|
* If this token isn't one that requires lookahead, just return it. If it
|
|
|
|
* does, determine the token length. (We could get that via strlen(), but
|
|
|
|
* since we have such a small set of possibilities, hardwiring seems
|
|
|
|
* feasible and more efficient.)
|
|
|
|
*/
|
2006-05-27 19:38:46 +02:00
|
|
|
switch (cur_token)
|
|
|
|
{
|
2007-01-09 03:14:16 +01:00
|
|
|
case NULLS_P:
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
cur_token_length = 5;
|
|
|
|
break;
|
|
|
|
case WITH:
|
|
|
|
cur_token_length = 4;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return cur_token;
|
|
|
|
}
|
2007-11-15 22:14:46 +01:00
|
|
|
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
/*
|
|
|
|
* Identify end+1 of current token. core_yylex() has temporarily stored a
|
|
|
|
* '\0' here, and will undo that when we call it again. We need to redo
|
|
|
|
* it to fully revert the lookahead call for error reporting purposes.
|
|
|
|
*/
|
|
|
|
yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
|
|
|
|
*llocp + cur_token_length;
|
|
|
|
Assert(*(yyextra->lookahead_end) == '\0');
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Save and restore *llocp around the call. It might look like we could
|
|
|
|
* avoid this by just passing &lookahead_yylloc to core_yylex(), but that
|
|
|
|
* does not work because flex actually holds onto the last-passed pointer
|
|
|
|
* internally, and will use that for error reporting. We need any error
|
|
|
|
* reports to point to the current token, not the next one.
|
|
|
|
*/
|
|
|
|
cur_yylloc = *llocp;
|
|
|
|
|
|
|
|
/* Get next token, saving outputs into lookahead variables */
|
|
|
|
next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
|
|
|
|
yyextra->lookahead_token = next_token;
|
|
|
|
yyextra->lookahead_yylloc = *llocp;
|
|
|
|
|
|
|
|
*llocp = cur_yylloc;
|
|
|
|
|
|
|
|
/* Now revert the un-truncation of the current token */
|
|
|
|
yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
|
|
|
|
*(yyextra->lookahead_end) = '\0';
|
|
|
|
|
|
|
|
yyextra->have_lookahead = true;
|
|
|
|
|
|
|
|
/* Replace cur_token if needed, based on lookahead */
|
|
|
|
switch (cur_token)
|
|
|
|
{
|
|
|
|
case NULLS_P:
|
|
|
|
/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
|
2007-01-09 03:14:16 +01:00
|
|
|
switch (next_token)
|
|
|
|
{
|
|
|
|
case FIRST_P:
|
|
|
|
case LAST_P:
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
cur_token = NULLS_LA;
|
2007-01-09 03:14:16 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2006-10-04 02:30:14 +02:00
|
|
|
|
2007-01-09 03:14:16 +01:00
|
|
|
case WITH:
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
|
2007-01-06 20:14:17 +01:00
|
|
|
switch (next_token)
|
2006-05-27 19:38:46 +02:00
|
|
|
{
|
2008-10-28 15:09:45 +01:00
|
|
|
case TIME:
|
2013-07-29 17:38:01 +02:00
|
|
|
case ORDINALITY:
|
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),
by requiring more than a single token of lookahead to decide what to do.
Up to now we've dealt with that by using a filter between the lexer and
parser that merges adjacent tokens into one in the places where two tokens
of lookahead are necessary. But that creates a number of user-visible
anomalies, for instance that you can't name a CTE "ordinality" because
"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into one
token. I realized that there's a better way.
In this patch, we still do the lookahead basically as before, but we never
merge the second token into the first; we replace just the first token by
a special lookahead symbol when one of the lookahead pairs is seen.
This requires a couple extra productions in the grammar, but it involves
fewer special tokens, so that the grammar tables come out a bit smaller
than before. The filter logic is no slower than before, perhaps a bit
faster.
I also fixed the filter logic so that when backing up after a lookahead,
the current token's terminator is correctly restored; this eliminates some
weird behavior in error message issuance, as is shown by the one change in
existing regression test outputs.
I believe that this patch entirely eliminates odd behaviors caused by
lookahead for WITH. It doesn't really improve the situation for NULLS
followed by FIRST/LAST unfortunately: those sequences still act like a
reserved word, even though there are cases where they should be seen as two
ordinary identifiers, eg "SELECT nulls first FROM ...". I experimented
with additional grammar hacks but couldn't find any simple solution for
that. Still, this is better than before, and it seems much more likely
that we *could* somehow solve the NULLS case on the basis of this filter
behavior than the previous one.
2015-02-24 23:53:42 +01:00
|
|
|
cur_token = WITH_LA;
|
2006-05-27 19:38:46 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return cur_token;
|
|
|
|
}
|