Align ECPG lexer more closely with the core and psql lexers.

Make a bunch of basically-cosmetic changes to reduce the diffs between the flex rules in scan.l, psqlscan.l, and pgc.l. Reorder some code, adjust a lot of whitespace, sync some comments, and make use of flex start condition scopes to do that.

There are a few non-cosmetic changes in the ECPG lexer:

* Bring over the decimalfail rule (and support function process_integer_literal) so that ECPG will lex "1..10" into the same tokens as the backend would. I'm not sure this makes any visible difference to users, but I'm not sure it doesn't, either.

* <xdc><<EOF>> gets its own rule so as to produce a more on-point error message.

* Remove duplicate <SQL>{xdstart} rule.

John Naylor, with a few additional changes by me

Discussion: https://postgr.es/m/CAJVSVGWGqY9YBs2EwtRUkbNv=hXkN8yRPOoD1wxE6COgvvrz5g@mail.gmail.com
2018-11-13 12:57:52 -05:00 · 2018-11-13 12:57:52 -05:00 · ec937d0805
parent d20dceaf50
commit ec937d0805
3 changed files with 623 additions and 471 deletions
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@ -6,7 +6,8 @@
 *
 * NOTE NOTE NOTE:
 *
- * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l!
+ * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l
 * and src/interfaces/ecpg/preproc/pgc.l!
 *
 * The rules are designed so that the scanner never has to backtrack,
 * in the sense that there is always a rule that can match the input
@ -168,8 +169,8 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
 %x xc
 %x xd
 %x xh
 %x xe
 %x xq
 %x xe
 %x xdolq
 %x xui
 %x xuiend
@ -192,7 +193,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix scanner_isspace()
- * to agree, and see also the plpgsql lexer.
+ * to agree.
 */
 space			[ \t\n\r\f]
@ -417,32 +418,36 @@ other			.
 					yyless(2);
 				}
-<xc>{xcstart}	{
+<xc>{
 {xcstart}		{
 					(yyextra->xcdepth)++;
 					/* Put back any characters past slash-star; see above */
 					yyless(2);
 				}
-<xc>{xcstop}	{
+{xcstop}		{
 					if (yyextra->xcdepth <= 0)
 						BEGIN(INITIAL);
 					else
 						(yyextra->xcdepth)--;
 				}
-<xc>{xcinside}	{
+{xcinside}		{
 					/* ignore */
 				}
-<xc>{op_chars}	{
+{op_chars}		{
 					/* ignore */
 				}
-<xc>\*+			{
+\*+				{
 					/* ignore */
 				}
-<xc><<EOF>>		{ yyerror("unterminated /* comment"); }
+<<EOF>>			{
 					yyerror("unterminated /* comment");
 				}
 } /* <xc> */
 {xbstart}		{
 					/* Binary bit type.
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@ -23,6 +23,7 @@
 *
 * See psqlscan_int.h for additional commentary.
 *
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
@ -39,6 +40,9 @@
 }
 %{
 /* LCOV_EXCL_START */
 #include "fe_utils/psqlscan_int.h"
 /*
@ -71,8 +75,6 @@ typedef int YYSTYPE;
 extern int	psql_yyget_column(yyscan_t yyscanner);
 extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
 /* LCOV_EXCL_START */
 %}
 %option reentrant
@ -128,8 +130,8 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
 %x xc
 %x xd
 %x xh
 %x xe
 %x xq
 %x xe
 %x xdolq
 %x xui
 %x xuiend
@ -151,7 +153,7 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix scanner_isspace()
- * to agree, and see also the plpgsql lexer.
+ * to agree.
 */
 space			[ \t\n\r\f]
@ -402,14 +404,15 @@ other			.
 					ECHO;
 				}
-<xc>{xcstart}	{
+<xc>{
 {xcstart}		{
 					cur_state->xcdepth++;
 					/* Put back any characters past slash-star; see above */
 					yyless(2);
 					ECHO;
 				}
-<xc>{xcstop}	{
+{xcstop}		{
 					if (cur_state->xcdepth <= 0)
 						BEGIN(INITIAL);
 					else
@ -417,17 +420,18 @@ other			.
 					ECHO;
 				}
-<xc>{xcinside}	{
+{xcinside}		{
 					ECHO;
 				}
-<xc>{op_chars}	{
+{op_chars}		{
 					ECHO;
 				}
-<xc>\*+			{
+\*+				{
 					ECHO;
 				}
 } /* <xc> */
 {xbstart}		{
 					BEGIN(xb);
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@ -10,7 +10,6 @@
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/interfaces/ecpg/preproc/pgc.l
 *
@ -28,6 +27,9 @@
 }
 %{
 /* LCOV_EXCL_START */
 extern YYSTYPE base_yylval;
 static int		xcdepth = 0;	/* depth of nesting in slash-star comments */
@ -54,6 +56,7 @@ static bool		include_next;
 #define startlit()	(literalbuf[0] = '\0', literallen = 0)
 static void addlit(char *ytext, int yleng);
 static void addlitchar(unsigned char);
 static int	process_integer_literal(const char *token, YYSTYPE *lval);
 static void parse_include(void);
 static bool ecpg_isspace(char ch);
 static bool isdefine(void);
@ -81,8 +84,6 @@ static struct _if_value
 	short else_branch;
 } stacked_if_value[MAX_NESTED_IF];
 /* LCOV_EXCL_START */
 %}
 %option 8bit
@ -91,11 +92,8 @@ static struct _if_value
 %option noinput
 %option noyywrap
 %option warn
 %option prefix="base_yy"
 %option yylineno
-
+%option prefix="base_yy"
 %x C SQL incl def def_ident undef
 /*
 * OK, here is a short description of lex/flex rules behavior.
@ -111,15 +109,21 @@ static struct _if_value
 *  <xb> bit string literal
 *  <xcc> extended C-style comments in C
 *  <xcsql> extended C-style comments in SQL
- *	<xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
+ *  <xd> delimited identifiers (double-quoted identifiers)
- *	<xh> hexadecimal numeric string - thomas 1997-11-16
+ *  <xdc> double-quoted strings in C
- *	<xq> standard quoted strings - thomas 1997-07-30
+ *  <xh> hexadecimal numeric string
 *	<xqc> standard quoted strings in C - michael
 *	<xe> extended quoted strings (support backslash escape sequences)
 *  <xn> national character quoted strings
 *  <xq> standard quoted strings
 *  <xe> extended quoted strings (support backslash escape sequences)
 *  <xqc> single-quoted strings in C
 *  <xdolq> $foo$ quoted strings
 *  <xui> quoted identifier with Unicode escapes
 *  <xus> quoted string with Unicode escapes
 *  <xcond> condition of an EXEC SQL IFDEF construct
 *  <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
 *
 * Remember to add an <<EOF>> case whenever you add a new exclusive state!
 * The default one is probably not the right thing.
 */
 %x xb
@ -128,15 +132,60 @@ static struct _if_value
 %x xd
 %x xdc
 %x xh
 %x xe
 %x xn
 %x xq
 %x xe
 %x xqc
 %x xdolq
 %x xcond
 %x xskip
 %x xui
 %x xus
 %x xcond
 %x xskip
 /* Additional exclusive states that are specific to ECPG */
 %x C SQL incl def def_ident undef
 /*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  SQL-style comments, which start with -- and extend to the
 * next newline, are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix ecpg_isspace()
 * to agree.
 */
 space			[ \t\n\r\f]
 horiz_space		[ \t\f]
 newline			[\n\r]
 non_newline		[^\n\r]
 comment			("--"{non_newline}*)
 whitespace		({space}+|{comment})
 /*
 * SQL requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */
 horiz_whitespace		({horiz_space}|{comment})
 whitespace_with_newline	({horiz_whitespace}*{newline}{whitespace}*)
 quote			'
 quotestop		{quote}{whitespace}*
 quotecontinue	{quote}{whitespace_with_newline}{quote}
 quotefail		{quote}{whitespace}*"-"
 /* Bit string
 */
@ -158,9 +207,6 @@ xeoctesc		[\\][0-7]{1,3}
 xehexesc		[\\]x[0-9A-Fa-f]{1,2}
 xeunicode		[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
 /* C version of hex number */
 xch				0[xX][0-9A-Fa-f]*
 /* Extended quote
 * xqdouble implements embedded quote, ''''
 */
@ -194,7 +240,9 @@ xddouble		{dquote}{dquote}
 xdinside		[^"]+
 /* Unicode escapes */
-/* (The ecpg scanner is not backup-free, so the fail rules in scan.l are not needed here, but could be added if desired.) */
+/* (The ecpg scanner is not backup-free, so the fail rules in scan.l are
 * not needed here, but could be added if desired.)
 */
 uescape			[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
 /* Quoted identifier with Unicode escapes */
@ -211,6 +259,7 @@ xdcqdq			\\\"
 xdcother		[^"]
 xdcinside		({xdcqq}|{xdcqdq}|{xdcother})
 /* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
@ -278,68 +327,40 @@ operator		{op_chars}+
 * instead we pass it separately to parser. there it gets
 * coerced via doNegate() -- Leon aug 20 1999
 *
 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
 *
 * {realfail1} and {realfail2} are added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */
 integer			{digit}+
 decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
 decimalfail		{digit}+\.\.
 real			({integer}|{decimal})[Ee][-+]?{digit}+
 realfail1		({integer}|{decimal})[Ee]
 realfail2		({integer}|{decimal})[Ee][-+]
 param			\${integer}
 /*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  SQL-style comments, which start with -- and extend to the
 * next newline, are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix ecpg_isspace()
 * to agree.
 */
 ccomment		"//".*\n
 space			[ \t\n\r\f]
 horiz_space		[ \t\f]
 newline			[\n\r]
 non_newline		[^\n\r]
 comment			("--"{non_newline}*)
 whitespace		({space}+|{comment})
 /*
 * SQL requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */
 horiz_whitespace	({horiz_space}|{comment})
 whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
 quote			'
 quotestop		{quote}{whitespace}*
 quotecontinue	{quote}{whitespace_with_newline}{quote}
 quotefail		{quote}{whitespace}*"-"
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
 informix_special	[\$]
 other			.
 /*
 * Dollar quoted strings are totally opaque, and no escaping is done on them.
 * Other quoted strings must allow some special characters such as single-quote
 *  and newline.
 * Embedded single-quotes are implemented both in the SQL standard
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 *  of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *  backslash is dropped from the string.
 * Note that xcstart must appear before operator, as explained above!
 *  Also whitespace (comment) must appear before operator.
 */
 /* some stuff needed for ecpg */
 exec			[eE][xX][eE][cC]
 sql				[sS][qQ][lL]
@ -349,6 +370,11 @@ include_next	[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
 import			[iI][mM][pP][oO][rR][tT]
 undef			[uU][nN][dD][eE][fF]
 /* C version of hex number */
 xch				0[xX][0-9A-Fa-f]*
 ccomment		"//".*\n
 if				[iI][fF]
 ifdef			[iI][fF][dD][eE][fF]
 ifndef			[iI][fF][nN][dD][eE][fF]
@ -366,24 +392,12 @@ ip				{ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
 cppinclude		{space}*#{include}{space}*
 cppinclude_next		{space}*#{include_next}{space}*
-/* take care of cpp lines, they may also be continuated */
+/* take care of cpp lines, they may also be continued */
 /* first a general line for all commands not starting with "i" */
 /* and then the other commands starting with "i", we have to add these
- * separately because the cppline production would match on "include" too */
+ * separately because the cppline production would match on "include" too
+ */
 cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
 /*
 * Dollar quoted strings are totally opaque, and no escaping is done on them.
 * Other quoted strings must allow some special characters such as single-quote
 *	and newline.
 * Embedded single-quotes are implemented both in the SQL standard
 *	style of two adjacent single quotes "''" and in the Postgres/Java style
 *	of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *	backslash is dropped from the string. - thomas 1997-09-24
 * Note that xcstart must appear before operator, as explained above!
 *	Also whitespace (comment) must appear before operator.
 */
 cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
 %%
@ -392,7 +406,21 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 		token_start = NULL;
 %}
-<SQL>{whitespace}	{ /* ignore */ }
+<SQL>{
 {whitespace}	{
 					/* ignore */
 				}
 {xcstart}		{
 					token_start = yytext;
 					state_before = YYSTATE;
 					xcdepth = 0;
 					BEGIN(xcsql);
 					/* Put back any characters past slash-star; see above */
 					yyless(2);
 					fputs("/*", yyout);
 				}
 } /* <SQL> */
 <C>{xcstart}	{
 					token_start = yytext;
@ -403,15 +431,6 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					yyless(2);
 					fputs("/*", yyout);
 				}
 <SQL>{xcstart}		{
 					token_start = yytext;
 					state_before = YYSTATE;
 					xcdepth = 0;
 					BEGIN(xcsql);
 					/* Put back any characters past slash-star; see above */
 					yyless(2);
 					fputs("/*", yyout);
 				}
 <xcc>{xcstart}	{ ECHO; }
 <xcsql>{xcstart}	{
 					xcdepth++;
@ -437,18 +456,34 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					BEGIN(state_before);
 					token_start = NULL;
 				}
 <xcc,xcsql>{xcinside}	{ ECHO; }
 <xcc,xcsql>{op_chars}	{ ECHO; }
 <xcc,xcsql>\*+		{ ECHO; }
-<xcc,xcsql><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated /* comment"); }
+<xcc,xcsql>{
 {xcinside}		{
 					ECHO;
 				}
-<SQL>{xbstart}	{
+{op_chars}		{
 					ECHO;
 				}
 \*+				{
 					ECHO;
 				}
 <<EOF>>			{
 					mmfatal(PARSE_ERROR, "unterminated /* comment");
 				}
 } /* <xcc,xcsql> */
 <SQL>{
 {xbstart}		{
 					token_start = yytext;
 					BEGIN(xb);
 					startlit();
 					addlitchar('b');
 				}
 } /* <SQL> */
 <xb>{quotestop}	|
 <xb>{quotefail} {
 					yyless(1);
@ -458,11 +493,14 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					base_yylval.str = mm_strdup(literalbuf);
 					return BCONST;
 				}
 <xh>{xhinside}	|
-<xb>{xbinside}	{ addlit(yytext, yyleng); }
+<xb>{xbinside}	{
 					addlit(yytext, yyleng);
 				}
 <xh>{quotecontinue}	|
-<xb>{quotecontinue}	{ /* ignore */ }
+<xb>{quotecontinue}	{
 					/* ignore */
 				}
 <xb><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated bit string literal"); }
 <SQL>{xhstart}	{
@ -480,7 +518,16 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 				}
 <xh><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); }
-<SQL>{xnstart} {
+
 <C>{xqstart}	{
 					token_start = yytext;
 					state_before = YYSTATE;
 					BEGIN(xqc);
 					startlit();
 				}
 <SQL>{
 {xnstart}		{
 					/* National character.
 					 * Transfer it as-is to the backend.
 					 */
@ -489,31 +536,28 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					BEGIN(xn);
 					startlit();
 				}
-<C>{xqstart}	{
+
-				token_start = yytext;
+{xqstart}		{
 				state_before = YYSTATE;
 				BEGIN(xqc);
 				startlit();
 			}
 <SQL>{xqstart}	{
 					token_start = yytext;
 					state_before = YYSTATE;
 					BEGIN(xq);
 					startlit();
 				}
-<SQL>{xestart}	{
+{xestart}		{
 					token_start = yytext;
 					state_before = YYSTATE;
 					BEGIN(xe);
 					startlit();
 				}
-<SQL>{xusstart}	{
+{xusstart}		{
 					token_start = yytext;
 					state_before = YYSTATE;
 					BEGIN(xus);
 					startlit();
 					addlit(yytext, yyleng);
 				}
 } /* <SQL> */
 <xq,xqc>{quotestop} |
 <xq,xqc>{quotefail} {
 					yyless(1);
@ -547,24 +591,32 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					addlitchar('\'');
 				}
 <xq,xqc,xn,xus>{xqinside}	{ addlit(yytext, yyleng); }
-<xe>{xeinside}		{ addlit(yytext, yyleng); }
+<xe>{xeinside}  {
-<xe>{xeunicode}		{ addlit(yytext, yyleng); }
+					addlit(yytext, yyleng);
-<xe>{xeescape}		{ addlit(yytext, yyleng); }
+				}
-<xe>{xeoctesc}		{ addlit(yytext, yyleng); }
+<xe>{xeunicode} {
-<xe>{xehexesc}		{ addlit(yytext, yyleng); }
+					addlit(yytext, yyleng);
-<xq,xqc,xe,xn,xus>{quotecontinue}	{ /* ignore */ }
+				}
 <xe>{xeescape}  {
 					addlit(yytext, yyleng);
 				}
 <xe>{xeoctesc}  {
 					addlit(yytext, yyleng);
 				}
 <xe>{xehexesc}  {
 					addlit(yytext, yyleng);
 				}
 <xq,xqc,xe,xn,xus>{quotecontinue}	{
 					/* ignore */
 				}
 <xe>.			{
 					/* This is only needed for \ just before EOF */
 					addlitchar(yytext[0]);
 				}
 <xq,xqc,xe,xn,xus><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted string"); }
-<SQL>{dolqfailed}	{
+
-				/* throw back all but the initial "$" */
+<SQL>{
-				yyless(1);
+{dolqdelim}		{
 				/* and treat it as {other} */
 				return yytext[0];
 			}
 <SQL>{dolqdelim} {
 					token_start = yytext;
 					if (dolqstart)
 						free(dolqstart);
@ -573,6 +625,14 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					startlit();
 					addlit(yytext, yyleng);
 				}
 {dolqfailed}	{
 					/* throw back all but the initial "$" */
 					yyless(1);
 					/* and treat it as {other} */
 					return yytext[0];
 				}
 } /* <SQL> */
 <xdolq>{dolqdelim} {
 					if (strcmp(yytext, dolqstart) == 0)
 					{
@ -594,24 +654,32 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						yyless(yyleng - 1);
 					}
 				}
-<xdolq>{dolqinside}	{ addlit(yytext, yyleng); }
+<xdolq>{dolqinside} {
-<xdolq>{dolqfailed}	{ addlit(yytext, yyleng); }
+					addlit(yytext, yyleng);
-<xdolq>{other}		{
+				}
 <xdolq>{dolqfailed} {
 					addlit(yytext, yyleng);
 				}
 <xdolq>.		{
 					/* single quote or dollar sign */
 					addlitchar(yytext[0]);
 				}
-<xdolq><<EOF>>		{ base_yyerror("unterminated dollar-quoted string"); }
+<xdolq><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated dollar-quoted string"); }
-<SQL>{xdstart}		{
+
 <SQL>{
 {xdstart}		{
 					state_before = YYSTATE;
 					BEGIN(xd);
 					startlit();
 				}
-<SQL>{xuistart}		{
+{xuistart}		{
 					state_before = YYSTATE;
 					BEGIN(xui);
 					startlit();
 					addlit(yytext, yyleng);
 				}
 } /* <SQL> */
 <xd>{xdstop}	{
 					BEGIN(state_before);
 					if (literallen == 0)
@ -634,24 +702,59 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					base_yylval.str = mm_strdup(literalbuf);
 					return UIDENT;
 				}
-<xd,xui>{xddouble}		{ addlitchar('"'); }
+<xd,xui>{xddouble}	{
-<xd,xui>{xdinside}		{ addlit(yytext, yyleng); }
+					addlitchar('"');
-<xd,xdc,xui><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
+				}
-<C,SQL>{xdstart}	{
+<xd,xui>{xdinside}	{
 					addlit(yytext, yyleng);
 				}
 <xd,xui><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
 <C>{xdstart}	{
 					state_before = YYSTATE;
 					BEGIN(xdc);
 					startlit();
 				}
-<xdc>{xdcinside}	{ addlit(yytext, yyleng); }
+<xdc>{xdcinside}	{
-<SQL>{typecast}		{ return TYPECAST; }
+					addlit(yytext, yyleng);
-<SQL>{dot_dot}		{ return DOT_DOT; }
+				}
-<SQL>{colon_equals}	{ return COLON_EQUALS; }
+<xdc><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted string"); }
-<SQL>{equals_greater} { return EQUALS_GREATER; }
+
-<SQL>{less_equals}	{ return LESS_EQUALS; }
+<SQL>{
-<SQL>{greater_equals} { return GREATER_EQUALS; }
+{typecast}		{
-<SQL>{less_greater}	{ return NOT_EQUALS; }
+					return TYPECAST;
-<SQL>{not_equals}	{ return NOT_EQUALS; }
+				}
-<SQL>{informix_special}	{
+
 {dot_dot}		{
 					return DOT_DOT;
 				}
 {colon_equals}	{
 					return COLON_EQUALS;
 				}
 {equals_greater} {
 					return EQUALS_GREATER;
 				}
 {less_equals}	{
 					return LESS_EQUALS;
 				}
 {greater_equals} {
 					return GREATER_EQUALS;
 				}
 {less_greater}	{
 					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
 					return NOT_EQUALS;
 				}
 {not_equals}	{
 					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
 					return NOT_EQUALS;
 				}
 {informix_special}	{
 			  /* are we simulating Informix? */
 				if (INFORMIX_MODE)
 				{
@ -660,7 +763,9 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 				else
 					return yytext[0];
 				}
-<SQL>{self}			{ /*
+
 {self}			{
 					/*
 					 * We may find a ';' inside a structure
 					 * definition in a TYPE or VAR statement.
 					 * This is not an EOL marker.
@ -669,7 +774,8 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						BEGIN(C);
 					return yytext[0];
 				}
-<SQL>{operator}		{
+
 {operator}		{
 					/*
 					 * Check for embedded slash-star or dash-dash; those
 					 * are comment starts, so operator must stop there.
@ -765,52 +871,61 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					base_yylval.str = mm_strdup(yytext);
 					return Op;
 				}
-<SQL>{param}		{
+
 {param}			{
 					base_yylval.ival = atol(yytext+1);
 					return PARAM;
 				}
 <C,SQL>{integer}	{
 						int val;
 						char* endptr;
-						errno = 0;
+{ip}			{
 						val = strtoint(yytext, &endptr, 10);
 						if (*endptr != '\0' || errno == ERANGE)
 						{
 							errno = 0;
 							base_yylval.str = mm_strdup(yytext);
 							return FCONST;
 						}
 						base_yylval.ival = val;
 						return ICONST;
 					}
 <SQL>{ip}			{
 					base_yylval.str = mm_strdup(yytext);
 					return IP;
 				}
-<C,SQL>{decimal}	{
+}  /* <SQL> */
 <C,SQL>{
 {integer}		{
 					return process_integer_literal(yytext, &base_yylval);
 				}
 {decimal}		{
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-<C,SQL>{real}		{
+{decimalfail}	{
 					/* throw back the .., and treat as integer */
 					yyless(yyleng - 2);
 					return process_integer_literal(yytext, &base_yylval);
 				}
 {real}			{
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-<SQL>{realfail1}	{
+{realfail1}		{
 					/*
 					 * throw back the [Ee], and treat as {decimal}.  Note
 					 * that it is possible the input is actually {integer},
 					 * but since this case will almost certainly lead to a
 					 * syntax error anyway, we don't bother to distinguish.
 					 */
 					yyless(yyleng - 1);
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-<SQL>{realfail2}	{
+{realfail2}		{
 					/* throw back the [Ee][+-], and proceed as above */
 					yyless(yyleng - 2);
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-<SQL>:{identifier}((("->"|\.){identifier})|(\[{array}\]))*	{
+} /* <C,SQL> */
 <SQL>{
 :{identifier}((("->"|\.){identifier})|(\[{array}\]))*	{
 					base_yylval.str = mm_strdup(yytext+1);
 					return CVARIABLE;
 				}
-<SQL>{identifier}	{
+
 {identifier}	{
 					const ScanKeyword  *keyword;
 					if (!isdefine())
@ -837,7 +952,16 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						return IDENT;
 					}
 				}
-<SQL>{other}		{ return yytext[0]; }
+
 {other}			{
 					return yytext[0];
 				}
 } /* <SQL> */
 	/*
 	 * Begin ECPG-specific rules
 	 */
 <C>{exec_sql}		{ BEGIN(SQL); return SQL_START; }
 <C>{informix_special}	{
 						/* are we simulating Informix? */
@ -1288,6 +1412,7 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					}
 				}
 <INITIAL>{other}|\n	{ mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <pgsql-bugs@postgresql.org>"); }
 %%
@ -1350,6 +1475,24 @@ addlitchar(unsigned char ychar)
 	literalbuf[literallen] = '\0';
 }
 /*
  * Convert the text of an integer token into a lexer token.
  *
  * If strtoint() consumes all of "token" and no ERANGE error is reported,
  * the value is stored in lval->ival and ICONST is returned.  Otherwise a
  * copy of the token text is stored in lval->str and FCONST is returned.
  */
 static int
 process_integer_literal(const char *token, YYSTYPE *lval)
 {
 	char	   *tail;
 	int			parsed;
 	/* clear errno so that a stale ERANGE is not mistaken for a new one */
 	errno = 0;
 	parsed = strtoint(token, &tail, 10);
 	if (*tail == '\0' && errno != ERANGE)
 	{
 		/* the whole token was converted and no range error was reported */
 		lval->ival = parsed;
 		return ICONST;
 	}
 	/* leftover text or range error: hand the token text back as a float */
 	lval->str = mm_strdup(token);
 	return FCONST;
 }
 static void
 parse_include(void)
 {