diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 2165ffcc25..e35636883a 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -59,25 +59,24 @@ fprintf_to_ereport(const char *fmt, const char *msg) %option noyyfree /* - * We use exclusive states for quoted, signle-quoted and non-quoted strings, - * quoted variable names and C-tyle comments. + * We use exclusive states for quoted and non-quoted strings, + * quoted variable names and C-style comments. * Exclusive states: * - quoted strings * - non-quoted strings * - quoted variable names - * - single-quoted strings * - C-style comment */ %x xq %x xnq %x xvq -%x xsq %x xc -special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] -any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] +special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] blank [ \t\n\r\f] +/* "other" means anything that's not special, blank, or '\' or '"' */ +other [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f] digit [0-9] integer (0|[1-9]{digit}*) @@ -95,7 +94,7 @@ hex_fail \\x{hex_dig}{0,1} %% -{any}+ { +{other}+ { addstring(false, yytext, yyleng); } @@ -105,13 +104,12 @@ hex_fail \\x{hex_dig}{0,1} return checkKeyword(); } - \/\* { yylval->str = scanstring; BEGIN xc; } -({special}|\"|\') { +({special}|\") { yylval->str = scanstring; yyless(0); BEGIN INITIAL; @@ -124,39 +122,37 @@ hex_fail \\x{hex_dig}{0,1} return checkKeyword(); } -\\[\"\'\\] { addchar(false, yytext[1]); } +\\b { addchar(false, '\b'); } -\\b { addchar(false, '\b'); } +\\f { addchar(false, '\f'); } -\\f { addchar(false, '\f'); } +\\n { addchar(false, '\n'); } -\\n { addchar(false, '\n'); } +\\r { addchar(false, '\r'); } -\\r { addchar(false, '\r'); } +\\t { addchar(false, '\t'); } -\\t { addchar(false, '\t'); } +\\v { addchar(false, '\v'); } -\\v { addchar(false, '\v'); } +{unicode}+ { parseUnicode(yytext, yyleng); } -{unicode}+ { parseUnicode(yytext, yyleng); } +{hex_char} { parseHexChar(yytext); } -{hex_char} { parseHexChar(yytext); } +{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); } -{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); } +{hex_fail} { yyerror(NULL, "invalid hex character sequence"); } -{hex_fail} { yyerror(NULL, "invalid hex character sequence"); } +{unicode}+\\ { + /* throw back the \\, and treat as unicode */ + yyless(yyleng - 1); + parseUnicode(yytext, yyleng); + } -{unicode}+\\ { - /* throw back the \\, and treat as unicode */ - yyless(yyleng - 1); - parseUnicode(yytext, yyleng); - } +\\. { addchar(false, yytext[1]); } -\\. { yyerror(NULL, "escape sequence is invalid"); } +\\ { yyerror(NULL, "unexpected end after backslash"); } -\\ { yyerror(NULL, "unexpected end after backslash"); } - -<> { yyerror(NULL, "unexpected end of quoted string"); } +<> { yyerror(NULL, "unexpected end of quoted string"); } \" { yylval->str = scanstring; @@ -170,16 +166,8 @@ hex_fail \\x{hex_dig}{0,1} return VARIABLE_P; } -\' { - yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; - } - [^\\\"]+ { addstring(false, yytext, yyleng); } -[^\\\']+ { addstring(false, yytext, yyleng); } - \*\/ { BEGIN INITIAL; } [^\*]+ { } @@ -210,7 +198,7 @@ hex_fail \\x{hex_dig}{0,1} \> { return GREATER_P; } -\${any}+ { +\${other}+ { addstring(true, yytext + 1, yyleng - 1); addchar(false, '\0'); yylval->str = scanstring; @@ -263,27 +251,22 @@ hex_fail \\x{hex_dig}{0,1} ({realfail1}|{realfail2}) { yyerror(NULL, "invalid floating point number"); } -{any}+ { - addstring(true, yytext, yyleng); - BEGIN xnq; - } - \" { addchar(true, '\0'); BEGIN xq; } -\' { - addchar(true, '\0'); - BEGIN xsq; - } - \\ { yyless(0); addchar(true, '\0'); BEGIN xnq; } +{other}+ { + addstring(true, yytext, yyleng); + BEGIN xnq; + } + <> { yyterminate(); } %% diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out index ea42ae367a..fc971dc408 100644 --- a/src/test/regress/expected/jsonpath.out +++ b/src/test/regress/expected/jsonpath.out @@ -171,30 +171,24 @@ select '"\b\f\r\n\t\v\"\''\\"'::jsonpath; "\b\f\r\n\t\u000b\"'\\" (1 row) -select '''\b\f\r\n\t\v\"\''\\'''::jsonpath; - jsonpath -------------------------- - "\b\f\r\n\t\u000b\"'\\" -(1 row) - select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath; jsonpath ---------- "PgSQL" (1 row) -select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath; - jsonpath ----------- - "PgSQL" -(1 row) - select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath; jsonpath --------------------- $."fooPgSQL\t\"bar" (1 row) +select '"\z"'::jsonpath; -- unrecognized escape is just the literal char + jsonpath +---------- + "z" +(1 row) + select '$.g ? ($.a == 1)'::jsonpath; jsonpath -------------------- diff --git a/src/test/regress/expected/jsonpath_encoding.out b/src/test/regress/expected/jsonpath_encoding.out index 8db6e47dbb..ecffe095b5 100644 --- a/src/test/regress/expected/jsonpath_encoding.out +++ b/src/test/regress/expected/jsonpath_encoding.out @@ -81,84 +81,6 @@ select '"null \\u0000 escape"'::jsonpath as not_an_escape; "null \\u0000 escape" (1 row) --- checks for single-quoted values --- basic unicode input -SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape -ERROR: invalid Unicode escape -LINE 1: SELECT E'\'\u\''::jsonpath; - ^ -HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. -SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape -ERROR: invalid Unicode escape -LINE 1: SELECT E'\'\u00\''::jsonpath; - ^ -HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. -SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit -ERROR: invalid Unicode escape -LINE 1: SELECT E'\'\u000g\''::jsonpath; - ^ -HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. -SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape -ERROR: invalid Unicode escape value at or near "E'\'\u0000" -LINE 1: SELECT E'\'\u0000\''::jsonpath; - ^ -SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK - jsonpath ----------- - "ꯍ" -(1 row) - --- handling of unicode surrogate pairs -select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8; - correct_in_utf8 ------------------ - "😄🐶" -(1 row) - -select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row -ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d" -LINE 1: select E'\'\ud83d\ud83d\''::jsonpath; - ^ -select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order -ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04" -LINE 1: select E'\'\ude04\ud83d\''::jsonpath; - ^ -select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate -ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83dX" -LINE 1: select E'\'\ud83dX\''::jsonpath; - ^ -select E'\'\ude04X\''::jsonpath; -- orphan low surrogate -ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04" -LINE 1: select E'\'\ude04X\''::jsonpath; - ^ ---handling of simple unicode escapes -select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8; - correct_in_utf8 ------------------------- - "the Copyright © sign" -(1 row) - -select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere; - correct_everywhere ----------------------- - "dollar $ character" -(1 row) - -select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape; - not_an_escape ----------------------- - "dollar $ character" -(1 row) - -select E'\'null \u0000 escape\''::jsonpath as not_unescaped; -ERROR: invalid Unicode escape value at or near "E'\'null \u0000" -LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped; - ^ -select E'\'null \\u0000 escape\''::jsonpath as not_an_escape; -ERROR: unsupported Unicode escape sequence -LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape... - ^ -DETAIL: \u0000 cannot be converted to text. -- checks for quoted key names -- basic unicode input SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape diff --git a/src/test/regress/expected/jsonpath_encoding_1.out b/src/test/regress/expected/jsonpath_encoding_1.out index e6dff25d45..c8cc2173a8 100644 --- a/src/test/regress/expected/jsonpath_encoding_1.out +++ b/src/test/regress/expected/jsonpath_encoding_1.out @@ -78,78 +78,6 @@ select '"null \\u0000 escape"'::jsonpath as not_an_escape; "null \\u0000 escape" (1 row) --- checks for single-quoted values --- basic unicode input -SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape -ERROR: invalid Unicode escape -LINE 1: SELECT E'\'\u\''::jsonpath; - ^ -HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. -SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape -ERROR: invalid Unicode escape -LINE 1: SELECT E'\'\u00\''::jsonpath; - ^ -HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. -SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit -ERROR: invalid Unicode escape -LINE 1: SELECT E'\'\u000g\''::jsonpath; - ^ -HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. -SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape -ERROR: invalid Unicode escape value at or near "E'\'\u0000" -LINE 1: SELECT E'\'\u0000\''::jsonpath; - ^ -SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK -ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\uaBcD" -LINE 1: SELECT E'\'\uaBcD\''::jsonpath; - ^ --- handling of unicode surrogate pairs -select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8; -ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\ud83d\ude04" -LINE 1: select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_... - ^ -select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row -ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d" -LINE 1: select E'\'\ud83d\ud83d\''::jsonpath; - ^ -select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order -ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04" -LINE 1: select E'\'\ude04\ud83d\''::jsonpath; - ^ -select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate -ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83dX" -LINE 1: select E'\'\ud83dX\''::jsonpath; - ^ -select E'\'\ude04X\''::jsonpath; -- orphan low surrogate -ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04" -LINE 1: select E'\'\ude04X\''::jsonpath; - ^ ---handling of simple unicode escapes -select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8; -ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'the Copyright \u00a9" -LINE 1: select E'\'the Copyright \u00a9 sign\''::jsonpath as correct... - ^ -select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere; - correct_everywhere ----------------------- - "dollar $ character" -(1 row) - -select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape; - not_an_escape ----------------------- - "dollar $ character" -(1 row) - -select E'\'null \u0000 escape\''::jsonpath as not_unescaped; -ERROR: invalid Unicode escape value at or near "E'\'null \u0000" -LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped; - ^ -select E'\'null \\u0000 escape\''::jsonpath as not_an_escape; -ERROR: unsupported Unicode escape sequence -LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape... - ^ -DETAIL: \u0000 cannot be converted to text. -- checks for quoted key names -- basic unicode input SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql index 29ea77a485..7afe2528c3 100644 --- a/src/test/regress/sql/jsonpath.sql +++ b/src/test/regress/sql/jsonpath.sql @@ -30,10 +30,9 @@ select '$.a/+-1'::jsonpath; select '1 * 2 + 4 % -3 != false'::jsonpath; select '"\b\f\r\n\t\v\"\''\\"'::jsonpath; -select '''\b\f\r\n\t\v\"\''\\'''::jsonpath; select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath; -select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath; select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath; +select '"\z"'::jsonpath; -- unrecognized escape is just the literal char select '$.g ? ($.a == 1)'::jsonpath; select '$.g ? (@ == 1)'::jsonpath; diff --git a/src/test/regress/sql/jsonpath_encoding.sql b/src/test/regress/sql/jsonpath_encoding.sql index a3b5bc39a1..3a23b72818 100644 --- a/src/test/regress/sql/jsonpath_encoding.sql +++ b/src/test/regress/sql/jsonpath_encoding.sql @@ -24,29 +24,6 @@ select '"dollar \\u0024 character"'::jsonpath as not_an_escape; select '"null \u0000 escape"'::jsonpath as not_unescaped; select '"null \\u0000 escape"'::jsonpath as not_an_escape; --- checks for single-quoted values - --- basic unicode input -SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape -SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape -SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit -SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape -SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK - --- handling of unicode surrogate pairs -select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8; -select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row -select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order -select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate -select E'\'\ude04X\''::jsonpath; -- orphan low surrogate - ---handling of simple unicode escapes -select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8; -select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere; -select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape; -select E'\'null \u0000 escape\''::jsonpath as not_unescaped; -select E'\'null \\u0000 escape\''::jsonpath as not_an_escape; - -- checks for quoted key names -- basic unicode input