Fix JSON error reporting for many cases of erroneous string values.

The majority of error exit cases in json_lex_string() failed to
set lex->token_terminator, causing problems for the error context
reporting code: it would see token_terminator less than token_start
and misbehave in unpredictable ways.  In v14 and up the end result
could be as bad as a crash in report_json_context().  Older
versions accidentally avoided that fate; but all versions produced
error context lines that were far less useful than intended,
because they stopped at the end of the prior token instead of
continuing to where the actually-bad input is.

To fix, invent some macros that make it less notationally painful
to do the right thing.  Also add documentation about what the
function is actually required to do; and in >= v14, add an assertion
in report_json_context about token_terminator being sufficiently
far advanced.

Per report from Nikolay Shaplov.  Back-patch to all supported
versions.

Discussion: https://postgr.es/m/7332649.x5DLKWyVIX@thinkpad-pgpro
This commit is contained in:
Tom Lane 2023-03-13 15:19:00 -04:00
parent 0736b11318
commit 234941a3bb
3 changed files with 57 additions and 32 deletions

View File

@ -750,6 +750,13 @@ json_lex(JsonLexContext *lex)
/* /*
* The next token in the input stream is known to be a string; lex it. * The next token in the input stream is known to be a string; lex it.
*
* If lex->strval isn't NULL, fill it with the decoded string.
* Set lex->token_terminator to the end of the decoded input, and in
* success cases, transfer its previous value to lex->prev_token_terminator.
*
* Note: be careful that all error cases advance lex->token_terminator
* to the point after the character we detected the error on.
*/ */
static inline void static inline void
json_lex_string(JsonLexContext *lex) json_lex_string(JsonLexContext *lex)
@ -837,33 +844,42 @@ json_lex_string(JsonLexContext *lex)
if (ch >= 0xd800 && ch <= 0xdbff) if (ch >= 0xd800 && ch <= 0xdbff)
{ {
if (hi_surrogate != -1) if (hi_surrogate != -1)
{
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", errmsg("invalid input syntax for type %s",
"json"), "json"),
errdetail("Unicode high surrogate must not follow a high surrogate."), errdetail("Unicode high surrogate must not follow a high surrogate."),
report_json_context(lex))); report_json_context(lex)));
}
hi_surrogate = (ch & 0x3ff) << 10; hi_surrogate = (ch & 0x3ff) << 10;
continue; continue;
} }
else if (ch >= 0xdc00 && ch <= 0xdfff) else if (ch >= 0xdc00 && ch <= 0xdfff)
{ {
if (hi_surrogate == -1) if (hi_surrogate == -1)
{
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"), errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."), errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex))); report_json_context(lex)));
}
ch = 0x10000 + hi_surrogate + (ch & 0x3ff); ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
hi_surrogate = -1; hi_surrogate = -1;
} }
if (hi_surrogate != -1) if (hi_surrogate != -1)
{
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"), errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."), errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex))); report_json_context(lex)));
}
/* /*
* For UTF8, replace the escape sequence by the actual * For UTF8, replace the escape sequence by the actual
@ -875,6 +891,7 @@ json_lex_string(JsonLexContext *lex)
if (ch == 0) if (ch == 0)
{ {
/* We can't allow this, since our TEXT type doesn't */ /* We can't allow this, since our TEXT type doesn't */
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"), errmsg("unsupported Unicode escape sequence"),
@ -898,24 +915,27 @@ json_lex_string(JsonLexContext *lex)
} }
else else
{ {
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"), errmsg("unsupported Unicode escape sequence"),
errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."), errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
report_json_context(lex))); report_json_context(lex)));
} }
} }
} }
else if (lex->strval != NULL) else if (lex->strval != NULL)
{ {
if (hi_surrogate != -1) if (hi_surrogate != -1)
{
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", errmsg("invalid input syntax for type %s",
"json"), "json"),
errdetail("Unicode low surrogate must follow a high surrogate."), errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex))); report_json_context(lex)));
}
switch (*s) switch (*s)
{ {
@ -968,16 +988,18 @@ json_lex_string(JsonLexContext *lex)
extract_mb_char(s)), extract_mb_char(s)),
report_json_context(lex))); report_json_context(lex)));
} }
} }
else if (lex->strval != NULL) else if (lex->strval != NULL)
{ {
if (hi_surrogate != -1) if (hi_surrogate != -1)
{
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"), errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."), errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex))); report_json_context(lex)));
}
appendStringInfoChar(lex->strval, *s); appendStringInfoChar(lex->strval, *s);
} }
@ -985,11 +1007,14 @@ json_lex_string(JsonLexContext *lex)
} }
if (hi_surrogate != -1) if (hi_surrogate != -1)
{
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"), errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."), errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex))); report_json_context(lex)));
}
/* Hooray, we found the end of the string! */ /* Hooray, we found the end of the string! */
lex->prev_token_terminator = lex->token_terminator; lex->prev_token_terminator = lex->token_terminator;

View File

@ -41,19 +41,19 @@ select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
DETAIL: Unicode high surrogate must not follow a high surrogate. DETAIL: Unicode high surrogate must not follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ude04...
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83dX...
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ude04...
--handling of simple unicode escapes --handling of simple unicode escapes
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8 correct_in_utf8
@ -106,7 +106,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
ERROR: unsupported Unicode escape sequence ERROR: unsupported Unicode escape sequence
DETAIL: \u0000 cannot be converted to text. DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "null \u0000...
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape not_an_escape
-------------------- --------------------
@ -144,7 +144,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT '"\u0000"'::jsonb; LINE 1: SELECT '"\u0000"'::jsonb;
^ ^
DETAIL: \u0000 cannot be converted to text. DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: ... CONTEXT: JSON data, line 1: "\u0000...
-- use octet_length here so we don't get an odd unicode char in the -- use octet_length here so we don't get an odd unicode char in the
-- output -- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@ -165,25 +165,25 @@ ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
^ ^
DETAIL: Unicode high surrogate must not follow a high surrogate. DETAIL: Unicode high surrogate must not follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
^ ^
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ude04...
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
^ ^
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83dX...
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
^ ^
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ude04...
-- handling of simple unicode escapes -- handling of simple unicode escapes
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8 correct_in_utf8
@ -208,7 +208,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
^ ^
DETAIL: \u0000 cannot be converted to text. DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
not_an_escape not_an_escape
------------------------------ ------------------------------
@ -238,7 +238,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
^ ^
DETAIL: \u0000 cannot be converted to text. DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape not_an_escape
-------------------- --------------------

View File

@ -35,23 +35,23 @@ SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
ERROR: unsupported Unicode escape sequence ERROR: unsupported Unicode escape sequence
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83d\ude04...
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
DETAIL: Unicode high surrogate must not follow a high surrogate. DETAIL: Unicode high surrogate must not follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ude04...
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83dX...
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ude04...
--handling of simple unicode escapes --handling of simple unicode escapes
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8 correct_in_utf8
@ -86,7 +86,7 @@ select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
ERROR: unsupported Unicode escape sequence ERROR: unsupported Unicode escape sequence
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere correct_everywhere
-------------------- --------------------
@ -102,7 +102,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
ERROR: unsupported Unicode escape sequence ERROR: unsupported Unicode escape sequence
DETAIL: \u0000 cannot be converted to text. DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "null \u0000...
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape not_an_escape
-------------------- --------------------
@ -140,7 +140,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT '"\u0000"'::jsonb; LINE 1: SELECT '"\u0000"'::jsonb;
^ ^
DETAIL: \u0000 cannot be converted to text. DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: ... CONTEXT: JSON data, line 1: "\u0000...
-- use octet_length here so we don't get an odd unicode char in the -- use octet_length here so we don't get an odd unicode char in the
-- output -- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@ -148,45 +148,45 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text); LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
^ ^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: ... CONTEXT: JSON data, line 1: "\uaBcD...
-- handling of unicode surrogate pairs -- handling of unicode surrogate pairs
SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8; SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
ERROR: unsupported Unicode escape sequence ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3... LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3...
^ ^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83d\ude04...
SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
^ ^
DETAIL: Unicode high surrogate must not follow a high surrogate. DETAIL: Unicode high surrogate must not follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
^ ^
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ude04...
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
^ ^
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ud83dX...
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
^ ^
DETAIL: Unicode low surrogate must follow a high surrogate. DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "\ude04...
-- handling of simple unicode escapes -- handling of simple unicode escapes
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
ERROR: unsupported Unicode escape sequence ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr... LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr...
^ ^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
correct_everywhere correct_everywhere
----------------------------- -----------------------------
@ -204,7 +204,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
^ ^
DETAIL: \u0000 cannot be converted to text. DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
not_an_escape not_an_escape
------------------------------ ------------------------------
@ -216,7 +216,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'... LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'...
^ ^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere correct_everywhere
-------------------- --------------------
@ -234,7 +234,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
^ ^
DETAIL: \u0000 cannot be converted to text. DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":... CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape not_an_escape
-------------------- --------------------