postgresql/src/test/regress/expected/json_encoding.out
Andrew Dunstan b6363772fd Factor out encoding specific tests for json
This lets us remove the large alternative results files for the main
json and jsonb tests, which makes modifying those tests simpler for
committers and patch submitters.

Backpatch to 9.4 for jsonb and 9.3 for json.
2015-10-07 22:18:27 -04:00

248 lines
8.2 KiB
Plaintext

-- encoding-sensitive tests for json and jsonb
-- first json
-- basic unicode input
SELECT '"\u"'::json; -- ERROR, incomplete escape
ERROR: invalid input syntax for type json
LINE 1: SELECT '"\u"'::json;
               ^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u"
SELECT '"\u00"'::json; -- ERROR, incomplete escape
ERROR: invalid input syntax for type json
LINE 1: SELECT '"\u00"'::json;
               ^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u00"
SELECT '"\u000g"'::json; -- ERROR, g is not a hex digit
ERROR: invalid input syntax for type json
LINE 1: SELECT '"\u000g"'::json;
               ^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u000g...
SELECT '"\u0000"'::json; -- OK, legal escape
json
----------
"\u0000"
(1 row)
SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK
json
----------
"\uaBcD"
(1 row)
-- handling of unicode surrogate pairs
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
correct_in_utf8
----------------------------
"\ud83d\ude04\ud83d\udc36"
(1 row)
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
DETAIL: Unicode high surrogate must not follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
--handling of simple unicode escapes
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8
---------------------------------------
{ "a": "the Copyright \u00a9 sign" }
(1 row)
select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
correct_everywhere
-------------------------------------
{ "a": "dollar \u0024 character" }
(1 row)
select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
not_an_escape
--------------------------------------
{ "a": "dollar \\u0024 character" }
(1 row)
select json '{ "a": "null \u0000 escape" }' as not_unescaped;
not_unescaped
--------------------------------
{ "a": "null \u0000 escape" }
(1 row)
select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
not_an_escape
---------------------------------
{ "a": "null \\u0000 escape" }
(1 row)
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
correct_in_utf8
----------------------
the Copyright © sign
(1 row)
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
dollar $ character
(1 row)
select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
not_an_escape
-------------------------
dollar \u0024 character
(1 row)
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
ERROR: unsupported Unicode escape sequence
DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------
null \u0000 escape
(1 row)
-- then jsonb
-- basic unicode input
SELECT '"\u"'::jsonb; -- ERROR, incomplete escape
ERROR: invalid input syntax for type json
LINE 1: SELECT '"\u"'::jsonb;
               ^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u"
SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape
ERROR: invalid input syntax for type json
LINE 1: SELECT '"\u00"'::jsonb;
               ^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u00"
SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit
ERROR: invalid input syntax for type json
LINE 1: SELECT '"\u000g"'::jsonb;
               ^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u000g...
SELECT '"\u0045"'::jsonb; -- OK, legal escape
jsonb
-------
"E"
(1 row)
SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
ERROR: unsupported Unicode escape sequence
LINE 1: SELECT '"\u0000"'::jsonb;
               ^
DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: ...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
octet_length
--------------
5
(1 row)
-- handling of unicode surrogate pairs
SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
correct_in_utf8
-----------------
10
(1 row)
SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
                     ^
DETAIL: Unicode high surrogate must not follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
                     ^
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
                     ^
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
                     ^
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
-- handling of simple unicode escapes
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8
-------------------------------
{"a": "the Copyright © sign"}
(1 row)
SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
correct_everywhere
-----------------------------
{"a": "dollar $ character"}
(1 row)
SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
not_an_escape
-----------------------------------
{"a": "dollar \\u0024 character"}
(1 row)
SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
                     ^
DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":...
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
not_an_escape
------------------------------
{"a": "null \\u0000 escape"}
(1 row)
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
correct_in_utf8
----------------------
the Copyright © sign
(1 row)
SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
dollar $ character
(1 row)
SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
not_an_escape
-------------------------
dollar \u0024 character
(1 row)
SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
                     ^
DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: { "a":...
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------
null \u0000 escape
(1 row)