From 78ed8e03c67d7333708f5c1873ec1d239ae2d7e0 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Wed, 12 Jun 2013 13:35:24 -0400 Subject: [PATCH] Fix unescaping of JSON Unicode escapes, especially for non-UTF8. Per discussion on -hackers. We treat Unicode escapes when unescaping them similarly to the way we treat them in PostgreSQL string literals. Escapes in the ASCII range are always accepted, no matter what the database encoding. Escapes for higher code points are only processed in UTF8 databases, and attempts to process them in other databases will result in an error. \u0000 is never unescaped, since it would result in an impermissible null byte. --- doc/src/sgml/func.sgml | 11 + src/backend/utils/adt/json.c | 42 +- src/test/regress/expected/json.out | 23 +- src/test/regress/expected/json_1.out | 960 +++++++++++++++++++++++++++ src/test/regress/sql/json.sql | 8 +- 5 files changed, 1033 insertions(+), 11 deletions(-) create mode 100644 src/test/regress/expected/json_1.out diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 3adb36579e..4c5af4b83c 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -10159,6 +10159,17 @@ table2-mapping + + + Many of these functions and operators will convert Unicode escapes + in the JSON text to the appropriate UTF8 character when the database encoding is UTF8. In + other encodings the escape sequence must be for an ASCII character, and any other code point + in a Unicode escape sequence will result in an error. + In general, it is best to avoid mixing Unicode escapes in JSON with a non-UTF8 database + encoding, if possible. + + + The extension has a cast from hstore to diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index d8046c5b54..a1c7f51efa 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -717,7 +717,6 @@ json_lex_string(JsonLexContext *lex) { char utf8str[5]; int utf8len; - char *converted; if (ch >= 0xd800 && ch <= 0xdbff) { @@ -749,13 +748,40 @@ json_lex_string(JsonLexContext *lex) errdetail("low order surrogate must follow a high order surrogate."), report_json_context(lex))); - unicode_to_utf8(ch, (unsigned char *) utf8str); - utf8len = pg_utf_mblen((unsigned char *) utf8str); - utf8str[utf8len] = '\0'; - converted = pg_any_to_server(utf8str, utf8len, PG_UTF8); - appendStringInfoString(lex->strval, converted); - if (converted != utf8str) - pfree(converted); + /* + * For UTF8, replace the escape sequence by the actual utf8 + * character in lex->strval. Do this also for other encodings + * if the escape designates an ASCII character, otherwise + * raise an error. We don't ever unescape a \u0000, since that + * would result in an impermissible nul byte. + */ + + if (ch == 0) + { + appendStringInfoString(lex->strval, "\\u0000"); + } + else if (GetDatabaseEncoding() == PG_UTF8) + { + unicode_to_utf8(ch, (unsigned char *) utf8str); + utf8len = pg_utf_mblen((unsigned char *) utf8str); + appendBinaryStringInfo(lex->strval, utf8str, utf8len); + } + else if (ch <= 0x007f) + { + /* + * This is the only way to designate things like a form feed + * character in JSON, so it's useful in all encodings. + */ + appendStringInfoChar(lex->strval, (char) ch); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Unicode escape for code points higher than U+007F not permitted in non-UTF8 encoding"), + report_json_context(lex))); + } } } diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 293c742962..e5da9956bc 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -921,8 +921,8 @@ ERROR: cannot call json_populate_recordset on a nested object select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; ERROR: cannot call json_populate_recordset on a nested object -- handling of unicode surrogate pairs -select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct; - correct +select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; + correct_in_utf8 ---------------------------- "\ud83d\ude04\ud83d\udc36" (1 row) @@ -943,3 +943,22 @@ select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate ERROR: invalid input syntax for type json DETAIL: low order surrogate must follow a high order surrogate. CONTEXT: JSON data, line 1: { "a":... +--handling of simple unicode escapes +select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; + correct_in_utf8 +---------------------- + the Copyright © sign +(1 row) + +select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; + correct_everywhere +-------------------- + dollar $ character +(1 row) + +select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped; + not_unescaped +-------------------- + null \u0000 escape +(1 row) + diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out new file mode 100644 index 0000000000..641f3458c7 --- /dev/null +++ b/src/test/regress/expected/json_1.out @@ -0,0 +1,960 @@ +-- Strings. +SELECT '""'::json; -- OK. + json +------ + "" +(1 row) + +SELECT $$''$$::json; -- ERROR, single quotes are not allowed +ERROR: invalid input syntax for type json +LINE 1: SELECT $$''$$::json; + ^ +DETAIL: Token "'" is invalid. +CONTEXT: JSON data, line 1: '... +SELECT '"abc"'::json; -- OK + json +------- + "abc" +(1 row) + +SELECT '"abc'::json; -- ERROR, quotes not closed +ERROR: invalid input syntax for type json +LINE 1: SELECT '"abc'::json; + ^ +DETAIL: Token ""abc" is invalid. +CONTEXT: JSON data, line 1: "abc +SELECT '"abc +def"'::json; -- ERROR, unescaped newline in string constant +ERROR: invalid input syntax for type json +LINE 1: SELECT '"abc + ^ +DETAIL: Character with value 0x0a must be escaped. +CONTEXT: JSON data, line 1: "abc +SELECT '"\n\"\\"'::json; -- OK, legal escapes + json +---------- + "\n\"\\" +(1 row) + +SELECT '"\v"'::json; -- ERROR, not a valid JSON escape +ERROR: invalid input syntax for type json +LINE 1: SELECT '"\v"'::json; + ^ +DETAIL: Escape sequence "\v" is invalid. +CONTEXT: JSON data, line 1: "\v... +SELECT '"\u"'::json; -- ERROR, incomplete escape +ERROR: invalid input syntax for type json +LINE 1: SELECT '"\u"'::json; + ^ +DETAIL: "\u" must be followed by four hexadecimal digits. +CONTEXT: JSON data, line 1: "\u" +SELECT '"\u00"'::json; -- ERROR, incomplete escape +ERROR: invalid input syntax for type json +LINE 1: SELECT '"\u00"'::json; + ^ +DETAIL: "\u" must be followed by four hexadecimal digits. +CONTEXT: JSON data, line 1: "\u00" +SELECT '"\u000g"'::json; -- ERROR, g is not a hex digit +ERROR: invalid input syntax for type json +LINE 1: SELECT '"\u000g"'::json; + ^ +DETAIL: "\u" must be followed by four hexadecimal digits. +CONTEXT: JSON data, line 1: "\u000g... +SELECT '"\u0000"'::json; -- OK, legal escape + json +---------- + "\u0000" +(1 row) + +SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK + json +---------- + "\uaBcD" +(1 row) + +-- Numbers. +SELECT '1'::json; -- OK + json +------ + 1 +(1 row) + +SELECT '0'::json; -- OK + json +------ + 0 +(1 row) + +SELECT '01'::json; -- ERROR, not valid according to JSON spec +ERROR: invalid input syntax for type json +LINE 1: SELECT '01'::json; + ^ +DETAIL: Token "01" is invalid. +CONTEXT: JSON data, line 1: 01 +SELECT '0.1'::json; -- OK + json +------ + 0.1 +(1 row) + +SELECT '9223372036854775808'::json; -- OK, even though it's too large for int8 + json +--------------------- + 9223372036854775808 +(1 row) + +SELECT '1e100'::json; -- OK + json +------- + 1e100 +(1 row) + +SELECT '1.3e100'::json; -- OK + json +--------- + 1.3e100 +(1 row) + +SELECT '1f2'::json; -- ERROR +ERROR: invalid input syntax for type json +LINE 1: SELECT '1f2'::json; + ^ +DETAIL: Token "1f2" is invalid. +CONTEXT: JSON data, line 1: 1f2 +SELECT '0.x1'::json; -- ERROR +ERROR: invalid input syntax for type json +LINE 1: SELECT '0.x1'::json; + ^ +DETAIL: Token "0.x1" is invalid. +CONTEXT: JSON data, line 1: 0.x1 +SELECT '1.3ex100'::json; -- ERROR +ERROR: invalid input syntax for type json +LINE 1: SELECT '1.3ex100'::json; + ^ +DETAIL: Token "1.3ex100" is invalid. +CONTEXT: JSON data, line 1: 1.3ex100 +-- Arrays. +SELECT '[]'::json; -- OK + json +------ + [] +(1 row) + +SELECT '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]'::json; -- OK + json +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] +(1 row) + +SELECT '[1,2]'::json; -- OK + json +------- + [1,2] +(1 row) + +SELECT '[1,2,]'::json; -- ERROR, trailing comma +ERROR: invalid input syntax for type json +LINE 1: SELECT '[1,2,]'::json; + ^ +DETAIL: Expected JSON value, but found "]". +CONTEXT: JSON data, line 1: [1,2,] +SELECT '[1,2'::json; -- ERROR, no closing bracket +ERROR: invalid input syntax for type json +LINE 1: SELECT '[1,2'::json; + ^ +DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: [1,2 +SELECT '[1,[2]'::json; -- ERROR, no closing bracket +ERROR: invalid input syntax for type json +LINE 1: SELECT '[1,[2]'::json; + ^ +DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: [1,[2] +-- Objects. +SELECT '{}'::json; -- OK + json +------ + {} +(1 row) + +SELECT '{"abc"}'::json; -- ERROR, no value +ERROR: invalid input syntax for type json +LINE 1: SELECT '{"abc"}'::json; + ^ +DETAIL: Expected ":", but found "}". +CONTEXT: JSON data, line 1: {"abc"} +SELECT '{"abc":1}'::json; -- OK + json +----------- + {"abc":1} +(1 row) + +SELECT '{1:"abc"}'::json; -- ERROR, keys must be strings +ERROR: invalid input syntax for type json +LINE 1: SELECT '{1:"abc"}'::json; + ^ +DETAIL: Expected string or "}", but found "1". +CONTEXT: JSON data, line 1: {1... +SELECT '{"abc",1}'::json; -- ERROR, wrong separator +ERROR: invalid input syntax for type json +LINE 1: SELECT '{"abc",1}'::json; + ^ +DETAIL: Expected ":", but found ",". +CONTEXT: JSON data, line 1: {"abc",... +SELECT '{"abc"=1}'::json; -- ERROR, totally wrong separator +ERROR: invalid input syntax for type json +LINE 1: SELECT '{"abc"=1}'::json; + ^ +DETAIL: Token "=" is invalid. +CONTEXT: JSON data, line 1: {"abc"=... +SELECT '{"abc"::1}'::json; -- ERROR, another wrong separator +ERROR: invalid input syntax for type json +LINE 1: SELECT '{"abc"::1}'::json; + ^ +DETAIL: Expected JSON value, but found ":". +CONTEXT: JSON data, line 1: {"abc"::... +SELECT '{"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}}'::json; -- OK + json +--------------------------------------------------------- + {"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}} +(1 row) + +SELECT '{"abc":1:2}'::json; -- ERROR, colon in wrong spot +ERROR: invalid input syntax for type json +LINE 1: SELECT '{"abc":1:2}'::json; + ^ +DETAIL: Expected "," or "}", but found ":". +CONTEXT: JSON data, line 1: {"abc":1:... +SELECT '{"abc":1,3}'::json; -- ERROR, no value +ERROR: invalid input syntax for type json +LINE 1: SELECT '{"abc":1,3}'::json; + ^ +DETAIL: Expected string, but found "3". +CONTEXT: JSON data, line 1: {"abc":1,3... +-- Miscellaneous stuff. +SELECT 'true'::json; -- OK + json +------ + true +(1 row) + +SELECT 'false'::json; -- OK + json +------- + false +(1 row) + +SELECT 'null'::json; -- OK + json +------ + null +(1 row) + +SELECT ' true '::json; -- OK, even with extra whitespace + json +-------- + true +(1 row) + +SELECT 'true false'::json; -- ERROR, too many values +ERROR: invalid input syntax for type json +LINE 1: SELECT 'true false'::json; + ^ +DETAIL: Expected end of input, but found "false". +CONTEXT: JSON data, line 1: true false +SELECT 'true, false'::json; -- ERROR, too many values +ERROR: invalid input syntax for type json +LINE 1: SELECT 'true, false'::json; + ^ +DETAIL: Expected end of input, but found ",". +CONTEXT: JSON data, line 1: true,... +SELECT 'truf'::json; -- ERROR, not a keyword +ERROR: invalid input syntax for type json +LINE 1: SELECT 'truf'::json; + ^ +DETAIL: Token "truf" is invalid. +CONTEXT: JSON data, line 1: truf +SELECT 'trues'::json; -- ERROR, not a keyword +ERROR: invalid input syntax for type json +LINE 1: SELECT 'trues'::json; + ^ +DETAIL: Token "trues" is invalid. +CONTEXT: JSON data, line 1: trues +SELECT ''::json; -- ERROR, no value +ERROR: invalid input syntax for type json +LINE 1: SELECT ''::json; + ^ +DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: +SELECT ' '::json; -- ERROR, no value +ERROR: invalid input syntax for type json +LINE 1: SELECT ' '::json; + ^ +DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: +--constructors +-- array_to_json +SELECT array_to_json(array(select 1 as a)); + array_to_json +--------------- + [1] +(1 row) + +SELECT array_to_json(array_agg(q),false) from (select x as b, x * 2 as c from generate_series(1,3) x) q; + array_to_json +--------------------------------------------- + [{"b":1,"c":2},{"b":2,"c":4},{"b":3,"c":6}] +(1 row) + +SELECT array_to_json(array_agg(q),true) from (select x as b, x * 2 as c from generate_series(1,3) x) q; + array_to_json +----------------- + [{"b":1,"c":2},+ + {"b":2,"c":4},+ + {"b":3,"c":6}] +(1 row) + +SELECT array_to_json(array_agg(q),false) + FROM ( SELECT $$a$$ || x AS b, y AS c, + ARRAY[ROW(x.*,ARRAY[1,2,3]), + ROW(y.*,ARRAY[4,5,6])] AS z + FROM generate_series(1,2) x, + generate_series(4,5) y) q; + array_to_json +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"b":"a1","c":4,"z":[{"f1":1,"f2":[1,2,3]},{"f1":4,"f2":[4,5,6]}]},{"b":"a1","c":5,"z":[{"f1":1,"f2":[1,2,3]},{"f1":5,"f2":[4,5,6]}]},{"b":"a2","c":4,"z":[{"f1":2,"f2":[1,2,3]},{"f1":4,"f2":[4,5,6]}]},{"b":"a2","c":5,"z":[{"f1":2,"f2":[1,2,3]},{"f1":5,"f2":[4,5,6]}]}] +(1 row) + +SELECT array_to_json(array_agg(x),false) from generate_series(5,10) x; + array_to_json +---------------- + [5,6,7,8,9,10] +(1 row) + +SELECT array_to_json('{{1,5},{99,100}}'::int[]); + array_to_json +------------------ + [[1,5],[99,100]] +(1 row) + +-- row_to_json +SELECT row_to_json(row(1,'foo')); + row_to_json +--------------------- + {"f1":1,"f2":"foo"} +(1 row) + +SELECT row_to_json(q) +FROM (SELECT $$a$$ || x AS b, + y AS c, + ARRAY[ROW(x.*,ARRAY[1,2,3]), + ROW(y.*,ARRAY[4,5,6])] AS z + FROM generate_series(1,2) x, + generate_series(4,5) y) q; + row_to_json +-------------------------------------------------------------------- + {"b":"a1","c":4,"z":[{"f1":1,"f2":[1,2,3]},{"f1":4,"f2":[4,5,6]}]} + {"b":"a1","c":5,"z":[{"f1":1,"f2":[1,2,3]},{"f1":5,"f2":[4,5,6]}]} + {"b":"a2","c":4,"z":[{"f1":2,"f2":[1,2,3]},{"f1":4,"f2":[4,5,6]}]} + {"b":"a2","c":5,"z":[{"f1":2,"f2":[1,2,3]},{"f1":5,"f2":[4,5,6]}]} +(4 rows) + +SELECT row_to_json(q,true) +FROM (SELECT $$a$$ || x AS b, + y AS c, + ARRAY[ROW(x.*,ARRAY[1,2,3]), + ROW(y.*,ARRAY[4,5,6])] AS z + FROM generate_series(1,2) x, + generate_series(4,5) y) q; + row_to_json +----------------------------------------------------- + {"b":"a1", + + "c":4, + + "z":[{"f1":1,"f2":[1,2,3]},{"f1":4,"f2":[4,5,6]}]} + {"b":"a1", + + "c":5, + + "z":[{"f1":1,"f2":[1,2,3]},{"f1":5,"f2":[4,5,6]}]} + {"b":"a2", + + "c":4, + + "z":[{"f1":2,"f2":[1,2,3]},{"f1":4,"f2":[4,5,6]}]} + {"b":"a2", + + "c":5, + + "z":[{"f1":2,"f2":[1,2,3]},{"f1":5,"f2":[4,5,6]}]} +(4 rows) + +CREATE TEMP TABLE rows AS +SELECT x, 'txt' || x as y +FROM generate_series(1,3) AS x; +SELECT row_to_json(q,true) +FROM rows q; + row_to_json +-------------- + {"x":1, + + "y":"txt1"} + {"x":2, + + "y":"txt2"} + {"x":3, + + "y":"txt3"} +(3 rows) + +SELECT row_to_json(row((select array_agg(x) as d from generate_series(5,10) x)),false); + row_to_json +----------------------- + {"f1":[5,6,7,8,9,10]} +(1 row) + +--json_agg +SELECT json_agg(q) + FROM ( SELECT $$a$$ || x AS b, y AS c, + ARRAY[ROW(x.*,ARRAY[1,2,3]), + ROW(y.*,ARRAY[4,5,6])] AS z + FROM generate_series(1,2) x, + generate_series(4,5) y) q; + json_agg +----------------------------------------------------------------------- + [{"b":"a1","c":4,"z":[{"f1":1,"f2":[1,2,3]},{"f1":4,"f2":[4,5,6]}]}, + + {"b":"a1","c":5,"z":[{"f1":1,"f2":[1,2,3]},{"f1":5,"f2":[4,5,6]}]}, + + {"b":"a2","c":4,"z":[{"f1":2,"f2":[1,2,3]},{"f1":4,"f2":[4,5,6]}]}, + + {"b":"a2","c":5,"z":[{"f1":2,"f2":[1,2,3]},{"f1":5,"f2":[4,5,6]}]}] +(1 row) + +SELECT json_agg(q) + FROM rows q; + json_agg +----------------------- + [{"x":1,"y":"txt1"}, + + {"x":2,"y":"txt2"}, + + {"x":3,"y":"txt3"}] +(1 row) + +-- non-numeric output +SELECT row_to_json(q) +FROM (SELECT 'NaN'::float8 AS "float8field") q; + row_to_json +----------------------- + {"float8field":"NaN"} +(1 row) + +SELECT row_to_json(q) +FROM (SELECT 'Infinity'::float8 AS "float8field") q; + row_to_json +---------------------------- + {"float8field":"Infinity"} +(1 row) + +SELECT row_to_json(q) +FROM (SELECT '-Infinity'::float8 AS "float8field") q; + row_to_json +----------------------------- + {"float8field":"-Infinity"} +(1 row) + +-- json input +SELECT row_to_json(q) +FROM (SELECT '{"a":1,"b": [2,3,4,"d","e","f"],"c":{"p":1,"q":2}}'::json AS "jsonfield") q; + row_to_json +------------------------------------------------------------------ + {"jsonfield":{"a":1,"b": [2,3,4,"d","e","f"],"c":{"p":1,"q":2}}} +(1 row) + +-- json extraction functions +CREATE TEMP TABLE test_json ( + json_type text, + test_json json +); +INSERT INTO test_json VALUES +('scalar','"a scalar"'), +('array','["zero", "one","two",null,"four","five"]'), +('object','{"field1":"val1","field2":"val2","field3":null}'); +SELECT test_json -> 'x' +FROM test_json +WHERE json_type = 'scalar'; +ERROR: cannot extract element from a scalar +SELECT test_json -> 'x' +FROM test_json +WHERE json_type = 'array'; +ERROR: cannot extract field from a non-object +SELECT test_json -> 'x' +FROM test_json +WHERE json_type = 'object'; + ?column? +---------- + +(1 row) + +SELECT test_json->'field2' +FROM test_json +WHERE json_type = 'object'; + ?column? +---------- + "val2" +(1 row) + +SELECT test_json->>'field2' +FROM test_json +WHERE json_type = 'object'; + ?column? +---------- + val2 +(1 row) + +SELECT test_json -> 2 +FROM test_json +WHERE json_type = 'scalar'; +ERROR: cannot extract element from a scalar +SELECT test_json -> 2 +FROM test_json +WHERE json_type = 'array'; + ?column? +---------- + "two" +(1 row) + +SELECT test_json -> 2 +FROM test_json +WHERE json_type = 'object'; +ERROR: cannot extract array element from a non-array +SELECT test_json->>2 +FROM test_json +WHERE json_type = 'array'; + ?column? +---------- + two +(1 row) + +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'scalar'; +ERROR: cannot call json_object_keys on a scalar +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'array'; +ERROR: cannot call json_object_keys on an array +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'object'; + json_object_keys +------------------ + field1 + field2 + field3 +(3 rows) + +-- nulls +select (test_json->'field3') is null as expect_false +from test_json +where json_type = 'object'; + expect_false +-------------- + f +(1 row) + +select (test_json->>'field3') is null as expect_true +from test_json +where json_type = 'object'; + expect_true +------------- + t +(1 row) + +select (test_json->3) is null as expect_false +from test_json +where json_type = 'array'; + expect_false +-------------- + f +(1 row) + +select (test_json->>3) is null as expect_true +from test_json +where json_type = 'array'; + expect_true +------------- + t +(1 row) + +-- array length +SELECT json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]'); + json_array_length +------------------- + 5 +(1 row) + +SELECT json_array_length('[]'); + json_array_length +------------------- + 0 +(1 row) + +SELECT json_array_length('{"f1":1,"f2":[5,6]}'); +ERROR: cannot get array length of a non-array +SELECT json_array_length('4'); +ERROR: cannot get array length of a scalar +-- each +select json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null}'); + json_each +------------------- + (f1,"[1,2,3]") + (f2,"{""f3"":1}") + (f4,null) +(3 rows) + +select * from json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":99,"f6":"stringy"}') q; + key | value +-----+----------- + f1 | [1,2,3] + f2 | {"f3":1} + f4 | null + f5 | 99 + f6 | "stringy" +(5 rows) + +select json_each_text('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":"null"}'); + json_each_text +------------------- + (f1,"[1,2,3]") + (f2,"{""f3"":1}") + (f4,) + (f5,null) +(4 rows) + +select * from json_each_text('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":99,"f6":"stringy"}') q; + key | value +-----+---------- + f1 | [1,2,3] + f2 | {"f3":1} + f4 | + f5 | 99 + f6 | stringy +(5 rows) + +-- extract_path, extract_path_as_text +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f4','f6'); + json_extract_path +------------------- + "stringy" +(1 row) + +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2'); + json_extract_path +------------------- + {"f3":1} +(1 row) + +select json_extract_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text); + json_extract_path +------------------- + "f3" +(1 row) + +select json_extract_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text); + json_extract_path +------------------- + 1 +(1 row) + +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f4','f6'); + json_extract_path_text +------------------------ + stringy +(1 row) + +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2'); + json_extract_path_text +------------------------ + {"f3":1} +(1 row) + +select json_extract_path_text('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text); + json_extract_path_text +------------------------ + f3 +(1 row) + +select json_extract_path_text('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text); + json_extract_path_text +------------------------ + 1 +(1 row) + +-- extract_path nulls +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":null,"f6":"stringy"}}','f4','f5') is null as expect_false; + expect_false +-------------- + f +(1 row) + +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":null,"f6":"stringy"}}','f4','f5') is null as expect_true; + expect_true +------------- + t +(1 row) + +select json_extract_path('{"f2":{"f3":1},"f4":[0,1,2,null]}','f4','3') is null as expect_false; + expect_false +-------------- + f +(1 row) + +select json_extract_path_text('{"f2":{"f3":1},"f4":[0,1,2,null]}','f4','3') is null as expect_true; + expect_true +------------- + t +(1 row) + +-- extract_path operators +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f4','f6']; + ?column? +----------- + "stringy" +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2']; + ?column? +---------- + {"f3":1} +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2','0']; + ?column? +---------- + "f3" +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2','1']; + ?column? +---------- + 1 +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f4','f6']; + ?column? +---------- + stringy +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2']; + ?column? +---------- + {"f3":1} +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2','0']; + ?column? +---------- + f3 +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2','1']; + ?column? +---------- + 1 +(1 row) + +-- same using array literals +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f4,f6}'; + ?column? +----------- + "stringy" +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2}'; + ?column? +---------- + {"f3":1} +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2,0}'; + ?column? +---------- + "f3" +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2,1}'; + ?column? +---------- + 1 +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f4,f6}'; + ?column? +---------- + stringy +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2}'; + ?column? +---------- + {"f3":1} +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2,0}'; + ?column? +---------- + f3 +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2,1}'; + ?column? +---------- + 1 +(1 row) + +-- array_elements +select json_array_elements('[1,true,[1,[2,3]],null,{"f1":1,"f2":[7,8,9]},false]'); + json_array_elements +----------------------- + 1 + true + [1,[2,3]] + null + {"f1":1,"f2":[7,8,9]} + false +(6 rows) + +select * from json_array_elements('[1,true,[1,[2,3]],null,{"f1":1,"f2":[7,8,9]},false]') q; + value +----------------------- + 1 + true + [1,[2,3]] + null + {"f1":1,"f2":[7,8,9]} + false +(6 rows) + +-- populate_record +create type jpop as (a text, b int, c timestamp); +select * from json_populate_record(null::jpop,'{"a":"blurfl","x":43.2}') q; + a | b | c +--------+---+--- + blurfl | | +(1 row) + +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":"blurfl","x":43.2}') q; + a | b | c +--------+---+-------------------------- + blurfl | 3 | Mon Dec 31 15:30:56 2012 +(1 row) + +select * from json_populate_record(null::jpop,'{"a":"blurfl","x":43.2}', true) q; + a | b | c +--------+---+--- + blurfl | | +(1 row) + +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":"blurfl","x":43.2}', true) q; + a | b | c +--------+---+-------------------------- + blurfl | 3 | Mon Dec 31 15:30:56 2012 +(1 row) + +select * from json_populate_record(null::jpop,'{"a":[100,200,false],"x":43.2}', true) q; + a | b | c +-----------------+---+--- + [100,200,false] | | +(1 row) + +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":[100,200,false],"x":43.2}', true) q; + a | b | c +-----------------+---+-------------------------- + [100,200,false] | 3 | Mon Dec 31 15:30:56 2012 +(1 row) + +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"c":[100,200,false],"x":43.2}', true) q; +ERROR: invalid input syntax for type timestamp: "[100,200,false]" +-- populate_recordset +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',false) q; + a | b | c +--------+---+-------------------------- + blurfl | | + | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',false) q; + a | b | c +--------+----+-------------------------- + blurfl | 99 | + def | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',true) q; + a | b | c +--------+---+-------------------------- + blurfl | | + | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',true) q; + a | b | c +--------+----+-------------------------- + blurfl | 99 | + def | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]',true) q; + a | b | c +---------------+----+-------------------------- + [100,200,300] | 99 | + {"z":true} | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]',true) q; +ERROR: invalid input syntax for type timestamp: "[100,200,300]" +-- using the default use_json_as_text argument +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q; + a | b | c +--------+---+-------------------------- + blurfl | | + | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q; + a | b | c +--------+----+-------------------------- + blurfl | 99 | + def | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; +ERROR: cannot call json_populate_recordset on a nested object +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; +ERROR: cannot call json_populate_recordset on a nested object +-- handling of unicode surrogate pairs +select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; +ERROR: invalid input syntax for type json +DETAIL: Unicode escape for code points higher than U+007F not permitted in non-UTF8 encoding +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row +ERROR: invalid input syntax for type json +DETAIL: high order surrogate must not follow a high order surrogate. +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order +ERROR: invalid input syntax for type json +DETAIL: low order surrogate must follow a high order surrogate. +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate +ERROR: invalid input syntax for type json +DETAIL: low order surrogate must follow a high order surrogate. +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate +ERROR: invalid input syntax for type json +DETAIL: low order surrogate must follow a high order surrogate. +CONTEXT: JSON data, line 1: { "a":... +--handling of simple unicode escapes +select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; +ERROR: invalid input syntax for type json +DETAIL: Unicode escape for code points higher than U+007F not permitted in non-UTF8 encoding +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; + correct_everywhere +-------------------- + dollar $ character +(1 row) + +select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped; + not_unescaped +-------------------- + null \u0000 escape +(1 row) + diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 5b6bc36517..4c4c6958bb 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -299,8 +299,14 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,3 -- handling of unicode surrogate pairs -select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct; +select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate + +--handling of simple unicode escapes + +select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; +select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; +select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;