From 89ddd29bbd70c31652c6e7a179473753b89a3cac Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Fri, 17 Jul 2015 20:56:13 -0400 Subject: [PATCH] Support JSON negative array subscripts everywhere Previously, there was an inconsistency across json/jsonb operators that operate on datums containing JSON arrays -- only some operators supported negative array count-from-the-end subscripting. Specifically, only a new-to-9.5 jsonb deletion operator had support (the new "jsonb - integer" operator). This inconsistency seemed likely to be counter-intuitive to users. To fix, allow all places where the user can supply an integer subscript to accept a negative subscript value, including path-orientated operators and functions, as well as other extraction operators. This will need to be called out as an incompatibility in the 9.5 release notes, since it's possible that users are relying on certain established extraction operators changed here yielding NULL in the event of a negative subscript. For the json type, this requires adding a way of cheaply getting the total JSON array element count ahead of time when parsing arrays with a negative subscript involved, necessitating an ad-hoc lex and parse. This is followed by a "conversion" from a negative subscript to its equivalent positive-wise value using the count. From there on, it's as if a positive-wise value was originally provided. Note that there is still a minor inconsistency here across jsonb deletion operators. Unlike the aforementioned new "-" deletion operator that accepts an integer on its right hand side, the new "#-" path orientated deletion variant does not throw an error when it appears like an array subscript (input that could be recognized by as an integer literal) is being used on an object, which is wrong-headed. The reason for not being stricter is that it could be the case that an object pair happens to have a key value that looks like an integer; in general, these two possibilities are impossible to differentiate with rhs path text[] argument elements. However, we still don't allow the "#-" path-orientated deletion operator to perform array-style subscripting. Rather, we just return the original left operand value in the event of a negative subscript (which seems analogous to how the established "jsonb/json #> text[]" path-orientated operator may yield NULL in the event of an invalid subscript). In passing, make SetArrayPath() stricter about not accepting cases where there is trailing non-numeric garbage bytes rather than a clean NUL byte. This means, for example, that strings like "10e10" are now not accepted as an array subscript of 10 by some new-to-9.5 path-orientated jsonb operators (e.g. the new #- operator). Finally, remove dead code for jsonb subscript deletion; arguably, this should have been done in commit b81c7b409. Peter Geoghegan and Andrew Dunstan --- doc/src/sgml/func.sgml | 16 +++-- src/backend/utils/adt/json.c | 39 ++++++++++ src/backend/utils/adt/jsonfuncs.c | 100 +++++++++++++++++++------- src/include/utils/jsonapi.h | 7 ++ src/test/regress/expected/json.out | 14 ++++ src/test/regress/expected/json_1.out | 14 ++++ src/test/regress/expected/jsonb.out | 30 ++++++++ src/test/regress/expected/jsonb_1.out | 30 ++++++++ src/test/regress/sql/json.sql | 5 ++ src/test/regress/sql/jsonb.sql | 5 ++ 10 files changed, 231 insertions(+), 29 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 99923f46bc..ef50fa5811 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -10177,7 +10177,8 @@ table2-mapping -> int - Get JSON array element (indexed from zero) + Get JSON array element (indexed from zero, negative + integers count from the end) '[{"a":"foo"},{"b":"bar"},{"c":"baz"}]'::json->2 {"c":"baz"} @@ -10230,7 +10231,10 @@ table2-mapping returning text, which coerce the value to text. The field/element/path extraction operators return NULL, rather than failing, if the JSON input does not have the right structure to match - the request; for example if no such element exists. + the request; for example if no such element exists. The + field/element/path extraction operators that accept integer JSON + array subscripts all support negative subscripting from the end of + arrays. @@ -10318,7 +10322,8 @@ table2-mapping #- text[] - Delete the field or element with specified path + Delete the field or element with specified path (for + JSON arrays, negative integers count from the end) '["a", {"b":1}]'::jsonb #- '{1,b}' @@ -10858,6 +10863,9 @@ table2-mapping create_missing is true ( default is true) and the item designated by path does not exist. + As with the path orientated operators, negative integers that + appear in path count from the end + of JSON arrays. jsonb_set('[{"f1":1,"f2":null},2,null,3]', '{0,f1}','[2,3,4]', false) jsonb_set('[{"f1":1,"f2":null},2]', '{0,f3}','[2,3,4]') @@ -10872,7 +10880,7 @@ table2-mapping text Returns from_json - as indented json text. + as indented JSON text. jsonb_pretty('[{"f1":1,"f2":null},2,null,3]') diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 26d3843369..8d0434767a 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -340,6 +340,45 @@ pg_parse_json(JsonLexContext *lex, JsonSemAction *sem) } +/* + * json_count_array_elements + * + * Returns number of array elements in lex context at start of array token + * until end of array token at same nesting level. + * + * Designed to be called from array_start routines. + */ +int +json_count_array_elements(JsonLexContext *lex) +{ + JsonLexContext copylex; + int count; + + /* + * It's safe to do this with a shallow copy because the lexical routines + * don't scribble on the input. They do scribble on the other pointers etc, + * so doing this with a copy makes that safe. + */ + memcpy(©lex, lex, sizeof(JsonLexContext)); + copylex.strval = NULL; /* not interested in values here */ + copylex.lex_level++; + + count = 0; + lex_expect(JSON_PARSE_ARRAY_START, ©lex, JSON_TOKEN_ARRAY_START); + if (lex_peek(©lex) != JSON_TOKEN_ARRAY_END) + { + do + { + count++; + parse_array_element(©lex, &nullSemAction); + } + while (lex_accept(©lex, JSON_TOKEN_COMMA, NULL)); + } + lex_expect(JSON_PARSE_ARRAY_NEXT, ©lex, JSON_TOKEN_ARRAY_END); + + return count; +} + /* * Recursive Descent parse routines. There is one for each structural * element in a json document: diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 13d5b7af2f..424280b929 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -597,6 +597,17 @@ jsonb_array_element(PG_FUNCTION_ARGS) if (!JB_ROOT_IS_ARRAY(jb)) PG_RETURN_NULL(); + /* Handle negative subscript */ + if (element < 0) + { + uint32 nelements = JB_ROOT_COUNT(jb); + + if (-element > nelements) + PG_RETURN_NULL(); + else + element += nelements; + } + v = getIthJsonbValueFromContainer(&jb->root, element); if (v != NULL) PG_RETURN_JSONB(JsonbValueToJsonb(v)); @@ -629,6 +640,17 @@ jsonb_array_element_text(PG_FUNCTION_ARGS) if (!JB_ROOT_IS_ARRAY(jb)) PG_RETURN_NULL(); + /* Handle negative subscript */ + if (element < 0) + { + uint32 nelements = JB_ROOT_COUNT(jb); + + if (-element > nelements) + PG_RETURN_NULL(); + else + element += nelements; + } + v = getIthJsonbValueFromContainer(&jb->root, element); if (v != NULL) { @@ -719,7 +741,7 @@ get_path_all(FunctionCallInfo fcinfo, bool as_text) /* * we have no idea at this stage what structure the document is so * just convert anything in the path that we can to an integer and set - * all the other integers to -1 which will never match. + * all the other integers to INT_MIN which will never match. */ if (*tpath[i] != '\0') { @@ -728,13 +750,13 @@ get_path_all(FunctionCallInfo fcinfo, bool as_text) errno = 0; ind = strtol(tpath[i], &endptr, 10); - if (*endptr == '\0' && errno == 0 && ind <= INT_MAX && ind >= 0) + if (*endptr == '\0' && errno == 0 && ind <= INT_MAX && ind >= INT_MIN) ipath[i] = (int) ind; else - ipath[i] = -1; + ipath[i] = INT_MIN; } else - ipath[i] = -1; + ipath[i] = INT_MIN; } result = get_worker(json, tpath, ipath, npath, as_text); @@ -752,14 +774,15 @@ get_path_all(FunctionCallInfo fcinfo, bool as_text) * * json: JSON object (in text form) * tpath[]: field name(s) to extract - * ipath[]: array index(es) (zero-based) to extract + * ipath[]: array index(es) (zero-based) to extract, accepts negatives * npath: length of tpath[] and/or ipath[] * normalize_results: true to de-escape string and null scalars * * tpath can be NULL, or any one tpath[] entry can be NULL, if an object * field is not to be matched at that nesting level. Similarly, ipath can - * be NULL, or any one ipath[] entry can be -1, if an array element is not - * to be matched at that nesting level. + * be NULL, or any one ipath[] entry can be INT_MIN if an array element is + * not to be matched at that nesting level (a json datum should never be + * large enough to have -INT_MIN elements due to MaxAllocSize restriction). */ static text * get_worker(text *json, @@ -964,6 +987,17 @@ get_array_start(void *state) */ _state->result_start = _state->lex->token_start; } + + /* INT_MIN value is reserved to represent invalid subscript */ + if (_state->path_indexes[lex_level] < 0 && + _state->path_indexes[lex_level] != INT_MIN) + { + /* Negative subscript -- convert to positive-wise subscript */ + int nelements = json_count_array_elements(_state->lex); + + if (-_state->path_indexes[lex_level] <= nelements) + _state->path_indexes[lex_level] += nelements; + } } static void @@ -1209,9 +1243,30 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) errno = 0; lindex = strtol(indextext, &endptr, 10); if (endptr == indextext || *endptr != '\0' || errno != 0 || - lindex > INT_MAX || lindex < 0) + lindex > INT_MAX || lindex < INT_MIN) PG_RETURN_NULL(); - index = (uint32) lindex; + + if (lindex >= 0) + { + index = (uint32) lindex; + } + else + { + /* Handle negative subscript */ + uint32 nelements; + + /* Container must be array, but make sure */ + if ((container->header & JB_FARRAY) == 0) + elog(ERROR, "not a jsonb array"); + + nelements = container->header & JB_CMASK; + + if (-lindex > nelements) + PG_RETURN_NULL(); + else + index = nelements + lindex; + } + jbvp = getIthJsonbValueFromContainer(container, index); } else @@ -3411,10 +3466,8 @@ jsonb_delete_idx(PG_FUNCTION_ARGS) it = JsonbIteratorInit(&in->root); r = JsonbIteratorNext(&it, &v, false); - if (r == WJB_BEGIN_ARRAY) - n = v.val.array.nElems; - else - n = v.val.object.nPairs; + Assert (r == WJB_BEGIN_ARRAY); + n = v.val.array.nElems; if (idx < 0) { @@ -3431,14 +3484,10 @@ jsonb_delete_idx(PG_FUNCTION_ARGS) while ((r = JsonbIteratorNext(&it, &v, true)) != 0) { - if (r == WJB_ELEM || r == WJB_KEY) + if (r == WJB_ELEM) { if (i++ == idx) - { - if (r == WJB_KEY) - JsonbIteratorNext(&it, &v, true); /* skip value */ continue; - } } res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL); @@ -3657,7 +3706,7 @@ IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, * If newval is null, the element is to be removed. * * If create is true, we create the new value if the key or array index - * does not exist. All path elemnts before the last must already exist + * does not exist. All path elements before the last must already exist * whether or not create is true, or nothing is done. */ static JsonbValue * @@ -3818,7 +3867,8 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, errno = 0; lindex = strtol(c, &badp, 10); - if (errno != 0 || badp == c || lindex > INT_MAX || lindex < INT_MIN) + if (errno != 0 || badp == c || *badp != '\0' || lindex > INT_MAX || + lindex < INT_MIN) idx = nelems; else idx = lindex; @@ -3829,7 +3879,7 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if (idx < 0) { if (-idx > nelems) - idx = -1; + idx = INT_MIN; else idx = nelems + idx; } @@ -3838,12 +3888,12 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, idx = nelems; /* - * if we're creating, and idx == -1, we prepend the new value to the array - * also if the array is empty - in which case we don't really care what - * the idx value is + * if we're creating, and idx == INT_MIN, we prepend the new value to the + * array also if the array is empty - in which case we don't really care + * what the idx value is */ - if ((idx == -1 || nelems == 0) && create && (level == path_len - 1)) + if ((idx == INT_MIN || nelems == 0) && create && (level == path_len - 1)) { Assert(newval != NULL); addJsonbToParseState(st, newval); diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h index 296d20af83..55cfb791fa 100644 --- a/src/include/utils/jsonapi.h +++ b/src/include/utils/jsonapi.h @@ -103,6 +103,13 @@ typedef struct JsonSemAction */ extern void pg_parse_json(JsonLexContext *lex, JsonSemAction *sem); +/* + * json_count_array_elements performs a fast secondary parse to determine the + * number of elements in passed array lex context. It should be called from an + * array_start action. + */ +extern int json_count_array_elements(JsonLexContext *lex); + /* * constructors for JsonLexContext, with or without strval element. * If supplied, the strval element will contain a de-escaped version of diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 3942c3bee9..43ca67dddf 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -569,6 +569,14 @@ WHERE json_type = 'array'; "two" (1 row) +SELECT test_json -> -1 +FROM test_json +WHERE json_type = 'array'; + ?column? +---------- + {"f1":9} +(1 row) + SELECT test_json -> 2 FROM test_json WHERE json_type = 'object'; @@ -698,6 +706,12 @@ select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> 1; (1 row) +select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> -1; + ?column? +---------- + +(1 row) + select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> 'z'; ?column? ---------- diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out index 38f1526288..155f414ea4 100644 --- a/src/test/regress/expected/json_1.out +++ b/src/test/regress/expected/json_1.out @@ -569,6 +569,14 @@ WHERE json_type = 'array'; "two" (1 row) +SELECT test_json -> -1 +FROM test_json +WHERE json_type = 'array'; + ?column? +---------- + {"f1":9} +(1 row) + SELECT test_json -> 2 FROM test_json WHERE json_type = 'object'; @@ -698,6 +706,12 @@ select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> 1; (1 row) +select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> -1; + ?column? +---------- + +(1 row) + select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> 'z'; ?column? ---------- diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index 4416d52611..0ccc0f7a79 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -2590,6 +2590,18 @@ SELECT '["a","b","c",[1,2],null]'::jsonb -> 5; SELECT '["a","b","c",[1,2],null]'::jsonb -> -1; ?column? ---------- + null +(1 row) + +SELECT '["a","b","c",[1,2],null]'::jsonb -> -5; + ?column? +---------- + "a" +(1 row) + +SELECT '["a","b","c",[1,2],null]'::jsonb -> -6; + ?column? +---------- (1 row) @@ -2639,6 +2651,18 @@ SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,3}'; SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-1}'; ?column? ---------- + 3 +(1 row) + +SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-3}'; + ?column? +---------- + 1 +(1 row) + +SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-4}'; + ?column? +---------- (1 row) @@ -3121,6 +3145,12 @@ select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{ {"a": 1, "b": [1], "c": {"1": 2}, "d": {"1": [2, 3]}, "n": null} (1 row) +select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{b,-1e}'; -- invalid array subscript + ?column? +--------------------------------------------------------------------- + {"a": 1, "b": [1, 2], "c": {"1": 2}, "d": {"1": [2, 3]}, "n": null} +(1 row) + select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{d,1,0}'; ?column? ------------------------------------------------------------------ diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out index 6d67655cf6..7b23a99357 100644 --- a/src/test/regress/expected/jsonb_1.out +++ b/src/test/regress/expected/jsonb_1.out @@ -2590,6 +2590,18 @@ SELECT '["a","b","c",[1,2],null]'::jsonb -> 5; SELECT '["a","b","c",[1,2],null]'::jsonb -> -1; ?column? ---------- + null +(1 row) + +SELECT '["a","b","c",[1,2],null]'::jsonb -> -5; + ?column? +---------- + "a" +(1 row) + +SELECT '["a","b","c",[1,2],null]'::jsonb -> -6; + ?column? +---------- (1 row) @@ -2639,6 +2651,18 @@ SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,3}'; SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-1}'; ?column? ---------- + 3 +(1 row) + +SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-3}'; + ?column? +---------- + 1 +(1 row) + +SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-4}'; + ?column? +---------- (1 row) @@ -3121,6 +3145,12 @@ select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{ {"a": 1, "b": [1], "c": {"1": 2}, "d": {"1": [2, 3]}, "n": null} (1 row) +select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{b,-1e}'; -- invalid array subscript + ?column? +--------------------------------------------------------------------- + {"a": 1, "b": [1, 2], "c": {"1": 2}, "d": {"1": [2, 3]}, "n": null} +(1 row) + select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{d,1,0}'; ?column? ------------------------------------------------------------------ diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 53832a01fa..8c3b73f5b3 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -184,6 +184,10 @@ SELECT test_json -> 2 FROM test_json WHERE json_type = 'array'; +SELECT test_json -> -1 +FROM test_json +WHERE json_type = 'array'; + SELECT test_json -> 2 FROM test_json WHERE json_type = 'object'; @@ -241,6 +245,7 @@ where json_type = 'array'; select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> null::text; select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> null::int; select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> 1; +select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> -1; select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> 'z'; select '{"a": [{"b": "c"}, {"b": "cc"}]}'::json -> ''; select '[{"b": "c"}, {"b": "cc"}]'::json -> 1; diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index febdeeb797..3d2d8abfc1 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -642,6 +642,8 @@ SELECT '["a","b","c",[1,2],null]'::jsonb -> 3 -> 1; SELECT '["a","b","c",[1,2],null]'::jsonb -> 4; SELECT '["a","b","c",[1,2],null]'::jsonb -> 5; SELECT '["a","b","c",[1,2],null]'::jsonb -> -1; +SELECT '["a","b","c",[1,2],null]'::jsonb -> -5; +SELECT '["a","b","c",[1,2],null]'::jsonb -> -6; --nested path extraction SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{0}'; @@ -652,6 +654,8 @@ SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,1}'; SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,2}'; SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,3}'; SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-1}'; +SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-3}'; +SELECT '{"a":"b","c":[1,2,3]}'::jsonb #> '{c,-4}'; SELECT '[0,1,2,[3,4],{"5":"five"}]'::jsonb #> '{0}'; SELECT '[0,1,2,[3,4],{"5":"five"}]'::jsonb #> '{3}'; @@ -757,6 +761,7 @@ select jsonb_delete_path('{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2, select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{n}'; select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{b,-1}'; +select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{b,-1e}'; -- invalid array subscript select '{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb #- '{d,1,0}';