From 81fcc72e66222357f9bccce3eeda62eb2cb29849 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 31 Jan 2021 23:51:01 +0300 Subject: [PATCH] Filling array gaps during jsonb subscripting This commit introduces two new flags for jsonb assignment: * JB_PATH_FILL_GAPS: Appending array elements on the specified position, gaps are filled with nulls (similar to the JavaScript behavior). This mode also instructs to create the whole path in a jsonb object if some part of the path (more than just the last element) is not present. * JB_PATH_CONSISTENT_POSITION: Assigning keeps array positions consistent by preventing prepending of elements. Both flags are used only in jsonb subscripting assignment. Initially proposed by Nikita Glukhov based on polymorphic subscripting patch, but transformed into an independent change. Discussion: https://postgr.es/m/CA%2Bq6zcV8qvGcDXurwwgUbwACV86Th7G80pnubg42e-p9gsSf%3Dg%40mail.gmail.com Discussion: https://postgr.es/m/CA%2Bq6zcX3mdxGCgdThzuySwH-ApyHHM-G4oB1R0fn0j2hZqqkLQ%40mail.gmail.com Discussion: https://postgr.es/m/CA%2Bq6zcVDuGBv%3DM0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2Bq6zcVovR%2BXY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA%40mail.gmail.com Author: Dmitry Dolgov Reviewed-by: Tom Lane, Arthur Zakirov, Pavel Stehule, Dian M Fay Reviewed-by: Andrew Dunstan, Chapman Flack, Merlin Moncure, Peter Geoghegan Reviewed-by: Alvaro Herrera, Jim Nasby, Josh Berkus, Victor Wagner Reviewed-by: Aleksander Alekseev, Robert Haas, Oleg Bartunov --- doc/src/sgml/json.sgml | 24 +++ src/backend/utils/adt/jsonfuncs.c | 227 ++++++++++++++++++++++++++-- src/test/regress/expected/jsonb.out | 135 +++++++++++++++++ src/test/regress/sql/jsonb.sql | 81 ++++++++++ 4 files changed, 452 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml index 3ace5e444b..07bd19f974 100644 --- a/doc/src/sgml/json.sgml +++ b/doc/src/sgml/json.sgml @@ -648,6 +648,30 @@ UPDATE table_name SET jsonb_field['a'] = '1'; -- Where jsonb_field was NULL, it is now [1] UPDATE table_name SET jsonb_field[0] = '1'; + + + If an index is specified for an array containing too few elements, + NULL elements will be appended until the index is reachable + and the value can be set. + + +-- Where jsonb_field was [], it is now [null, null, 2]; +-- where jsonb_field was [0], it is now [0, null, 2] +UPDATE table_name SET jsonb_field[2] = '2'; + + + A jsonb value will accept assignments to nonexistent subscript + paths as long as the last existing path key is an object or an array. Since + the final subscript is not traversed, it may be an object key. Nested arrays + will be created and NULL-padded according to the path until + the value can be placed appropriately. + + +-- Where jsonb_field was {}, it is now {'a': [{'b': 1}]} +UPDATE table_name SET jsonb_field['a'][0]['b'] = '1'; + +-- Where jsonb_field was [], it is now [{'a': 1}] +UPDATE table_name SET jsonb_field[0]['a'] = '1'; diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 076cde0902..a0e1266e57 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -44,6 +44,8 @@ #define JB_PATH_INSERT_AFTER 0x0010 #define JB_PATH_CREATE_OR_INSERT \ (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE) +#define JB_PATH_FILL_GAPS 0x0020 +#define JB_PATH_CONSISTENT_POSITION 0x0040 /* state for json_object_keys */ typedef struct OkeysState @@ -1634,14 +1636,117 @@ jsonb_set_element(Jsonb *jb, Datum *path, int path_len, it = JsonbIteratorInit(&jb->root); - res = setPath(&it, path, path_nulls, path_len, &state, 0, - newval, JB_PATH_CREATE); + res = setPath(&it, path, path_nulls, path_len, &state, 0, newval, + JB_PATH_CREATE | JB_PATH_FILL_GAPS | + JB_PATH_CONSISTENT_POSITION); pfree(path_nulls); PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); } +static void +push_null_elements(JsonbParseState **ps, int num) +{ + JsonbValue null; + + null.type = jbvNull; + + while (num-- > 0) + pushJsonbValue(ps, WJB_ELEM, &null); +} + +/* + * Prepare a new structure containing nested empty objects and arrays + * corresponding to the specified path, and assign a new value at the end of + * this path. E.g. the path [a][0][b] with the new value 1 will produce the + * structure {a: [{b: 1}]}. + * + * Called is responsible to make sure such path does not exist yet. + */ +static void +push_path(JsonbParseState **st, int level, Datum *path_elems, + bool *path_nulls, int path_len, JsonbValue *newval) +{ + /* + * tpath contains expected type of an empty jsonb created at each level + * higher or equal than the current one, either jbvObject or jbvArray. + * Since it contains only information about path slice from level to the + * end, the access index must be normalized by level. + */ + enum jbvType *tpath = palloc0((path_len - level) * sizeof(enum jbvType)); + long lindex; + JsonbValue newkey; + + /* + * Create first part of the chain with beginning tokens. For the current + * level WJB_BEGIN_OBJECT/WJB_BEGIN_ARRAY was already created, so start + * with the next one. + */ + for (int i = level + 1; i < path_len; i++) + { + char *c, + *badp; + + if (path_nulls[i]) + break; + + /* + * Try to convert to an integer to find out the expected type, object + * or array. + */ + c = TextDatumGetCString(path_elems[i]); + errno = 0; + lindex = strtol(c, &badp, 10); + if (errno != 0 || badp == c || *badp != '\0' || lindex > INT_MAX || + lindex < INT_MIN) + { + /* text, an object is expected */ + newkey.type = jbvString; + newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[i]); + newkey.val.string.val = VARDATA_ANY(path_elems[i]); + + (void) pushJsonbValue(st, WJB_BEGIN_OBJECT, NULL); + (void) pushJsonbValue(st, WJB_KEY, &newkey); + + tpath[i - level] = jbvObject; + } + else + { + /* integer, an array is expected */ + (void) pushJsonbValue(st, WJB_BEGIN_ARRAY, NULL); + + push_null_elements(st, lindex); + + tpath[i - level] = jbvArray; + } + + } + + /* Insert an actual value for either an object or array */ + if (tpath[(path_len - level) - 1] == jbvArray) + { + (void) pushJsonbValue(st, WJB_ELEM, newval); + } + else + (void) pushJsonbValue(st, WJB_VALUE, newval); + + /* + * Close everything up to the last but one level. The last one will be + * closed outside of this function. + */ + for (int i = path_len - 1; i > level; i--) + { + if (path_nulls[i]) + break; + + if (tpath[i - level] == jbvObject) + (void) pushJsonbValue(st, WJB_END_OBJECT, NULL); + else + (void) pushJsonbValue(st, WJB_END_ARRAY, NULL); + } +} + /* * Return the text representation of the given JsonbValue. */ @@ -4786,6 +4891,21 @@ IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, * Bits JB_PATH_INSERT_BEFORE and JB_PATH_INSERT_AFTER in op_type * behave as JB_PATH_CREATE if new value is inserted in JsonbObject. * + * If JB_PATH_FILL_GAPS bit is set, this will change an assignment logic in + * case if target is an array. The assignment index will not be restricted by + * number of elements in the array, and if there are any empty slots between + * last element of the array and a new one they will be filled with nulls. If + * the index is negative, it still will be considered an an index from the end + * of the array. Of a part of the path is not present and this part is more + * than just one last element, this flag will instruct to create the whole + * chain of corresponding objects and insert the value. + * + * JB_PATH_CONSISTENT_POSITION for an array indicates that the called wants to + * keep values with fixed indices. Indices for existing elements could be + * changed (shifted forward) in case if the array is prepended with a new value + * and a negative index out of the range, so this behavior will be prevented + * and return an error. + * * All path elements before the last must already exist * whatever bits in op_type are set, or nothing is done. */ @@ -4880,6 +5000,8 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, memcmp(k.val.string.val, VARDATA_ANY(path_elems[level]), k.val.string.len) == 0) { + done = true; + if (level == path_len - 1) { /* @@ -4899,7 +5021,6 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, (void) pushJsonbValue(st, WJB_KEY, &k); (void) pushJsonbValue(st, WJB_VALUE, newval); } - done = true; } else { @@ -4944,6 +5065,31 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, } } } + + /*-- + * If we got here there are only few possibilities: + * - no target path was found, and an open object with some keys/values was + * pushed into the state + * - an object is empty, only WJB_BEGIN_OBJECT is pushed + * + * In both cases if instructed to create the path when not present, + * generate the whole chain of empty objects and insert the new value + * there. + */ + if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1)) + { + JsonbValue newkey; + + newkey.type = jbvString; + newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[level]); + newkey.val.string.val = VARDATA_ANY(path_elems[level]); + + (void) pushJsonbValue(st, WJB_KEY, &newkey); + (void) push_path(st, level, path_elems, path_nulls, + path_len, newval); + + /* Result is closed with WJB_END_OBJECT outside of this function */ + } } /* @@ -4982,25 +5128,48 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if (idx < 0) { if (-idx > nelems) - idx = INT_MIN; + { + /* + * If asked to keep elements position consistent, it's not allowed + * to prepend the array. + */ + if (op_type & JB_PATH_CONSISTENT_POSITION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("path element at position %d is out of range: %d", + level + 1, idx))); + else + idx = INT_MIN; + } else idx = nelems + idx; } - if (idx > 0 && idx > nelems) - idx = nelems; + /* + * Filling the gaps means there are no limits on the positive index are + * imposed, we can set any element. Otherwise limit the index by nelems. + */ + if (!(op_type & JB_PATH_FILL_GAPS)) + { + if (idx > 0 && idx > nelems) + idx = nelems; + } /* * if we're creating, and idx == INT_MIN, we prepend the new value to the * array also if the array is empty - in which case we don't really care * what the idx value is */ - if ((idx == INT_MIN || nelems == 0) && (level == path_len - 1) && (op_type & JB_PATH_CREATE_OR_INSERT)) { Assert(newval != NULL); + + if (op_type & JB_PATH_FILL_GAPS && nelems == 0 && idx > 0) + push_null_elements(st, idx); + (void) pushJsonbValue(st, WJB_ELEM, newval); + done = true; } @@ -5011,6 +5180,8 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if (i == idx && level < path_len) { + done = true; + if (level == path_len - 1) { r = JsonbIteratorNext(it, &v, true); /* skip */ @@ -5028,8 +5199,6 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_REPLACE)) (void) pushJsonbValue(st, WJB_ELEM, newval); - - done = true; } else (void) setPath(it, path_elems, path_nulls, path_len, @@ -5057,14 +5226,42 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL); } } - - if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && - level == path_len - 1 && i == nelems - 1) - { - (void) pushJsonbValue(st, WJB_ELEM, newval); - } } } + + if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && level == path_len - 1) + { + /* + * If asked to fill the gaps, idx could be bigger than nelems, so + * prepend the new element with nulls if that's the case. + */ + if (op_type & JB_PATH_FILL_GAPS && idx > nelems) + push_null_elements(st, idx - nelems); + + (void) pushJsonbValue(st, WJB_ELEM, newval); + done = true; + } + + /*-- + * If we got here there are only few possibilities: + * - no target path was found, and an open array with some keys/values was + * pushed into the state + * - an array is empty, only WJB_BEGIN_ARRAY is pushed + * + * In both cases if instructed to create the path when not present, + * generate the whole chain of empty objects and insert the new value + * there. + */ + if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1)) + { + if (idx > 0) + push_null_elements(st, idx - nelems); + + (void) push_path(st, level, path_elems, path_nulls, + path_len, newval); + + /* Result is closed with WJB_END_OBJECT outside of this function */ + } } /* diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index 46bf2e2353..5b5510c4fd 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -4999,6 +4999,141 @@ select * from test_jsonb_subscript; 3 | [1] (3 rows) +-- Fill the gaps logic +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[0]'); +update test_jsonb_subscript set test_json[5] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+-------------------------------- + 1 | [0, null, null, null, null, 1] +(1 row) + +update test_jsonb_subscript set test_json[-4] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+----------------------------- + 1 | [0, null, 1, null, null, 1] +(1 row) + +update test_jsonb_subscript set test_json[-8] = '1'; +ERROR: path element at position 1 is out of range: -8 +select * from test_jsonb_subscript; + id | test_json +----+----------------------------- + 1 | [0, null, 1, null, null, 1] +(1 row) + +-- keep consistent values position +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[]'); +update test_jsonb_subscript set test_json[5] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+----------------------------------- + 1 | [null, null, null, null, null, 1] +(1 row) + +-- create the whole path +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json['a'][0]['b'][0]['c'] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+---------------------------- + 1 | {"a": [{"b": [{"c": 1}]}]} +(1 row) + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json['a'][2]['b'][2]['c'][2] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+------------------------------------------------------------------ + 1 | {"a": [null, null, {"b": [null, null, {"c": [null, null, 1]}]}]} +(1 row) + +-- create the whole path with already existing keys +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{"b": 1}'); +update test_jsonb_subscript set test_json['a'][0] = '2'; +select * from test_jsonb_subscript; + id | test_json +----+-------------------- + 1 | {"a": [2], "b": 1} +(1 row) + +-- the start jsonb is an object, first subscript is treated as a key +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json[0]['a'] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+----------------- + 1 | {"0": {"a": 1}} +(1 row) + +-- the start jsonb is an array +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[]'); +update test_jsonb_subscript set test_json[0]['a'] = '1'; +update test_jsonb_subscript set test_json[2]['b'] = '2'; +select * from test_jsonb_subscript; + id | test_json +----+---------------------------- + 1 | [{"a": 1}, null, {"b": 2}] +(1 row) + +-- overwriting an existing path +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json['a']['b'][1] = '1'; +update test_jsonb_subscript set test_json['a']['b'][10] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+---------------------------------------------------------------------------- + 1 | {"a": {"b": [null, 1, null, null, null, null, null, null, null, null, 1]}} +(1 row) + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[]'); +update test_jsonb_subscript set test_json[0][0][0] = '1'; +update test_jsonb_subscript set test_json[0][0][1] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+------------ + 1 | [[[1, 1]]] +(1 row) + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json['a']['b'][10] = '1'; +update test_jsonb_subscript set test_json['a'][10][10] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+------------------------------------------------------------------------------------------------------------------------------------------------------ + 1 | {"a": {"b": [null, null, null, null, null, null, null, null, null, null, 1], "10": [null, null, null, null, null, null, null, null, null, null, 1]}} +(1 row) + +-- an empty sub element +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{"a": {}}'); +update test_jsonb_subscript set test_json['a']['b']['c'][2] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+-------------------------------------- + 1 | {"a": {"b": {"c": [null, null, 1]}}} +(1 row) + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{"a": []}'); +update test_jsonb_subscript set test_json['a'][1]['c'][2] = '1'; +select * from test_jsonb_subscript; + id | test_json +----+--------------------------------------- + 1 | {"a": [null, {"c": [null, null, 1]}]} +(1 row) + -- jsonb to tsvector select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); to_tsvector diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 20aa8fe0e2..0320db0ea4 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -1290,6 +1290,87 @@ update test_jsonb_subscript set test_json = NULL where id = 3; update test_jsonb_subscript set test_json[0] = '1'; select * from test_jsonb_subscript; +-- Fill the gaps logic +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[0]'); + +update test_jsonb_subscript set test_json[5] = '1'; +select * from test_jsonb_subscript; + +update test_jsonb_subscript set test_json[-4] = '1'; +select * from test_jsonb_subscript; + +update test_jsonb_subscript set test_json[-8] = '1'; +select * from test_jsonb_subscript; + +-- keep consistent values position +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[]'); + +update test_jsonb_subscript set test_json[5] = '1'; +select * from test_jsonb_subscript; + +-- create the whole path +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json['a'][0]['b'][0]['c'] = '1'; +select * from test_jsonb_subscript; + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json['a'][2]['b'][2]['c'][2] = '1'; +select * from test_jsonb_subscript; + +-- create the whole path with already existing keys +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{"b": 1}'); +update test_jsonb_subscript set test_json['a'][0] = '2'; +select * from test_jsonb_subscript; + +-- the start jsonb is an object, first subscript is treated as a key +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json[0]['a'] = '1'; +select * from test_jsonb_subscript; + +-- the start jsonb is an array +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[]'); +update test_jsonb_subscript set test_json[0]['a'] = '1'; +update test_jsonb_subscript set test_json[2]['b'] = '2'; +select * from test_jsonb_subscript; + +-- overwriting an existing path +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json['a']['b'][1] = '1'; +update test_jsonb_subscript set test_json['a']['b'][10] = '1'; +select * from test_jsonb_subscript; + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[]'); +update test_jsonb_subscript set test_json[0][0][0] = '1'; +update test_jsonb_subscript set test_json[0][0][1] = '1'; +select * from test_jsonb_subscript; + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{}'); +update test_jsonb_subscript set test_json['a']['b'][10] = '1'; +update test_jsonb_subscript set test_json['a'][10][10] = '1'; +select * from test_jsonb_subscript; + +-- an empty sub element + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{"a": {}}'); +update test_jsonb_subscript set test_json['a']['b']['c'][2] = '1'; +select * from test_jsonb_subscript; + +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '{"a": []}'); +update test_jsonb_subscript set test_json['a'][1]['c'][2] = '1'; +select * from test_jsonb_subscript; + -- jsonb to tsvector select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);