Filling array gaps during jsonb subscripting

This commit introduces two new flags for jsonb assignment:

* JB_PATH_FILL_GAPS: Append array elements at the specified position, filling
  any gaps with nulls (similar to the JavaScript behavior).  This mode also
  instructs setPath to create the whole path in a jsonb object if some part of
  the path (more than just the last element) is not present.

* JB_PATH_CONSISTENT_POSITION: Assigning keeps array positions consistent by
  preventing prepending of elements.

Both flags are used only in jsonb subscripting assignment.

Initially proposed by Nikita Glukhov based on polymorphic subscripting
patch, but transformed into an independent change.

Discussion: https://postgr.es/m/CA%2Bq6zcV8qvGcDXurwwgUbwACV86Th7G80pnubg42e-p9gsSf%3Dg%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2Bq6zcX3mdxGCgdThzuySwH-ApyHHM-G4oB1R0fn0j2hZqqkLQ%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2Bq6zcVDuGBv%3DM0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2Bq6zcVovR%2BXY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA%40mail.gmail.com
Author: Dmitry Dolgov
Reviewed-by: Tom Lane, Arthur Zakirov, Pavel Stehule, Dian M Fay
Reviewed-by: Andrew Dunstan, Chapman Flack, Merlin Moncure, Peter Geoghegan
Reviewed-by: Alvaro Herrera, Jim Nasby, Josh Berkus, Victor Wagner
Reviewed-by: Aleksander Alekseev, Robert Haas, Oleg Bartunov
This commit is contained in:
Alexander Korotkov 2021-01-31 23:51:01 +03:00
parent 676887a3b0
commit 81fcc72e66
4 changed files with 452 additions and 15 deletions

View File

@ -648,6 +648,30 @@ UPDATE table_name SET jsonb_field['a'] = '1';
-- Where jsonb_field was NULL, it is now [1]
UPDATE table_name SET jsonb_field[0] = '1';
</programlisting>
If an index is specified for an array containing too few elements,
<literal>NULL</literal> elements will be appended until the index is reachable
and the value can be set.
<programlisting>
-- Where jsonb_field was [], it is now [null, null, 2];
-- where jsonb_field was [0], it is now [0, null, 2]
UPDATE table_name SET jsonb_field[2] = '2';
</programlisting>
A <type>jsonb</type> value will accept assignments to nonexistent subscript
paths as long as the last existing path key is an object or an array. Since
the final subscript is not traversed, it may be an object key. Nested arrays
will be created and <literal>NULL</literal>-padded according to the path until
the value can be placed appropriately.
<programlisting>
-- Where jsonb_field was {}, it is now {"a": [{"b": 1}]}
UPDATE table_name SET jsonb_field['a'][0]['b'] = '1';
-- Where jsonb_field was [], it is now [{"a": 1}]
UPDATE table_name SET jsonb_field[0]['a'] = '1';
</programlisting>
</para>

View File

@ -44,6 +44,8 @@
#define JB_PATH_INSERT_AFTER 0x0010
#define JB_PATH_CREATE_OR_INSERT \
(JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE)
#define JB_PATH_FILL_GAPS 0x0020
#define JB_PATH_CONSISTENT_POSITION 0x0040
/* state for json_object_keys */
typedef struct OkeysState
@ -1634,14 +1636,117 @@ jsonb_set_element(Jsonb *jb, Datum *path, int path_len,
it = JsonbIteratorInit(&jb->root);
res = setPath(&it, path, path_nulls, path_len, &state, 0,
newval, JB_PATH_CREATE);
res = setPath(&it, path, path_nulls, path_len, &state, 0, newval,
JB_PATH_CREATE | JB_PATH_FILL_GAPS |
JB_PATH_CONSISTENT_POSITION);
pfree(path_nulls);
PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
}
/*
 * Push "num" jbvNull values as array elements (WJB_ELEM) into the given
 * parse state.  A zero or negative count pushes nothing.
 */
static void
push_null_elements(JsonbParseState **ps, int num)
{
	JsonbValue	null_elem;

	null_elem.type = jbvNull;

	for (int remaining = num; remaining > 0; remaining--)
		pushJsonbValue(ps, WJB_ELEM, &null_elem);
}
/*
 * Prepare a new structure containing nested empty objects and arrays
 * corresponding to the specified path, and assign a new value at the end of
 * this path. E.g. the path [a][0][b] with the new value 1 will produce the
 * structure {a: [{b: 1}]}.
 *
 * st is the parse state the tokens are pushed into.  level is the index of
 * the path element whose container has already been opened by the caller.
 * path_elems, path_nulls and path_len describe the complete path (text
 * datums), and newval is the value placed at the end of the generated chain.
 *
 * Caller is responsible to make sure such path does not exist yet.
 */
static void
push_path(JsonbParseState **st, int level, Datum *path_elems,
		  bool *path_nulls, int path_len, JsonbValue *newval)
{
	/*
	 * tpath contains expected type of an empty jsonb created at each level
	 * higher or equal than the current one, either jbvObject or jbvArray.
	 * Since it contains only information about path slice from level to the
	 * end, the access index must be normalized by level.
	 */
	enum jbvType *tpath = palloc0((path_len - level) * sizeof(enum jbvType));
	JsonbValue	newkey;

	/*
	 * Create first part of the chain with beginning tokens. For the current
	 * level WJB_BEGIN_OBJECT/WJB_BEGIN_ARRAY was already created, so start
	 * with the next one.
	 */
	for (int i = level + 1; i < path_len; i++)
	{
		char	   *c,
				   *badp;
		long		lindex;

		if (path_nulls[i])
			break;

		/*
		 * Try to convert to an integer to find out the expected type, object
		 * or array.
		 */
		c = TextDatumGetCString(path_elems[i]);
		errno = 0;
		lindex = strtol(c, &badp, 10);
		if (errno != 0 || badp == c || *badp != '\0' || lindex > INT_MAX ||
			lindex < INT_MIN)
		{
			/*
			 * text, an object is expected
			 *
			 * Build the key from the C string obtained above instead of
			 * applying VARDATA_ANY/VARSIZE_ANY_EXHDR to the raw datum: those
			 * macros are not valid for a compressed or out-of-line value.
			 * "c" is intentionally not freed, since the pushed key may keep
			 * pointing at it.
			 */
			newkey.type = jbvString;
			newkey.val.string.val = c;
			newkey.val.string.len = strlen(c);

			(void) pushJsonbValue(st, WJB_BEGIN_OBJECT, NULL);
			(void) pushJsonbValue(st, WJB_KEY, &newkey);

			tpath[i - level] = jbvObject;
		}
		else
		{
			/* integer, an array is expected */
			(void) pushJsonbValue(st, WJB_BEGIN_ARRAY, NULL);

			/* pad with nulls up to the requested position (no-op if <= 0) */
			push_null_elements(st, lindex);

			tpath[i - level] = jbvArray;
		}
	}

	/* Insert an actual value for either an object or array */
	if (tpath[(path_len - level) - 1] == jbvArray)
	{
		(void) pushJsonbValue(st, WJB_ELEM, newval);
	}
	else
		(void) pushJsonbValue(st, WJB_VALUE, newval);

	/*
	 * Close everything up to the last but one level. The last one will be
	 * closed outside of this function.
	 */
	for (int i = path_len - 1; i > level; i--)
	{
		if (path_nulls[i])
			break;

		if (tpath[i - level] == jbvObject)
			(void) pushJsonbValue(st, WJB_END_OBJECT, NULL);
		else
			(void) pushJsonbValue(st, WJB_END_ARRAY, NULL);
	}
}
/*
* Return the text representation of the given JsonbValue.
*/
@ -4786,6 +4891,21 @@ IteratorConcat(JsonbIterator **it1, JsonbIterator **it2,
* Bits JB_PATH_INSERT_BEFORE and JB_PATH_INSERT_AFTER in op_type
* behave as JB_PATH_CREATE if new value is inserted in JsonbObject.
*
* If JB_PATH_FILL_GAPS bit is set, this will change the assignment logic in
* case the target is an array. The assignment index will not be restricted by
* the number of elements in the array, and if there are any empty slots
* between the last element of the array and the new one they will be filled
* with nulls. If the index is negative, it will still be considered an index
* from the end of the array. If a part of the path is not present and this
* part is more than just the last element, this flag will instruct to create
* the whole chain of corresponding objects and insert the value.
*
* JB_PATH_CONSISTENT_POSITION for an array indicates that the caller wants to
* keep values at fixed indices. Indices of existing elements could be changed
* (shifted forward) if the array is prepended with a new value, which happens
* for a negative index that is out of range, so this behavior is prevented
* and an error is raised instead.
*
* All path elements before the last must already exist
* whatever bits in op_type are set, or nothing is done.
*/
@ -4880,6 +5000,8 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
memcmp(k.val.string.val, VARDATA_ANY(path_elems[level]),
k.val.string.len) == 0)
{
done = true;
if (level == path_len - 1)
{
/*
@ -4899,7 +5021,6 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
(void) pushJsonbValue(st, WJB_KEY, &k);
(void) pushJsonbValue(st, WJB_VALUE, newval);
}
done = true;
}
else
{
@ -4944,6 +5065,31 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
}
}
/*--
* If we got here there are only few possibilities:
* - no target path was found, and an open object with some keys/values was
* pushed into the state
* - an object is empty, only WJB_BEGIN_OBJECT is pushed
*
* In both cases if instructed to create the path when not present,
* generate the whole chain of empty objects and insert the new value
* there.
*/
if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1))
{
JsonbValue newkey;
newkey.type = jbvString;
newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[level]);
newkey.val.string.val = VARDATA_ANY(path_elems[level]);
(void) pushJsonbValue(st, WJB_KEY, &newkey);
(void) push_path(st, level, path_elems, path_nulls,
path_len, newval);
/* Result is closed with WJB_END_OBJECT outside of this function */
}
}
/*
@ -4982,25 +5128,48 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
if (idx < 0)
{
if (-idx > nelems)
idx = INT_MIN;
{
/*
* If asked to keep elements position consistent, it's not allowed
* to prepend the array.
*/
if (op_type & JB_PATH_CONSISTENT_POSITION)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("path element at position %d is out of range: %d",
level + 1, idx)));
else
idx = INT_MIN;
}
else
idx = nelems + idx;
}
if (idx > 0 && idx > nelems)
idx = nelems;
/*
* Filling the gaps means no limit is imposed on a positive index; we can
* set any element. Otherwise limit the index by nelems.
*/
if (!(op_type & JB_PATH_FILL_GAPS))
{
if (idx > 0 && idx > nelems)
idx = nelems;
}
/*
* if we're creating, and idx == INT_MIN, we prepend the new value to the
* array also if the array is empty - in which case we don't really care
* what the idx value is
*/
if ((idx == INT_MIN || nelems == 0) && (level == path_len - 1) &&
(op_type & JB_PATH_CREATE_OR_INSERT))
{
Assert(newval != NULL);
if (op_type & JB_PATH_FILL_GAPS && nelems == 0 && idx > 0)
push_null_elements(st, idx);
(void) pushJsonbValue(st, WJB_ELEM, newval);
done = true;
}
@ -5011,6 +5180,8 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
if (i == idx && level < path_len)
{
done = true;
if (level == path_len - 1)
{
r = JsonbIteratorNext(it, &v, true); /* skip */
@ -5028,8 +5199,6 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_REPLACE))
(void) pushJsonbValue(st, WJB_ELEM, newval);
done = true;
}
else
(void) setPath(it, path_elems, path_nulls, path_len,
@ -5057,14 +5226,42 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
}
}
if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done &&
level == path_len - 1 && i == nelems - 1)
{
(void) pushJsonbValue(st, WJB_ELEM, newval);
}
}
}
if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && level == path_len - 1)
{
/*
* If asked to fill the gaps, idx could be bigger than nelems, so
* prepend the new element with nulls if that's the case.
*/
if (op_type & JB_PATH_FILL_GAPS && idx > nelems)
push_null_elements(st, idx - nelems);
(void) pushJsonbValue(st, WJB_ELEM, newval);
done = true;
}
/*--
* If we got here there are only few possibilities:
* - no target path was found, and an open array with some elements was
* pushed into the state
* - an array is empty, only WJB_BEGIN_ARRAY is pushed
*
* In both cases if instructed to create the path when not present,
* generate the whole chain of empty objects and insert the new value
* there.
*/
if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1))
{
if (idx > 0)
push_null_elements(st, idx - nelems);
(void) push_path(st, level, path_elems, path_nulls,
path_len, newval);
/* Result is closed with WJB_END_ARRAY outside of this function */
}
}
/*

View File

@ -4999,6 +4999,141 @@ select * from test_jsonb_subscript;
3 | [1]
(3 rows)
-- Fill the gaps logic
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '[0]');
update test_jsonb_subscript set test_json[5] = '1';
select * from test_jsonb_subscript;
id | test_json
----+--------------------------------
1 | [0, null, null, null, null, 1]
(1 row)
update test_jsonb_subscript set test_json[-4] = '1';
select * from test_jsonb_subscript;
id | test_json
----+-----------------------------
1 | [0, null, 1, null, null, 1]
(1 row)
update test_jsonb_subscript set test_json[-8] = '1';
ERROR: path element at position 1 is out of range: -8
select * from test_jsonb_subscript;
id | test_json
----+-----------------------------
1 | [0, null, 1, null, null, 1]
(1 row)
-- keep consistent values position
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '[]');
update test_jsonb_subscript set test_json[5] = '1';
select * from test_jsonb_subscript;
id | test_json
----+-----------------------------------
1 | [null, null, null, null, null, 1]
(1 row)
-- create the whole path
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json['a'][0]['b'][0]['c'] = '1';
select * from test_jsonb_subscript;
id | test_json
----+----------------------------
1 | {"a": [{"b": [{"c": 1}]}]}
(1 row)
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json['a'][2]['b'][2]['c'][2] = '1';
select * from test_jsonb_subscript;
id | test_json
----+------------------------------------------------------------------
1 | {"a": [null, null, {"b": [null, null, {"c": [null, null, 1]}]}]}
(1 row)
-- create the whole path with already existing keys
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{"b": 1}');
update test_jsonb_subscript set test_json['a'][0] = '2';
select * from test_jsonb_subscript;
id | test_json
----+--------------------
1 | {"a": [2], "b": 1}
(1 row)
-- the start jsonb is an object, first subscript is treated as a key
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json[0]['a'] = '1';
select * from test_jsonb_subscript;
id | test_json
----+-----------------
1 | {"0": {"a": 1}}
(1 row)
-- the start jsonb is an array
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '[]');
update test_jsonb_subscript set test_json[0]['a'] = '1';
update test_jsonb_subscript set test_json[2]['b'] = '2';
select * from test_jsonb_subscript;
id | test_json
----+----------------------------
1 | [{"a": 1}, null, {"b": 2}]
(1 row)
-- overwriting an existing path
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json['a']['b'][1] = '1';
update test_jsonb_subscript set test_json['a']['b'][10] = '1';
select * from test_jsonb_subscript;
id | test_json
----+----------------------------------------------------------------------------
1 | {"a": {"b": [null, 1, null, null, null, null, null, null, null, null, 1]}}
(1 row)
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '[]');
update test_jsonb_subscript set test_json[0][0][0] = '1';
update test_jsonb_subscript set test_json[0][0][1] = '1';
select * from test_jsonb_subscript;
id | test_json
----+------------
1 | [[[1, 1]]]
(1 row)
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json['a']['b'][10] = '1';
update test_jsonb_subscript set test_json['a'][10][10] = '1';
select * from test_jsonb_subscript;
id | test_json
----+------------------------------------------------------------------------------------------------------------------------------------------------------
1 | {"a": {"b": [null, null, null, null, null, null, null, null, null, null, 1], "10": [null, null, null, null, null, null, null, null, null, null, 1]}}
(1 row)
-- an empty sub element
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{"a": {}}');
update test_jsonb_subscript set test_json['a']['b']['c'][2] = '1';
select * from test_jsonb_subscript;
id | test_json
----+--------------------------------------
1 | {"a": {"b": {"c": [null, null, 1]}}}
(1 row)
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{"a": []}');
update test_jsonb_subscript set test_json['a'][1]['c'][2] = '1';
select * from test_jsonb_subscript;
id | test_json
----+---------------------------------------
1 | {"a": [null, {"c": [null, null, 1]}]}
(1 row)
-- jsonb to tsvector
select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
to_tsvector

View File

@ -1290,6 +1290,87 @@ update test_jsonb_subscript set test_json = NULL where id = 3;
update test_jsonb_subscript set test_json[0] = '1';
select * from test_jsonb_subscript;
-- Fill the gaps logic
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '[0]');
update test_jsonb_subscript set test_json[5] = '1';
select * from test_jsonb_subscript;
update test_jsonb_subscript set test_json[-4] = '1';
select * from test_jsonb_subscript;
update test_jsonb_subscript set test_json[-8] = '1';
select * from test_jsonb_subscript;
-- keep consistent values position
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '[]');
update test_jsonb_subscript set test_json[5] = '1';
select * from test_jsonb_subscript;
-- create the whole path
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json['a'][0]['b'][0]['c'] = '1';
select * from test_jsonb_subscript;
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json['a'][2]['b'][2]['c'][2] = '1';
select * from test_jsonb_subscript;
-- create the whole path with already existing keys
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{"b": 1}');
update test_jsonb_subscript set test_json['a'][0] = '2';
select * from test_jsonb_subscript;
-- the start jsonb is an object, first subscript is treated as a key
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json[0]['a'] = '1';
select * from test_jsonb_subscript;
-- the start jsonb is an array
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '[]');
update test_jsonb_subscript set test_json[0]['a'] = '1';
update test_jsonb_subscript set test_json[2]['b'] = '2';
select * from test_jsonb_subscript;
-- overwriting an existing path
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json['a']['b'][1] = '1';
update test_jsonb_subscript set test_json['a']['b'][10] = '1';
select * from test_jsonb_subscript;
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '[]');
update test_jsonb_subscript set test_json[0][0][0] = '1';
update test_jsonb_subscript set test_json[0][0][1] = '1';
select * from test_jsonb_subscript;
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{}');
update test_jsonb_subscript set test_json['a']['b'][10] = '1';
update test_jsonb_subscript set test_json['a'][10][10] = '1';
select * from test_jsonb_subscript;
-- an empty sub element
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{"a": {}}');
update test_jsonb_subscript set test_json['a']['b']['c'][2] = '1';
select * from test_jsonb_subscript;
delete from test_jsonb_subscript;
insert into test_jsonb_subscript values (1, '{"a": []}');
update test_jsonb_subscript set test_json['a'][1]['c'][2] = '1';
select * from test_jsonb_subscript;
-- jsonb to tsvector
select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);