postgresql/src/backend/utils/adt/jsonfuncs.c


/*-------------------------------------------------------------------------
*
* jsonfuncs.c
* Functions to process JSON data types.
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/adt/jsonfuncs.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <limits.h>
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "fmgr.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
#include "utils/json.h"
#include "utils/jsonapi.h"
#include "utils/jsonb.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
/* Operations available for setPath */
#define JB_PATH_CREATE 0x0001
#define JB_PATH_DELETE 0x0002
#define JB_PATH_REPLACE 0x0004
#define JB_PATH_INSERT_BEFORE 0x0008
#define JB_PATH_INSERT_AFTER 0x0010
#define JB_PATH_CREATE_OR_INSERT \
(JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE)
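/*
 * Roughly (an illustrative summary of the setPath() callers later in this
 * file): jsonb_set() passes JB_PATH_CREATE or JB_PATH_REPLACE depending on
 * its create_missing argument, jsonb_insert() passes JB_PATH_INSERT_BEFORE
 * or JB_PATH_INSERT_AFTER, and jsonb_delete_path() (the #- operator)
 * passes JB_PATH_DELETE.
 */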
/* state for json_object_keys */
typedef struct OkeysState
{
JsonLexContext *lex;
char **result;
int result_size;
int result_count;
int sent_count;
} OkeysState;
/* state for iterate_json_string_values function */
typedef struct IterateJsonStringValuesState
{
JsonLexContext *lex;
JsonIterateStringValuesAction action; /* an action that will be
* applied to each json value */
void *action_state; /* any necessary context for iteration */
} IterateJsonStringValuesState;
/* state for transform_json_string_values function */
typedef struct TransformJsonStringValuesState
{
JsonLexContext *lex;
StringInfo strval; /* resulting json */
JsonTransformStringValuesAction action; /* an action that will be
* applied to each json value */
void *action_state; /* any necessary context for transformation */
} TransformJsonStringValuesState;
/* state for json_get* functions */
typedef struct GetState
{
JsonLexContext *lex;
text *tresult;
char *result_start;
bool normalize_results;
bool next_scalar;
int npath; /* length of each path-related array */
char **path_names; /* field name(s) being sought */
int *path_indexes; /* array index(es) being sought */
bool *pathok; /* is path matched to current depth? */
int *array_cur_index; /* current element index at each path level */
} GetState;
/* state for json_array_length */
typedef struct AlenState
{
JsonLexContext *lex;
int count;
} AlenState;
/* state for json_each */
typedef struct EachState
{
JsonLexContext *lex;
Tuplestorestate *tuple_store;
TupleDesc ret_tdesc;
MemoryContext tmp_cxt;
char *result_start;
bool normalize_results;
bool next_scalar;
char *normalized_scalar;
} EachState;
/* state for json_array_elements */
typedef struct ElementsState
{
JsonLexContext *lex;
const char *function_name;
Tuplestorestate *tuple_store;
TupleDesc ret_tdesc;
MemoryContext tmp_cxt;
char *result_start;
bool normalize_results;
bool next_scalar;
char *normalized_scalar;
} ElementsState;
/* state for get_json_object_as_hash */
typedef struct JHashState
{
JsonLexContext *lex;
const char *function_name;
HTAB *hash;
char *saved_scalar;
char *save_json_start;
JsonTokenType saved_token_type;
} JHashState;
/* hashtable element */
typedef struct JsonHashEntry
{
char fname[NAMEDATALEN]; /* hash key (MUST BE FIRST) */
char *val;
JsonTokenType type;
} JsonHashEntry;
/* structure to cache type I/O metadata needed for populate_scalar() */
typedef struct ScalarIOData
{
Oid typioparam;
FmgrInfo typiofunc;
} ScalarIOData;
/* these two structures are used recursively */
typedef struct ColumnIOData ColumnIOData;
typedef struct RecordIOData RecordIOData;
/* structure to cache metadata needed for populate_array() */
typedef struct ArrayIOData
{
ColumnIOData *element_info; /* metadata cache */
Oid element_type; /* array element type id */
int32 element_typmod; /* array element type modifier */
} ArrayIOData;
/* structure to cache metadata needed for populate_composite() */
typedef struct CompositeIOData
{
/*
* We use pointer to a RecordIOData here because variable-length struct
* RecordIOData can't be used directly in ColumnIOData.io union
*/
RecordIOData *record_io; /* metadata cache for populate_record() */
TupleDesc tupdesc; /* cached tuple descriptor */
} CompositeIOData;
/* structure to cache metadata needed for populate_domain() */
typedef struct DomainIOData
{
ColumnIOData *base_io; /* metadata cache */
Oid base_typid; /* base type id */
int32 base_typmod; /* base type modifier */
void *domain_info; /* opaque cache for domain checks */
} DomainIOData;
/* enumeration type categories */
typedef enum TypeCat
{
TYPECAT_SCALAR = 's',
TYPECAT_ARRAY = 'a',
TYPECAT_COMPOSITE = 'c',
TYPECAT_DOMAIN = 'd'
} TypeCat;
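/*
 * For example, int4 falls under TYPECAT_SCALAR, int4[] under
 * TYPECAT_ARRAY, a table's row type under TYPECAT_COMPOSITE, and a type
 * created with CREATE DOMAIN under TYPECAT_DOMAIN (see
 * prepare_column_cache() for the actual classification).
 */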
/* these two are stolen from hstore / record_out, used in populate_record* */
/* structure to cache record metadata needed for populate_record_field() */
struct ColumnIOData
{
Oid typid; /* column type id */
int32 typmod; /* column type modifier */
TypeCat typcat; /* column type category */
ScalarIOData scalar_io; /* metadata cache for direct conversion
* through input function */
union
{
ArrayIOData array;
CompositeIOData composite;
DomainIOData domain;
} io; /* metadata cache for various column type
* categories */
};
/* structure to cache record metadata needed for populate_record() */
struct RecordIOData
{
Oid record_type;
int32 record_typmod;
int ncolumns;
ColumnIOData columns[FLEXIBLE_ARRAY_MEMBER];
};
/* state for populate_recordset */
typedef struct PopulateRecordsetState
{
JsonLexContext *lex;
const char *function_name;
HTAB *json_hash;
char *saved_scalar;
char *save_json_start;
JsonTokenType saved_token_type;
Tuplestorestate *tuple_store;
TupleDesc ret_tdesc;
HeapTupleHeader rec;
RecordIOData **my_extra;
MemoryContext fn_mcxt; /* used to stash IO funcs */
} PopulateRecordsetState;
/* structure to cache metadata needed for populate_record_worker() */
typedef struct PopulateRecordCache
{
Oid argtype; /* verified row type of the first argument */
CompositeIOData io; /* metadata cache for populate_composite() */
} PopulateRecordCache;
/* common data for populate_array_json() and populate_array_dim_jsonb() */
typedef struct PopulateArrayContext
{
ArrayBuildState *astate; /* array build state */
ArrayIOData *aio; /* metadata cache */
MemoryContext acxt; /* array build memory context */
MemoryContext mcxt; /* cache memory context */
const char *colname; /* for diagnostics only */
int *dims; /* dimensions */
int *sizes; /* current dimension counters */
int ndims; /* number of dimensions */
} PopulateArrayContext;
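/*
 * Illustrative example: populating an int[] column from the json value
 * [[1,2],[3,4]] should end with ndims = 2 and dims = {2, 2}; sizes[]
 * holds the running element count of each open dimension while parsing.
 */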
/* state for populate_array_json() */
typedef struct PopulateArrayState
{
JsonLexContext *lex; /* json lexer */
PopulateArrayContext *ctx; /* context */
char *element_start; /* start of the current array element */
char *element_scalar; /* current array element token if it is a
* scalar */
JsonTokenType element_type; /* current array element type */
} PopulateArrayState;
/* state for json_strip_nulls */
typedef struct StripnullState
{
JsonLexContext *lex;
StringInfo strval;
bool skip_next_null;
} StripnullState;
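/*
 * For example, stripping '{"a": 1, "b": null, "c": [null]}' removes only
 * the "b" field: null-valued object fields are dropped, while null array
 * elements are left alone.
 */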
/* structure for generalized json/jsonb value passing */
typedef struct JsValue
{
bool is_json; /* json/jsonb */
union
{
struct
{
char *str; /* json string */
int len; /* json string length or -1 if null-terminated */
JsonTokenType type; /* json type */
} json; /* json value */
JsonbValue *jsonb; /* jsonb value */
} val;
} JsValue;
typedef struct JsObject
{
bool is_json; /* json/jsonb */
union
{
HTAB *json_hash;
JsonbContainer *jsonb_cont;
} val;
} JsObject;
/* useful macros for testing JsValue properties */
#define JsValueIsNull(jsv) \
((jsv)->is_json ? \
(!(jsv)->val.json.str || (jsv)->val.json.type == JSON_TOKEN_NULL) : \
(!(jsv)->val.jsonb || (jsv)->val.jsonb->type == jbvNull))
#define JsValueIsString(jsv) \
((jsv)->is_json ? (jsv)->val.json.type == JSON_TOKEN_STRING \
: ((jsv)->val.jsonb && (jsv)->val.jsonb->type == jbvString))
#define JsObjectIsEmpty(jso) \
((jso)->is_json \
? hash_get_num_entries((jso)->val.json_hash) == 0 \
: ((jso)->val.jsonb_cont == NULL || \
JsonContainerSize((jso)->val.jsonb_cont) == 0))
#define JsObjectFree(jso) \
do { \
if ((jso)->is_json) \
hash_destroy((jso)->val.json_hash); \
} while (0)
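/*
 * Note that only the json variant owns a hash table needing explicit
 * cleanup; for jsonb, jsonb_cont just points into the original datum, so
 * JsObjectFree() is a no-op in that case.
 */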
/* semantic action functions for json_object_keys */
static void okeys_object_field_start(void *state, char *fname, bool isnull);
static void okeys_array_start(void *state);
static void okeys_scalar(void *state, char *token, JsonTokenType tokentype);
/* semantic action functions for json_get* functions */
static void get_object_start(void *state);
static void get_object_end(void *state);
static void get_object_field_start(void *state, char *fname, bool isnull);
static void get_object_field_end(void *state, char *fname, bool isnull);
static void get_array_start(void *state);
static void get_array_end(void *state);
static void get_array_element_start(void *state, bool isnull);
static void get_array_element_end(void *state, bool isnull);
static void get_scalar(void *state, char *token, JsonTokenType tokentype);
/* common worker function for json getter functions */
static Datum get_path_all(FunctionCallInfo fcinfo, bool as_text);
static text *get_worker(text *json, char **tpath, int *ipath, int npath,
bool normalize_results);
static Datum get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text);
/* semantic action functions for json_array_length */
static void alen_object_start(void *state);
static void alen_scalar(void *state, char *token, JsonTokenType tokentype);
static void alen_array_element_start(void *state, bool isnull);
/* common workers for json{b}_each* functions */
static Datum each_worker(FunctionCallInfo fcinfo, bool as_text);
static Datum each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
bool as_text);
/* semantic action functions for json_each */
static void each_object_field_start(void *state, char *fname, bool isnull);
static void each_object_field_end(void *state, char *fname, bool isnull);
static void each_array_start(void *state);
static void each_scalar(void *state, char *token, JsonTokenType tokentype);
/* common workers for json{b}_array_elements_* functions */
static Datum elements_worker(FunctionCallInfo fcinfo, const char *funcname,
bool as_text);
static Datum elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
bool as_text);
/* semantic action functions for json_array_elements */
static void elements_object_start(void *state);
static void elements_array_element_start(void *state, bool isnull);
static void elements_array_element_end(void *state, bool isnull);
static void elements_scalar(void *state, char *token, JsonTokenType tokentype);
/* turn a json object into a hash table */
static HTAB *get_json_object_as_hash(char *json, int len, const char *funcname);
/* semantic actions for populate_array_json */
static void populate_array_object_start(void *_state);
static void populate_array_array_end(void *_state);
static void populate_array_element_start(void *_state, bool isnull);
static void populate_array_element_end(void *_state, bool isnull);
static void populate_array_scalar(void *_state, char *token, JsonTokenType tokentype);
/* semantic action functions for get_json_object_as_hash */
static void hash_object_field_start(void *state, char *fname, bool isnull);
static void hash_object_field_end(void *state, char *fname, bool isnull);
static void hash_array_start(void *state);
static void hash_scalar(void *state, char *token, JsonTokenType tokentype);
/* semantic action functions for populate_recordset */
static void populate_recordset_object_field_start(void *state, char *fname, bool isnull);
static void populate_recordset_object_field_end(void *state, char *fname, bool isnull);
static void populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype);
static void populate_recordset_object_start(void *state);
static void populate_recordset_object_end(void *state);
static void populate_recordset_array_start(void *state);
static void populate_recordset_array_element_start(void *state, bool isnull);
/* semantic action functions for json_strip_nulls */
static void sn_object_start(void *state);
static void sn_object_end(void *state);
static void sn_array_start(void *state);
static void sn_array_end(void *state);
static void sn_object_field_start(void *state, char *fname, bool isnull);
static void sn_array_element_start(void *state, bool isnull);
static void sn_scalar(void *state, char *token, JsonTokenType tokentype);
/* worker functions for populate_record, to_record, populate_recordset and to_recordset */
static Datum populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname,
bool have_record_arg);
static Datum populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
bool have_record_arg);
/* helper functions for populate_record[set] */
static HeapTupleHeader populate_record(TupleDesc tupdesc, RecordIOData **record_p,
HeapTupleHeader defaultval, MemoryContext mcxt,
JsObject *obj);
static Datum populate_record_field(ColumnIOData *col, Oid typid, int32 typmod,
const char *colname, MemoryContext mcxt,
Datum defaultval, JsValue *jsv, bool *isnull);
static void JsValueToJsObject(JsValue *jsv, JsObject *jso);
static Datum populate_composite(CompositeIOData *io, Oid typid, int32 typmod,
const char *colname, MemoryContext mcxt,
HeapTupleHeader defaultval, JsValue *jsv);
static Datum populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv);
static void prepare_column_cache(ColumnIOData *column, Oid typid, int32 typmod,
MemoryContext mcxt, bool json);
static Datum populate_record_field(ColumnIOData *col, Oid typid, int32 typmod,
const char *colname, MemoryContext mcxt, Datum defaultval,
JsValue *jsv, bool *isnull);
static RecordIOData *allocate_record_info(MemoryContext mcxt, int ncolumns);
static bool JsObjectGetField(JsObject *obj, char *field, JsValue *jsv);
static void populate_recordset_record(PopulateRecordsetState *state, JsObject *obj);
static void populate_array_json(PopulateArrayContext *ctx, char *json, int len);
static void populate_array_dim_jsonb(PopulateArrayContext *ctx, JsonbValue *jbv,
int ndim);
static void populate_array_report_expected_array(PopulateArrayContext *ctx, int ndim);
static void populate_array_assign_ndims(PopulateArrayContext *ctx, int ndims);
static void populate_array_check_dimension(PopulateArrayContext *ctx, int ndim);
static void populate_array_element(PopulateArrayContext *ctx, int ndim, JsValue *jsv);
static Datum populate_array(ArrayIOData *aio, const char *colname,
MemoryContext mcxt, JsValue *jsv);
static Datum populate_domain(DomainIOData *io, Oid typid, const char *colname,
MemoryContext mcxt, JsValue *jsv, bool isnull);
/* Worker that takes care of common setup for us */
static JsonbValue *findJsonbValueFromContainerLen(JsonbContainer *container,
uint32 flags,
char *key,
uint32 keylen);
/* functions supporting jsonb_delete, jsonb_set and jsonb_concat */
static JsonbValue *IteratorConcat(JsonbIterator **it1, JsonbIterator **it2,
JsonbParseState **state);
static JsonbValue *setPath(JsonbIterator **it, Datum *path_elems,
bool *path_nulls, int path_len,
JsonbParseState **st, int level, Jsonb *newval,
int op_type);
static void setPathObject(JsonbIterator **it, Datum *path_elems,
bool *path_nulls, int path_len, JsonbParseState **st,
int level,
Jsonb *newval, uint32 npairs, int op_type);
static void setPathArray(JsonbIterator **it, Datum *path_elems,
bool *path_nulls, int path_len, JsonbParseState **st,
int level, Jsonb *newval, uint32 nelems, int op_type);
static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
/* function supporting iterate_json_string_values */
static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
/* functions supporting transform_json_string_values */
static void transform_string_values_object_start(void *state);
static void transform_string_values_object_end(void *state);
static void transform_string_values_array_start(void *state);
static void transform_string_values_array_end(void *state);
static void transform_string_values_object_field_start(void *state, char *fname, bool isnull);
static void transform_string_values_array_element_start(void *state, bool isnull);
static void transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
/*
* SQL function json_object_keys
*
* Returns the set of keys for the object argument.
*
* This SRF operates in value-per-call mode. It processes the
* object during the first call, and the keys are simply stashed
* in an array, whose size is expanded as necessary. This is probably
* safe enough for a list of keys of a single object, since they are
* limited in size to NAMEDATALEN and the number of keys is unlikely to
* be so huge that it has major memory implications.
*/
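/*
 * Example (illustrative):
 *   SELECT jsonb_object_keys('{"f1": "abc", "f2": null}'::jsonb);
 * returns two rows, 'f1' and 'f2'.
 */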
Datum
jsonb_object_keys(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
OkeysState *state;
int i;
if (SRF_IS_FIRSTCALL())
{
MemoryContext oldcontext;
Jsonb *jb = PG_GETARG_JSONB(0);
bool skipNested = false;
JsonbIterator *it;
JsonbValue v;
JsonbIteratorToken r;
if (JB_ROOT_IS_SCALAR(jb))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on a scalar",
"jsonb_object_keys")));
else if (JB_ROOT_IS_ARRAY(jb))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on an array",
"jsonb_object_keys")));
funcctx = SRF_FIRSTCALL_INIT();
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
state = palloc(sizeof(OkeysState));
state->result_size = JB_ROOT_COUNT(jb);
state->result_count = 0;
state->sent_count = 0;
state->result = palloc(state->result_size * sizeof(char *));
it = JsonbIteratorInit(&jb->root);
while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
{
skipNested = true;
if (r == WJB_KEY)
{
char *cstr;
cstr = palloc((v.val.string.len + 1) * sizeof(char));
memcpy(cstr, v.val.string.val, v.val.string.len);
cstr[v.val.string.len] = '\0';
state->result[state->result_count++] = cstr;
}
}
MemoryContextSwitchTo(oldcontext);
funcctx->user_fctx = (void *) state;
}
funcctx = SRF_PERCALL_SETUP();
state = (OkeysState *) funcctx->user_fctx;
if (state->sent_count < state->result_count)
{
char *nxt = state->result[state->sent_count++];
SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
}
/* cleanup to reduce or eliminate memory leaks */
for (i = 0; i < state->result_count; i++)
pfree(state->result[i]);
pfree(state->result);
pfree(state);
SRF_RETURN_DONE(funcctx);
}
Datum
json_object_keys(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
OkeysState *state;
int i;
if (SRF_IS_FIRSTCALL())
{
text *json = PG_GETARG_TEXT_PP(0);
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem;
MemoryContext oldcontext;
funcctx = SRF_FIRSTCALL_INIT();
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
state = palloc(sizeof(OkeysState));
sem = palloc0(sizeof(JsonSemAction));
state->lex = lex;
state->result_size = 256;
state->result_count = 0;
state->sent_count = 0;
state->result = palloc(256 * sizeof(char *));
sem->semstate = (void *) state;
sem->array_start = okeys_array_start;
sem->scalar = okeys_scalar;
sem->object_field_start = okeys_object_field_start;
/* remainder are all NULL, courtesy of palloc0 above */
pg_parse_json(lex, sem);
/* keys are now in state->result */
pfree(lex->strval->data);
pfree(lex->strval);
pfree(lex);
pfree(sem);
MemoryContextSwitchTo(oldcontext);
funcctx->user_fctx = (void *) state;
}
funcctx = SRF_PERCALL_SETUP();
state = (OkeysState *) funcctx->user_fctx;
if (state->sent_count < state->result_count)
{
char *nxt = state->result[state->sent_count++];
SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
}
/* cleanup to reduce or eliminate memory leaks */
for (i = 0; i < state->result_count; i++)
pfree(state->result[i]);
pfree(state->result);
pfree(state);
SRF_RETURN_DONE(funcctx);
}
static void
okeys_object_field_start(void *state, char *fname, bool isnull)
{
OkeysState *_state = (OkeysState *) state;
/* only collecting keys for the top level object */
if (_state->lex->lex_level != 1)
return;
/* enlarge result array if necessary */
if (_state->result_count >= _state->result_size)
{
_state->result_size *= 2;
_state->result = (char **)
repalloc(_state->result, sizeof(char *) * _state->result_size);
}
/* save a copy of the field name */
_state->result[_state->result_count++] = pstrdup(fname);
}
static void
okeys_array_start(void *state)
{
OkeysState *_state = (OkeysState *) state;
/* top level must be a json object */
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on an array",
"json_object_keys")));
}
static void
okeys_scalar(void *state, char *token, JsonTokenType tokentype)
{
OkeysState *_state = (OkeysState *) state;
/* top level must be a json object */
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on a scalar",
"json_object_keys")));
}
/*
* json and jsonb getter functions
* these implement the -> ->> #> and #>> operators
* and the json{b?}_extract_path*(json, text, ...) functions
*/
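/*
 * For example:
 *   '{"a": {"b": 1}}'::json -> 'a'       yields {"b": 1}  (as json)
 *   '{"a": {"b": 1}}'::json ->> 'a'      yields {"b": 1}  (as text)
 *   '{"a": {"b": 1}}'::json #> '{a,b}'   yields 1         (as json)
 *   '{"a": {"b": 1}}'::json #>> '{a,b}'  yields 1         (as text)
 */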
Datum
json_object_field(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_PP(0);
text *fname = PG_GETARG_TEXT_PP(1);
char *fnamestr = text_to_cstring(fname);
text *result;
result = get_worker(json, &fnamestr, NULL, 1, false);
if (result != NULL)
PG_RETURN_TEXT_P(result);
else
PG_RETURN_NULL();
}
Datum
jsonb_object_field(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
text *key = PG_GETARG_TEXT_PP(1);
JsonbValue *v;
if (!JB_ROOT_IS_OBJECT(jb))
PG_RETURN_NULL();
v = findJsonbValueFromContainerLen(&jb->root, JB_FOBJECT,
VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key));
if (v != NULL)
PG_RETURN_JSONB(JsonbValueToJsonb(v));
PG_RETURN_NULL();
}
Datum
json_object_field_text(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_PP(0);
text *fname = PG_GETARG_TEXT_PP(1);
char *fnamestr = text_to_cstring(fname);
text *result;
result = get_worker(json, &fnamestr, NULL, 1, true);
if (result != NULL)
PG_RETURN_TEXT_P(result);
else
PG_RETURN_NULL();
}
Datum
jsonb_object_field_text(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
text *key = PG_GETARG_TEXT_PP(1);
JsonbValue *v;
if (!JB_ROOT_IS_OBJECT(jb))
PG_RETURN_NULL();
v = findJsonbValueFromContainerLen(&jb->root, JB_FOBJECT,
VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key));
if (v != NULL)
{
text *result = NULL;
switch (v->type)
{
case jbvNull:
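/* a json null comes back as SQL NULL in the text variants */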
break;
case jbvBool:
result = cstring_to_text(v->val.boolean ? "true" : "false");
break;
case jbvString:
result = cstring_to_text_with_len(v->val.string.val, v->val.string.len);
break;
case jbvNumeric:
result = cstring_to_text(DatumGetCString(DirectFunctionCall1(numeric_out,
PointerGetDatum(v->val.numeric))));
break;
case jbvBinary:
{
StringInfo jtext = makeStringInfo();
(void) JsonbToCString(jtext, v->val.binary.data, -1);
result = cstring_to_text_with_len(jtext->data, jtext->len);
}
break;
default:
elog(ERROR, "unrecognized jsonb type: %d", (int) v->type);
}
if (result)
PG_RETURN_TEXT_P(result);
}
PG_RETURN_NULL();
}
Datum
json_array_element(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_PP(0);
int element = PG_GETARG_INT32(1);
text *result;
result = get_worker(json, NULL, &element, 1, false);
if (result != NULL)
PG_RETURN_TEXT_P(result);
else
PG_RETURN_NULL();
}
Datum
jsonb_array_element(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
int element = PG_GETARG_INT32(1);
JsonbValue *v;
if (!JB_ROOT_IS_ARRAY(jb))
PG_RETURN_NULL();
/* Handle negative subscript */
if (element < 0)
{
uint32 nelements = JB_ROOT_COUNT(jb);
if (-element > nelements)
PG_RETURN_NULL();
else
element += nelements;
}
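/*
 * For example, element = -1 against a 3-element array is mapped to 2 (the
 * last element), while element = -4 bails out above with NULL.
 */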
v = getIthJsonbValueFromContainer(&jb->root, element);
if (v != NULL)
PG_RETURN_JSONB(JsonbValueToJsonb(v));
PG_RETURN_NULL();
}
Datum
json_array_element_text(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_PP(0);
int element = PG_GETARG_INT32(1);
text *result;
result = get_worker(json, NULL, &element, 1, true);
if (result != NULL)
PG_RETURN_TEXT_P(result);
else
PG_RETURN_NULL();
}
Datum
jsonb_array_element_text(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
int element = PG_GETARG_INT32(1);
JsonbValue *v;
if (!JB_ROOT_IS_ARRAY(jb))
PG_RETURN_NULL();
/* Handle negative subscript */
if (element < 0)
{
uint32 nelements = JB_ROOT_COUNT(jb);
if (-element > nelements)
PG_RETURN_NULL();
else
element += nelements;
}
v = getIthJsonbValueFromContainer(&jb->root, element);
if (v != NULL)
{
text *result = NULL;
switch (v->type)
{
case jbvNull:
break;
case jbvBool:
result = cstring_to_text(v->val.boolean ? "true" : "false");
break;
case jbvString:
result = cstring_to_text_with_len(v->val.string.val, v->val.string.len);
break;
case jbvNumeric:
result = cstring_to_text(DatumGetCString(DirectFunctionCall1(numeric_out,
PointerGetDatum(v->val.numeric))));
break;
case jbvBinary:
{
StringInfo jtext = makeStringInfo();
(void) JsonbToCString(jtext, v->val.binary.data, -1);
result = cstring_to_text_with_len(jtext->data, jtext->len);
}
break;
default:
elog(ERROR, "unrecognized jsonb type: %d", (int) v->type);
}
if (result)
PG_RETURN_TEXT_P(result);
}
PG_RETURN_NULL();
}
Datum
json_extract_path(PG_FUNCTION_ARGS)
{
return get_path_all(fcinfo, false);
}
Datum
json_extract_path_text(PG_FUNCTION_ARGS)
{
return get_path_all(fcinfo, true);
}
/*
* common routine for extract_path functions
*/
static Datum
get_path_all(FunctionCallInfo fcinfo, bool as_text)
{
text *json = PG_GETARG_TEXT_PP(0);
ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
text *result;
Datum *pathtext;
bool *pathnulls;
int npath;
char **tpath;
int *ipath;
int i;
/*
* If the array contains any null elements, return NULL, on the grounds
* that you'd have gotten NULL if any RHS value were NULL in a nested
* series of applications of the -> operator. (Note: because we also
* return NULL for error cases such as no-such-field, this is true
* regardless of the contents of the rest of the array.)
*/
if (array_contains_nulls(path))
PG_RETURN_NULL();
deconstruct_array(path, TEXTOID, -1, false, 'i',
&pathtext, &pathnulls, &npath);
tpath = palloc(npath * sizeof(char *));
ipath = palloc(npath * sizeof(int));
for (i = 0; i < npath; i++)
{
Assert(!pathnulls[i]);
tpath[i] = TextDatumGetCString(pathtext[i]);
/*
* we have no idea at this stage what structure the document has, so
* just convert anything in the path that we can to an integer, and set
* all the other integers to INT_MIN, which will never match.
*/
if (*tpath[i] != '\0')
{
long ind;
char *endptr;
errno = 0;
ind = strtol(tpath[i], &endptr, 10);
if (*endptr == '\0' && errno == 0 && ind <= INT_MAX && ind >= INT_MIN)
ipath[i] = (int) ind;
else
ipath[i] = INT_MIN;
}
else
ipath[i] = INT_MIN;
}
result = get_worker(json, tpath, ipath, npath, as_text);
if (result != NULL)
PG_RETURN_TEXT_P(result);
else
PG_RETURN_NULL();
}
/*
* get_worker
*
* common worker for all the json getter functions
*
* json: JSON object (in text form)
* tpath[]: field name(s) to extract
* ipath[]: array index(es) (zero-based) to extract, accepts negatives
* npath: length of tpath[] and/or ipath[]
* normalize_results: true to de-escape string and null scalars
*
* tpath can be NULL, or any one tpath[] entry can be NULL, if an object
* field is not to be matched at that nesting level. Similarly, ipath can
* be NULL, or any one ipath[] entry can be INT_MIN if an array element is
* not to be matched at that nesting level (a json datum should never be
* large enough to have -INT_MIN elements due to MaxAllocSize restriction).
*/
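/*
 * For example (illustrative), json_extract_path(j, 'a', '2') arrives here
 * with tpath = {"a", "2"} and ipath = {INT_MIN, 2}, so at the second
 * nesting level either an object field named "2" or array element 2 can
 * match.
 */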
static text *
get_worker(text *json,
char **tpath,
int *ipath,
int npath,
bool normalize_results)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
GetState *state = palloc0(sizeof(GetState));
Assert(npath >= 0);
state->lex = lex;
/* is it "_as_text" variant? */
state->normalize_results = normalize_results;
state->npath = npath;
state->path_names = tpath;
state->path_indexes = ipath;
state->pathok = palloc0(sizeof(bool) * npath);
state->array_cur_index = palloc(sizeof(int) * npath);
if (npath > 0)
state->pathok[0] = true;
sem->semstate = (void *) state;
/*
* Not all variants need all the semantic routines. Only set the ones that
* are actually needed for maximum efficiency.
*/
sem->scalar = get_scalar;
if (npath == 0)
{
sem->object_start = get_object_start;
sem->object_end = get_object_end;
sem->array_start = get_array_start;
sem->array_end = get_array_end;
}
if (tpath != NULL)
{
sem->object_field_start = get_object_field_start;
sem->object_field_end = get_object_field_end;
}
if (ipath != NULL)
{
sem->array_start = get_array_start;
sem->array_element_start = get_array_element_start;
sem->array_element_end = get_array_element_end;
}
pg_parse_json(lex, sem);
return state->tresult;
}
static void
get_object_start(void *state)
{
GetState *_state = (GetState *) state;
int lex_level = _state->lex->lex_level;
if (lex_level == 0 && _state->npath == 0)
{
/*
* Special case: we should match the entire object. We only need this
* at outermost level because at nested levels the match will have
* been started by the outer field or array element callback.
*/
_state->result_start = _state->lex->token_start;
}
}
static void
get_object_end(void *state)
{
GetState *_state = (GetState *) state;
int lex_level = _state->lex->lex_level;
if (lex_level == 0 && _state->npath == 0)
{
/* Special case: return the entire object */
char *start = _state->result_start;
int len = _state->lex->prev_token_terminator - start;
_state->tresult = cstring_to_text_with_len(start, len);
}
}
static void
get_object_field_start(void *state, char *fname, bool isnull)
{
GetState *_state = (GetState *) state;
bool get_next = false;
int lex_level = _state->lex->lex_level;
if (lex_level <= _state->npath &&
_state->pathok[lex_level - 1] &&
_state->path_names != NULL &&
_state->path_names[lex_level - 1] != NULL &&
strcmp(fname, _state->path_names[lex_level - 1]) == 0)
{
if (lex_level < _state->npath)
{
/* if not at end of path just mark path ok */
_state->pathok[lex_level] = true;
}
else
{
/* end of path, so we want this value */
get_next = true;
}
}
if (get_next)
{
/* this object overrides any previous matching object */
_state->tresult = NULL;
_state->result_start = NULL;
if (_state->normalize_results &&
_state->lex->token_type == JSON_TOKEN_STRING)
{
/* for as_text variants, tell get_scalar to set it for us */
_state->next_scalar = true;
}
else
{
/* for non-as_text variants, just note the json starting point */
_state->result_start = _state->lex->token_start;
}
}
}
static void
get_object_field_end(void *state, char *fname, bool isnull)
{
GetState *_state = (GetState *) state;
bool get_last = false;
int lex_level = _state->lex->lex_level;
/* same tests as in get_object_field_start */
if (lex_level <= _state->npath &&
_state->pathok[lex_level - 1] &&
_state->path_names != NULL &&
_state->path_names[lex_level - 1] != NULL &&
strcmp(fname, _state->path_names[lex_level - 1]) == 0)
{
if (lex_level < _state->npath)
{
/* done with this field so reset pathok */
_state->pathok[lex_level] = false;
}
else
{
/* end of path, so we want this value */
get_last = true;
}
}
/* for as_text scalar case, our work is already done */
if (get_last && _state->result_start != NULL)
{
/*
* Make a text object from the previously noted json start up to the
* end of the previous token (the lexer is by now ahead of us on
* whatever came after what we're interested in).
*/
if (isnull && _state->normalize_results)
_state->tresult = (text *) NULL;
else
{
char *start = _state->result_start;
int len = _state->lex->prev_token_terminator - start;
_state->tresult = cstring_to_text_with_len(start, len);
}
/* this should be unnecessary but let's do it for cleanliness: */
_state->result_start = NULL;
}
}
static void
get_array_start(void *state)
{
GetState *_state = (GetState *) state;
int lex_level = _state->lex->lex_level;
if (lex_level < _state->npath)
{
/* Initialize counting of elements in this array */
_state->array_cur_index[lex_level] = -1;
/* INT_MIN value is reserved to represent invalid subscript */
if (_state->path_indexes[lex_level] < 0 &&
_state->path_indexes[lex_level] != INT_MIN)
{
/* Negative subscript -- convert to equivalent positive subscript */
int nelements = json_count_array_elements(_state->lex);
if (-_state->path_indexes[lex_level] <= nelements)
_state->path_indexes[lex_level] += nelements;
}
}
else if (lex_level == 0 && _state->npath == 0)
{
/*
* Special case: we should match the entire array. We only need this
* at the outermost level because at nested levels the match will have
* been started by the outer field or array element callback.
*/
_state->result_start = _state->lex->token_start;
}
}
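/*
 * Example of the negative-subscript conversion above: for a three-element
 * array an index of -1 becomes 2, so
 *
 *     SELECT '[1, 2, 3]'::json -> -1;    -- yields 3
 *
 * An out-of-range index such as -5 is left negative, can never equal
 * array_cur_index, and therefore yields NULL.
 */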
static void
get_array_end(void *state)
{
GetState *_state = (GetState *) state;
int lex_level = _state->lex->lex_level;
if (lex_level == 0 && _state->npath == 0)
{
/* Special case: return the entire array */
char *start = _state->result_start;
int len = _state->lex->prev_token_terminator - start;
_state->tresult = cstring_to_text_with_len(start, len);
}
}
static void
get_array_element_start(void *state, bool isnull)
{
GetState *_state = (GetState *) state;
bool get_next = false;
int lex_level = _state->lex->lex_level;
/* Update array element counter */
if (lex_level <= _state->npath)
_state->array_cur_index[lex_level - 1]++;
if (lex_level <= _state->npath &&
_state->pathok[lex_level - 1] &&
_state->path_indexes != NULL &&
_state->array_cur_index[lex_level - 1] == _state->path_indexes[lex_level - 1])
{
if (lex_level < _state->npath)
{
/* if not at end of path just mark path ok */
_state->pathok[lex_level] = true;
}
else
{
/* end of path, so we want this value */
get_next = true;
}
}
/* same logic as for objects */
if (get_next)
{
_state->tresult = NULL;
_state->result_start = NULL;
if (_state->normalize_results &&
_state->lex->token_type == JSON_TOKEN_STRING)
{
_state->next_scalar = true;
}
else
{
_state->result_start = _state->lex->token_start;
}
}
}
static void
get_array_element_end(void *state, bool isnull)
{
GetState *_state = (GetState *) state;
bool get_last = false;
int lex_level = _state->lex->lex_level;
/* same tests as in get_array_element_start */
if (lex_level <= _state->npath &&
_state->pathok[lex_level - 1] &&
_state->path_indexes != NULL &&
_state->array_cur_index[lex_level - 1] == _state->path_indexes[lex_level - 1])
{
if (lex_level < _state->npath)
{
/* done with this element so reset pathok */
_state->pathok[lex_level] = false;
}
else
{
/* end of path, so we want this value */
get_last = true;
}
}
/* same logic as for objects */
if (get_last && _state->result_start != NULL)
{
if (isnull && _state->normalize_results)
_state->tresult = (text *) NULL;
else
{
char *start = _state->result_start;
int len = _state->lex->prev_token_terminator - start;
_state->tresult = cstring_to_text_with_len(start, len);
}
_state->result_start = NULL;
}
}
static void
get_scalar(void *state, char *token, JsonTokenType tokentype)
{
GetState *_state = (GetState *) state;
int lex_level = _state->lex->lex_level;
/* Check for whole-object match */
if (lex_level == 0 && _state->npath == 0)
{
if (_state->normalize_results && tokentype == JSON_TOKEN_STRING)
{
/* we want the de-escaped string */
_state->next_scalar = true;
}
else if (_state->normalize_results && tokentype == JSON_TOKEN_NULL)
{
_state->tresult = (text *) NULL;
}
else
{
/*
* This is a bit hokey: we will suppress whitespace after the
* scalar token, but not whitespace before it. Probably not worth
* doing our own space-skipping to avoid that.
*/
char *start = _state->lex->input;
int len = _state->lex->prev_token_terminator - start;
_state->tresult = cstring_to_text_with_len(start, len);
}
}
if (_state->next_scalar)
{
/* a de-escaped text value is wanted, so supply it */
_state->tresult = cstring_to_text(token);
/* make sure the next call to get_scalar doesn't overwrite it */
_state->next_scalar = false;
}
}
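/*
 * The next_scalar dance above is what makes the "_as_text" variants return
 * de-escaped values.  For example:
 *
 *     SELECT '"abc"'::json #> '{}';     -- yields the raw json "abc"
 *     SELECT '"abc"'::json #>> '{}';    -- yields the de-escaped text abc
 */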
Datum
jsonb_extract_path(PG_FUNCTION_ARGS)
{
return get_jsonb_path_all(fcinfo, false);
}
Datum
jsonb_extract_path_text(PG_FUNCTION_ARGS)
{
return get_jsonb_path_all(fcinfo, true);
}
static Datum
get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text)
{
Jsonb *jb = PG_GETARG_JSONB(0);
ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
Jsonb *res;
Datum *pathtext;
bool *pathnulls;
int npath;
int i;
bool have_object = false,
have_array = false;
JsonbValue *jbvp = NULL;
JsonbValue tv;
JsonbContainer *container;
/*
* If the array contains any null elements, return NULL, on the grounds
* that you'd have gotten NULL if any RHS value were NULL in a nested
* series of applications of the -> operator. (Note: because we also
* return NULL for error cases such as no-such-field, this is true
* regardless of the contents of the rest of the array.)
*/
if (array_contains_nulls(path))
PG_RETURN_NULL();
deconstruct_array(path, TEXTOID, -1, false, 'i',
&pathtext, &pathnulls, &npath);
/* Identify whether we have object, array, or scalar at top-level */
container = &jb->root;
if (JB_ROOT_IS_OBJECT(jb))
have_object = true;
else if (JB_ROOT_IS_ARRAY(jb) && !JB_ROOT_IS_SCALAR(jb))
have_array = true;
else
{
Assert(JB_ROOT_IS_ARRAY(jb) && JB_ROOT_IS_SCALAR(jb));
/* Extract the scalar value, if it is what we'll return */
if (npath <= 0)
jbvp = getIthJsonbValueFromContainer(container, 0);
}
/*
* If the array is empty, return the entire LHS object, on the grounds
* that we should do zero field or element extractions. For the
* non-scalar case we can just hand back the object without much work. For
* the scalar case, fall through and deal with the value below the loop.
* (This inconsistency arises because there's no easy way to generate a
* JsonbValue directly for root-level containers.)
*/
if (npath <= 0 && jbvp == NULL)
{
if (as_text)
{
PG_RETURN_TEXT_P(cstring_to_text(JsonbToCString(NULL,
container,
VARSIZE(jb))));
}
else
{
/* not text mode - just hand back the jsonb */
PG_RETURN_JSONB(jb);
}
}
for (i = 0; i < npath; i++)
{
if (have_object)
{
jbvp = findJsonbValueFromContainerLen(container,
JB_FOBJECT,
VARDATA(pathtext[i]),
VARSIZE(pathtext[i]) - VARHDRSZ);
}
else if (have_array)
{
long lindex;
uint32 index;
char *indextext = TextDatumGetCString(pathtext[i]);
char *endptr;
errno = 0;
lindex = strtol(indextext, &endptr, 10);
if (endptr == indextext || *endptr != '\0' || errno != 0 ||
lindex > INT_MAX || lindex < INT_MIN)
PG_RETURN_NULL();
if (lindex >= 0)
{
index = (uint32) lindex;
}
else
{
/* Handle negative subscript */
uint32 nelements;
/* Container must be array, but make sure */
if (!JsonContainerIsArray(container))
elog(ERROR, "not a jsonb array");
nelements = JsonContainerSize(container);
if (-lindex > nelements)
PG_RETURN_NULL();
else
index = nelements + lindex;
}
jbvp = getIthJsonbValueFromContainer(container, index);
}
else
{
/* scalar, extraction yields a null */
PG_RETURN_NULL();
}
if (jbvp == NULL)
PG_RETURN_NULL();
else if (i == npath - 1)
break;
if (jbvp->type == jbvBinary)
{
JsonbIterator *it = JsonbIteratorInit((JsonbContainer *) jbvp->val.binary.data);
JsonbIteratorToken r;
r = JsonbIteratorNext(&it, &tv, true);
container = (JsonbContainer *) jbvp->val.binary.data;
have_object = r == WJB_BEGIN_OBJECT;
have_array = r == WJB_BEGIN_ARRAY;
}
else
{
have_object = jbvp->type == jbvObject;
have_array = jbvp->type == jbvArray;
}
}
if (as_text)
{
/* special-case outputs for string and null values */
if (jbvp->type == jbvString)
PG_RETURN_TEXT_P(cstring_to_text_with_len(jbvp->val.string.val,
jbvp->val.string.len));
if (jbvp->type == jbvNull)
PG_RETURN_NULL();
}
res = JsonbValueToJsonb(jbvp);
if (as_text)
{
PG_RETURN_TEXT_P(cstring_to_text(JsonbToCString(NULL,
&res->root,
VARSIZE(res))));
}
else
{
/* not text mode - just hand back the jsonb */
PG_RETURN_JSONB(res);
}
}
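/*
 * Some illustrative behaviors of the path logic above:
 *
 *     SELECT '[1, 2, 3]'::jsonb #> '{-1}';    -- yields 3
 *     SELECT '{"a": 1}'::jsonb #> '{a,b}';    -- yields NULL
 *
 * Negative indexes count from the end of the array; any failed lookup
 * yields a SQL NULL rather than an error.
 */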
/*
* SQL function json_array_length(json) -> int
*/
Datum
json_array_length(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_PP(0);
AlenState *state;
JsonLexContext *lex;
JsonSemAction *sem;
lex = makeJsonLexContext(json, false);
state = palloc0(sizeof(AlenState));
sem = palloc0(sizeof(JsonSemAction));
/* palloc0 does this for us */
#if 0
state->count = 0;
#endif
state->lex = lex;
sem->semstate = (void *) state;
sem->object_start = alen_object_start;
sem->scalar = alen_scalar;
sem->array_element_start = alen_array_element_start;
pg_parse_json(lex, sem);
PG_RETURN_INT32(state->count);
}
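/*
 * Only level-1 elements are counted (see alen_array_element_start), so
 * nested arrays count as one element each.  For example:
 *
 *     SELECT json_array_length('[1, 2, [3, 4]]');    -- yields 3
 */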
Datum
jsonb_array_length(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
if (JB_ROOT_IS_SCALAR(jb))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot get array length of a scalar")));
else if (!JB_ROOT_IS_ARRAY(jb))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot get array length of a non-array")));
PG_RETURN_INT32(JB_ROOT_COUNT(jb));
}
/*
* These next two checks ensure that the json is an array (since it can't be
* a scalar or an object).
*/
static void
alen_object_start(void *state)
{
AlenState *_state = (AlenState *) state;
/* json structure check */
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot get array length of a non-array")));
}
static void
alen_scalar(void *state, char *token, JsonTokenType tokentype)
{
AlenState *_state = (AlenState *) state;
/* json structure check */
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot get array length of a scalar")));
}
static void
alen_array_element_start(void *state, bool isnull)
{
AlenState *_state = (AlenState *) state;
/* just count up all the level 1 elements */
if (_state->lex->lex_level == 1)
_state->count++;
}
/*
* SQL function json_each and json_each_text
*
* decompose a json object into key value pairs.
*
* Unlike json_object_keys() these SRFs operate in materialize mode,
* stashing results into a Tuplestore object as they go.
* The construction of tuples is done using a temporary memory context
* that is cleared out after each tuple is built.
*/
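/*
 * For example:
 *
 *     SELECT * FROM json_each('{"a": "x", "b": 1}');
 *      key | value
 *     -----+-------
 *      a   | "x"
 *      b   | 1
 *
 * json_each_text() returns the value column as de-escaped text instead,
 * so "x" comes back as x and a json null becomes a SQL NULL.
 */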
Datum
json_each(PG_FUNCTION_ARGS)
{
return each_worker(fcinfo, false);
}
Datum
jsonb_each(PG_FUNCTION_ARGS)
{
return each_worker_jsonb(fcinfo, "jsonb_each", false);
}
Datum
json_each_text(PG_FUNCTION_ARGS)
{
return each_worker(fcinfo, true);
}
Datum
jsonb_each_text(PG_FUNCTION_ARGS)
{
return each_worker_jsonb(fcinfo, "jsonb_each_text", true);
}
static Datum
each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, bool as_text)
{
Jsonb *jb = PG_GETARG_JSONB(0);
ReturnSetInfo *rsi;
Tuplestorestate *tuple_store;
TupleDesc tupdesc;
TupleDesc ret_tdesc;
MemoryContext old_cxt,
tmp_cxt;
bool skipNested = false;
JsonbIterator *it;
JsonbValue v;
JsonbIteratorToken r;
if (!JB_ROOT_IS_OBJECT(jb))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on a non-object",
funcname)));
rsi = (ReturnSetInfo *) fcinfo->resultinfo;
if (!rsi || !IsA(rsi, ReturnSetInfo) ||
(rsi->allowedModes & SFRM_Materialize) == 0 ||
rsi->expectedDesc == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that "
"cannot accept a set")));
rsi->returnMode = SFRM_Materialize;
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("function returning record called in context "
"that cannot accept type record")));
old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
ret_tdesc = CreateTupleDescCopy(tupdesc);
BlessTupleDesc(ret_tdesc);
tuple_store =
tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
false, work_mem);
MemoryContextSwitchTo(old_cxt);
tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
"jsonb_each temporary cxt",
ALLOCSET_DEFAULT_SIZES);
it = JsonbIteratorInit(&jb->root);
while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
{
skipNested = true;
if (r == WJB_KEY)
{
text *key;
HeapTuple tuple;
Datum values[2];
bool nulls[2] = {false, false};
/* Use the tmp context so we can clean up after each tuple is done */
old_cxt = MemoryContextSwitchTo(tmp_cxt);
key = cstring_to_text_with_len(v.val.string.val, v.val.string.len);
/*
* The next thing the iterator fetches should be the value, no
* matter what shape it is.
*/
r = JsonbIteratorNext(&it, &v, skipNested);
values[0] = PointerGetDatum(key);
if (as_text)
{
if (v.type == jbvNull)
{
/* a json null is an sql null in text mode */
nulls[1] = true;
values[1] = (Datum) NULL;
}
else
{
text *sv;
if (v.type == jbvString)
{
/* In text mode, scalar strings should be dequoted */
sv = cstring_to_text_with_len(v.val.string.val, v.val.string.len);
}
else
{
/* Turn anything else into a json string */
StringInfo jtext = makeStringInfo();
Jsonb *jb = JsonbValueToJsonb(&v);
(void) JsonbToCString(jtext, &jb->root, 0);
sv = cstring_to_text_with_len(jtext->data, jtext->len);
}
values[1] = PointerGetDatum(sv);
}
}
else
{
/* Not in text mode, just return the Jsonb */
Jsonb *val = JsonbValueToJsonb(&v);
values[1] = PointerGetDatum(val);
}
tuple = heap_form_tuple(ret_tdesc, values, nulls);
tuplestore_puttuple(tuple_store, tuple);
/* clean up and switch back */
MemoryContextSwitchTo(old_cxt);
MemoryContextReset(tmp_cxt);
}
}
MemoryContextDelete(tmp_cxt);
rsi->setResult = tuple_store;
rsi->setDesc = ret_tdesc;
PG_RETURN_NULL();
}
static Datum
each_worker(FunctionCallInfo fcinfo, bool as_text)
{
text *json = PG_GETARG_TEXT_PP(0);
JsonLexContext *lex;
JsonSemAction *sem;
ReturnSetInfo *rsi;
MemoryContext old_cxt;
TupleDesc tupdesc;
EachState *state;
lex = makeJsonLexContext(json, true);
state = palloc0(sizeof(EachState));
sem = palloc0(sizeof(JsonSemAction));
rsi = (ReturnSetInfo *) fcinfo->resultinfo;
if (!rsi || !IsA(rsi, ReturnSetInfo) ||
(rsi->allowedModes & SFRM_Materialize) == 0 ||
rsi->expectedDesc == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that "
"cannot accept a set")));
rsi->returnMode = SFRM_Materialize;
(void) get_call_result_type(fcinfo, NULL, &tupdesc);
/* make these in a sufficiently long-lived memory context */
old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
state->ret_tdesc = CreateTupleDescCopy(tupdesc);
BlessTupleDesc(state->ret_tdesc);
state->tuple_store =
tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
false, work_mem);
MemoryContextSwitchTo(old_cxt);
sem->semstate = (void *) state;
sem->array_start = each_array_start;
sem->scalar = each_scalar;
sem->object_field_start = each_object_field_start;
sem->object_field_end = each_object_field_end;
state->normalize_results = as_text;
state->next_scalar = false;
state->lex = lex;
state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
"json_each temporary cxt",
ALLOCSET_DEFAULT_SIZES);
pg_parse_json(lex, sem);
MemoryContextDelete(state->tmp_cxt);
rsi->setResult = state->tuple_store;
rsi->setDesc = state->ret_tdesc;
PG_RETURN_NULL();
}
static void
each_object_field_start(void *state, char *fname, bool isnull)
{
EachState *_state = (EachState *) state;
/* save a pointer to where the value starts */
if (_state->lex->lex_level == 1)
{
/*
* next_scalar will be reset in the object_field_end handler, and
* since we know the value is a scalar there is no danger of it being
* on while recursing down the tree.
*/
if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
_state->next_scalar = true;
else
_state->result_start = _state->lex->token_start;
}
}
static void
each_object_field_end(void *state, char *fname, bool isnull)
{
EachState *_state = (EachState *) state;
MemoryContext old_cxt;
int len;
text *val;
HeapTuple tuple;
Datum values[2];
bool nulls[2] = {false, false};
/* skip over nested objects */
if (_state->lex->lex_level != 1)
return;
/* use the tmp context so we can clean up after each tuple is done */
old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
values[0] = CStringGetTextDatum(fname);
if (isnull && _state->normalize_results)
{
nulls[1] = true;
values[1] = (Datum) 0;
}
else if (_state->next_scalar)
{
values[1] = CStringGetTextDatum(_state->normalized_scalar);
_state->next_scalar = false;
}
else
{
len = _state->lex->prev_token_terminator - _state->result_start;
val = cstring_to_text_with_len(_state->result_start, len);
values[1] = PointerGetDatum(val);
}
tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
tuplestore_puttuple(_state->tuple_store, tuple);
/* clean up and switch back */
MemoryContextSwitchTo(old_cxt);
MemoryContextReset(_state->tmp_cxt);
}
static void
each_array_start(void *state)
{
EachState *_state = (EachState *) state;
/* json structure check */
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot deconstruct an array as an object")));
}
static void
each_scalar(void *state, char *token, JsonTokenType tokentype)
{
EachState *_state = (EachState *) state;
/* json structure check */
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot deconstruct a scalar")));
/* supply de-escaped value if required */
if (_state->next_scalar)
_state->normalized_scalar = token;
}
/*
* SQL functions json_array_elements and json_array_elements_text
*
* get the elements from a json array
*
* a lot of this processing is similar to the json_each* functions
*/
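/*
 * For example:
 *
 *     SELECT * FROM json_array_elements('[1, true, [2, false]]');
 *        value
 *     ------------
 *      1
 *      true
 *      [2, false]
 */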
Datum
jsonb_array_elements(PG_FUNCTION_ARGS)
{
return elements_worker_jsonb(fcinfo, "jsonb_array_elements", false);
}
Datum
jsonb_array_elements_text(PG_FUNCTION_ARGS)
{
return elements_worker_jsonb(fcinfo, "jsonb_array_elements_text", true);
}
static Datum
elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
bool as_text)
{
Jsonb *jb = PG_GETARG_JSONB(0);
ReturnSetInfo *rsi;
Tuplestorestate *tuple_store;
TupleDesc tupdesc;
TupleDesc ret_tdesc;
MemoryContext old_cxt,
tmp_cxt;
bool skipNested = false;
JsonbIterator *it;
JsonbValue v;
JsonbIteratorToken r;
if (JB_ROOT_IS_SCALAR(jb))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot extract elements from a scalar")));
else if (!JB_ROOT_IS_ARRAY(jb))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot extract elements from an object")));
rsi = (ReturnSetInfo *) fcinfo->resultinfo;
if (!rsi || !IsA(rsi, ReturnSetInfo) ||
(rsi->allowedModes & SFRM_Materialize) == 0 ||
rsi->expectedDesc == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that "
"cannot accept a set")));
rsi->returnMode = SFRM_Materialize;
/* it's a simple type, so don't use get_call_result_type() */
tupdesc = rsi->expectedDesc;
old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
ret_tdesc = CreateTupleDescCopy(tupdesc);
BlessTupleDesc(ret_tdesc);
tuple_store =
tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
false, work_mem);
MemoryContextSwitchTo(old_cxt);
tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
"jsonb_array_elements temporary cxt",
ALLOCSET_DEFAULT_SIZES);
it = JsonbIteratorInit(&jb->root);
while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
{
skipNested = true;
if (r == WJB_ELEM)
{
HeapTuple tuple;
Datum values[1];
bool nulls[1] = {false};
/* use the tmp context so we can clean up after each tuple is done */
old_cxt = MemoryContextSwitchTo(tmp_cxt);
if (!as_text)
{
Jsonb *val = JsonbValueToJsonb(&v);
values[0] = PointerGetDatum(val);
}
else
{
if (v.type == jbvNull)
{
/* a json null is an sql null in text mode */
nulls[0] = true;
values[0] = (Datum) NULL;
}
else
{
text *sv;
if (v.type == jbvString)
{
/* in text mode scalar strings should be dequoted */
sv = cstring_to_text_with_len(v.val.string.val, v.val.string.len);
}
else
{
/* turn anything else into a json string */
StringInfo jtext = makeStringInfo();
Jsonb *jb = JsonbValueToJsonb(&v);
(void) JsonbToCString(jtext, &jb->root, 0);
sv = cstring_to_text_with_len(jtext->data, jtext->len);
}
values[0] = PointerGetDatum(sv);
}
}
tuple = heap_form_tuple(ret_tdesc, values, nulls);
tuplestore_puttuple(tuple_store, tuple);
/* clean up and switch back */
MemoryContextSwitchTo(old_cxt);
MemoryContextReset(tmp_cxt);
}
}
MemoryContextDelete(tmp_cxt);
rsi->setResult = tuple_store;
rsi->setDesc = ret_tdesc;
PG_RETURN_NULL();
}
Datum
json_array_elements(PG_FUNCTION_ARGS)
{
return elements_worker(fcinfo, "json_array_elements", false);
}
Datum
json_array_elements_text(PG_FUNCTION_ARGS)
{
return elements_worker(fcinfo, "json_array_elements_text", true);
}
static Datum
elements_worker(FunctionCallInfo fcinfo, const char *funcname, bool as_text)
{
text *json = PG_GETARG_TEXT_PP(0);
/* elements only needs escaped strings when as_text */
JsonLexContext *lex = makeJsonLexContext(json, as_text);
JsonSemAction *sem;
ReturnSetInfo *rsi;
MemoryContext old_cxt;
TupleDesc tupdesc;
ElementsState *state;
state = palloc0(sizeof(ElementsState));
sem = palloc0(sizeof(JsonSemAction));
rsi = (ReturnSetInfo *) fcinfo->resultinfo;
if (!rsi || !IsA(rsi, ReturnSetInfo) ||
(rsi->allowedModes & SFRM_Materialize) == 0 ||
rsi->expectedDesc == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that "
"cannot accept a set")));
rsi->returnMode = SFRM_Materialize;
/* it's a simple type, so don't use get_call_result_type() */
tupdesc = rsi->expectedDesc;
/* make these in a sufficiently long-lived memory context */
old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
state->ret_tdesc = CreateTupleDescCopy(tupdesc);
BlessTupleDesc(state->ret_tdesc);
state->tuple_store =
tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
false, work_mem);
MemoryContextSwitchTo(old_cxt);
sem->semstate = (void *) state;
sem->object_start = elements_object_start;
sem->scalar = elements_scalar;
sem->array_element_start = elements_array_element_start;
sem->array_element_end = elements_array_element_end;
state->function_name = funcname;
state->normalize_results = as_text;
state->next_scalar = false;
state->lex = lex;
state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
"json_array_elements temporary cxt",
ALLOCSET_DEFAULT_SIZES);
pg_parse_json(lex, sem);
MemoryContextDelete(state->tmp_cxt);
rsi->setResult = state->tuple_store;
rsi->setDesc = state->ret_tdesc;
PG_RETURN_NULL();
}
static void
elements_array_element_start(void *state, bool isnull)
{
ElementsState *_state = (ElementsState *) state;
/* save a pointer to where the value starts */
if (_state->lex->lex_level == 1)
{
/*
* next_scalar will be reset in the array_element_end handler, and
* since we know the value is a scalar there is no danger of it being
* on while recursing down the tree.
*/
if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
_state->next_scalar = true;
else
_state->result_start = _state->lex->token_start;
}
}
static void
elements_array_element_end(void *state, bool isnull)
{
ElementsState *_state = (ElementsState *) state;
MemoryContext old_cxt;
int len;
text *val;
HeapTuple tuple;
Datum values[1];
bool nulls[1] = {false};
/* skip over nested objects */
if (_state->lex->lex_level != 1)
return;
/* use the tmp context so we can clean up after each tuple is done */
old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
if (isnull && _state->normalize_results)
{
nulls[0] = true;
values[0] = (Datum) NULL;
}
else if (_state->next_scalar)
{
values[0] = CStringGetTextDatum(_state->normalized_scalar);
_state->next_scalar = false;
}
else
{
len = _state->lex->prev_token_terminator - _state->result_start;
val = cstring_to_text_with_len(_state->result_start, len);
values[0] = PointerGetDatum(val);
}
tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
tuplestore_puttuple(_state->tuple_store, tuple);
/* clean up and switch back */
MemoryContextSwitchTo(old_cxt);
MemoryContextReset(_state->tmp_cxt);
}
static void
elements_object_start(void *state)
{
ElementsState *_state = (ElementsState *) state;
/* json structure check */
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on a non-array",
_state->function_name)));
}
static void
elements_scalar(void *state, char *token, JsonTokenType tokentype)
{
ElementsState *_state = (ElementsState *) state;
/* json structure check */
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on a scalar",
_state->function_name)));
/* supply de-escaped value if required */
if (_state->next_scalar)
_state->normalized_scalar = token;
}
/*
* SQL function json_populate_record
*
* set fields in a record from the argument json
*
* Code adapted shamelessly from hstore's populate_record
* which is in turn partly adapted from record_out.
*
* The json is decomposed into a hash table, in which each
* field in the record is then looked up by name. For jsonb
* we fetch the values direct from the object.
*/
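/*
 * For example, assuming a composite type twoints with fields (a int, b int):
 *
 *     SELECT * FROM json_populate_record(NULL::twoints, '{"a": 1, "b": 2}');
 *      a | b
 *     ---+---
 *      1 | 2
 *
 * Fields of the record type that are absent from the json keep the value
 * from the base record argument (NULL here).
 */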
Datum
jsonb_populate_record(PG_FUNCTION_ARGS)
{
return populate_record_worker(fcinfo, "jsonb_populate_record", true);
}
Datum
jsonb_to_record(PG_FUNCTION_ARGS)
{
return populate_record_worker(fcinfo, "jsonb_to_record", false);
}
Datum
json_populate_record(PG_FUNCTION_ARGS)
{
return populate_record_worker(fcinfo, "json_populate_record", true);
}
Datum
json_to_record(PG_FUNCTION_ARGS)
{
return populate_record_worker(fcinfo, "json_to_record", false);
}
/* helper function for diagnostics */
static void
populate_array_report_expected_array(PopulateArrayContext *ctx, int ndim)
{
if (ndim <= 0)
{
if (ctx->colname)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("expected json array"),
errhint("see the value of key \"%s\"", ctx->colname)));
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("expected json array")));
}
else
{
StringInfoData indices;
int i;
initStringInfo(&indices);
Assert(ctx->ndims > 0 && ndim < ctx->ndims);
for (i = 0; i < ndim; i++)
appendStringInfo(&indices, "[%d]", ctx->sizes[i]);
if (ctx->colname)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("expected json array"),
errhint("see the array element %s of key \"%s\"",
indices.data, ctx->colname)));
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("expected json array"),
errhint("see the array element %s",
indices.data)));
}
}
/* set the number of dimensions of the populated array when it becomes known */
static void
populate_array_assign_ndims(PopulateArrayContext *ctx, int ndims)
{
int i;
Assert(ctx->ndims <= 0);
if (ndims <= 0)
populate_array_report_expected_array(ctx, ndims);
ctx->ndims = ndims;
ctx->dims = palloc(sizeof(int) * ndims);
ctx->sizes = palloc0(sizeof(int) * ndims);
for (i = 0; i < ndims; i++)
ctx->dims[i] = -1; /* dimensions are unknown yet */
}
/* check the populated subarray dimension */
static void
populate_array_check_dimension(PopulateArrayContext *ctx, int ndim)
{
int dim = ctx->sizes[ndim]; /* current dimension counter */
if (ctx->dims[ndim] == -1)
ctx->dims[ndim] = dim; /* assign dimension if not yet known */
else if (ctx->dims[ndim] != dim)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed json array"),
errdetail("Multidimensional arrays must have "
"sub-arrays with matching dimensions.")));
/* reset the current array dimension size counter */
ctx->sizes[ndim] = 0;
/* increment the parent dimension counter if it is a nested sub-array */
if (ndim > 0)
ctx->sizes[ndim - 1]++;
}
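/*
 * For example, populating an int[] column from the json value
 * [[1, 2], [3]] errors out here: the first sub-array fixes the inner
 * dimension at 2, so the length-1 second sub-array fails the check.
 */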
static void
populate_array_element(PopulateArrayContext *ctx, int ndim, JsValue *jsv)
{
Datum element;
bool element_isnull;
/* populate the array element */
element = populate_record_field(ctx->aio->element_info,
ctx->aio->element_type,
ctx->aio->element_typmod,
NULL, ctx->mcxt, PointerGetDatum(NULL),
jsv, &element_isnull);
accumArrayResult(ctx->astate, element, element_isnull,
ctx->aio->element_type, ctx->acxt);
Assert(ndim > 0);
ctx->sizes[ndim - 1]++; /* increment current dimension counter */
}
/* json object start handler for populate_array_json() */
static void
populate_array_object_start(void *_state)
{
PopulateArrayState *state = (PopulateArrayState *) _state;
int ndim = state->lex->lex_level;
if (state->ctx->ndims <= 0)
populate_array_assign_ndims(state->ctx, ndim);
else if (ndim < state->ctx->ndims)
populate_array_report_expected_array(state->ctx, ndim);
}
/* json array end handler for populate_array_json() */
static void
populate_array_array_end(void *_state)
{
PopulateArrayState *state = (PopulateArrayState *) _state;
PopulateArrayContext *ctx = state->ctx;
int ndim = state->lex->lex_level;
if (ctx->ndims <= 0)
populate_array_assign_ndims(ctx, ndim + 1);
if (ndim < ctx->ndims)
populate_array_check_dimension(ctx, ndim);
}
/* json array element start handler for populate_array_json() */
static void
populate_array_element_start(void *_state, bool isnull)
{
PopulateArrayState *state = (PopulateArrayState *) _state;
int ndim = state->lex->lex_level;
if (state->ctx->ndims <= 0 || ndim == state->ctx->ndims)
{
/* remember current array element start */
state->element_start = state->lex->token_start;
state->element_type = state->lex->token_type;
state->element_scalar = NULL;
}
}
/* json array element end handler for populate_array_json() */
static void
populate_array_element_end(void *_state, bool isnull)
{
PopulateArrayState *state = (PopulateArrayState *) _state;
PopulateArrayContext *ctx = state->ctx;
int ndim = state->lex->lex_level;
Assert(ctx->ndims > 0);
if (ndim == ctx->ndims)
{
JsValue jsv;
jsv.is_json = true;
jsv.val.json.type = state->element_type;
if (isnull)
{
Assert(jsv.val.json.type == JSON_TOKEN_NULL);
jsv.val.json.str = NULL;
jsv.val.json.len = 0;
}
else if (state->element_scalar)
{
jsv.val.json.str = state->element_scalar;
jsv.val.json.len = -1; /* null-terminated */
}
else
{
jsv.val.json.str = state->element_start;
jsv.val.json.len = (state->lex->prev_token_terminator -
state->element_start) * sizeof(char);
}
populate_array_element(ctx, ndim, &jsv);
}
}
/* json scalar handler for populate_array_json() */
static void
populate_array_scalar(void *_state, char *token, JsonTokenType tokentype)
{
PopulateArrayState *state = (PopulateArrayState *) _state;
PopulateArrayContext *ctx = state->ctx;
int ndim = state->lex->lex_level;
if (ctx->ndims <= 0)
populate_array_assign_ndims(ctx, ndim);
else if (ndim < ctx->ndims)
populate_array_report_expected_array(ctx, ndim);
if (ndim == ctx->ndims)
{
/* remember the scalar element token */
state->element_scalar = token;
/* element_type must already be set in populate_array_element_start() */
Assert(state->element_type == tokentype);
}
}
/* parse a json array and populate array */
static void
populate_array_json(PopulateArrayContext *ctx, char *json, int len)
{
PopulateArrayState state;
JsonSemAction sem;
state.lex = makeJsonLexContextCstringLen(json, len, true);
state.ctx = ctx;
memset(&sem, 0, sizeof(sem));
sem.semstate = (void *) &state;
sem.object_start = populate_array_object_start;
sem.array_end = populate_array_array_end;
sem.array_element_start = populate_array_element_start;
sem.array_element_end = populate_array_element_end;
sem.scalar = populate_array_scalar;
pg_parse_json(state.lex, &sem);
/* number of dimensions should be already known */
Assert(ctx->ndims > 0 && ctx->dims);
pfree(state.lex);
}
/*
* populate_array_dim_jsonb() -- Iterate recursively through jsonb sub-array
* elements and accumulate result using given ArrayBuildState.
*/
static void
populate_array_dim_jsonb(PopulateArrayContext *ctx, /* context */
JsonbValue *jbv, /* jsonb sub-array */
int ndim) /* current dimension */
{
JsonbContainer *jbc = jbv->val.binary.data;
JsonbIterator *it;
JsonbIteratorToken tok;
JsonbValue val;
JsValue jsv;
check_stack_depth();
if (jbv->type != jbvBinary || !JsonContainerIsArray(jbc))
populate_array_report_expected_array(ctx, ndim - 1);
Assert(!JsonContainerIsScalar(jbc));
it = JsonbIteratorInit(jbc);
tok = JsonbIteratorNext(&it, &val, true);
Assert(tok == WJB_BEGIN_ARRAY);
tok = JsonbIteratorNext(&it, &val, true);
/*
* If the number of dimensions is not yet known and we have found end of
* the array, or the first child element is not an array, then assign the
* number of dimensions now.
*/
if (ctx->ndims <= 0 &&
(tok == WJB_END_ARRAY ||
(tok == WJB_ELEM &&
(val.type != jbvBinary ||
!JsonContainerIsArray(val.val.binary.data)))))
populate_array_assign_ndims(ctx, ndim);
jsv.is_json = false;
jsv.val.jsonb = &val;
/* process all the array elements */
while (tok == WJB_ELEM)
{
/*
* Recurse only if the number of dimensions is still unknown or if
* this is not the innermost dimension.
*/
if (ctx->ndims > 0 && ndim >= ctx->ndims)
populate_array_element(ctx, ndim, &jsv);
else
{
/* populate child sub-array */
populate_array_dim_jsonb(ctx, &val, ndim + 1);
/* number of dimensions should be already known */
Assert(ctx->ndims > 0 && ctx->dims);
populate_array_check_dimension(ctx, ndim);
}
tok = JsonbIteratorNext(&it, &val, true);
}
Assert(tok == WJB_END_ARRAY);
/* free iterator, iterating until WJB_DONE */
tok = JsonbIteratorNext(&it, &val, true);
Assert(tok == WJB_DONE && !it);
}
/* recursively populate an array from json/jsonb */
static Datum
populate_array(ArrayIOData *aio,
const char *colname,
MemoryContext mcxt,
JsValue *jsv)
{
PopulateArrayContext ctx;
Datum result;
int *lbs;
int i;
ctx.aio = aio;
ctx.mcxt = mcxt;
ctx.acxt = CurrentMemoryContext;
ctx.astate = initArrayResult(aio->element_type, ctx.acxt, true);
ctx.colname = colname;
ctx.ndims = 0; /* unknown yet */
ctx.dims = NULL;
ctx.sizes = NULL;
if (jsv->is_json)
populate_array_json(&ctx, jsv->val.json.str,
jsv->val.json.len >= 0 ? jsv->val.json.len
: strlen(jsv->val.json.str));
else
{
populate_array_dim_jsonb(&ctx, jsv->val.jsonb, 1);
ctx.dims[0] = ctx.sizes[0];
}
Assert(ctx.ndims > 0);
lbs = palloc(sizeof(int) * ctx.ndims);
for (i = 0; i < ctx.ndims; i++)
lbs[i] = 1;
result = makeMdArrayResult(ctx.astate, ctx.ndims, ctx.dims, lbs,
ctx.acxt, true);
pfree(ctx.dims);
pfree(ctx.sizes);
pfree(lbs);
return result;
}
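/*
 * For example, assuming a composite type arrrec with a field a of type
 * int[]:
 *
 *     SELECT a FROM json_populate_record(NULL::arrrec, '{"a": [[1,2],[3,4]]}');
 *     -- yields the two-dimensional array {{1,2},{3,4}}
 */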
static void
JsValueToJsObject(JsValue *jsv, JsObject *jso)
{
jso->is_json = jsv->is_json;
if (jsv->is_json)
{
/* convert plain-text json into a hash table */
jso->val.json_hash =
get_json_object_as_hash(jsv->val.json.str,
jsv->val.json.len >= 0
? jsv->val.json.len
: strlen(jsv->val.json.str),
"populate_composite");
}
else
{
JsonbValue *jbv = jsv->val.jsonb;
if (jbv->type == jbvBinary &&
JsonContainerIsObject(jbv->val.binary.data))
jso->val.jsonb_cont = jbv->val.binary.data;
else
jso->val.jsonb_cont = NULL;
}
}
/* recursively populate a composite (row type) value from json/jsonb */
static Datum
populate_composite(CompositeIOData *io,
Oid typid,
int32 typmod,
const char *colname,
MemoryContext mcxt,
HeapTupleHeader defaultval,
JsValue *jsv)
{
HeapTupleHeader tuple;
JsObject jso;
/* acquire cached tuple descriptor */
if (!io->tupdesc ||
io->tupdesc->tdtypeid != typid ||
io->tupdesc->tdtypmod != typmod)
{
TupleDesc tupdesc = lookup_rowtype_tupdesc(typid, typmod);
MemoryContext oldcxt;
if (io->tupdesc)
FreeTupleDesc(io->tupdesc);
/* copy tuple desc without constraints into cache memory context */
oldcxt = MemoryContextSwitchTo(mcxt);
io->tupdesc = CreateTupleDescCopy(tupdesc);
MemoryContextSwitchTo(oldcxt);
ReleaseTupleDesc(tupdesc);
}
/* prepare input value */
JsValueToJsObject(jsv, &jso);
/* populate resulting record tuple */
tuple = populate_record(io->tupdesc, &io->record_io,
defaultval, mcxt, &jso);
JsObjectFree(&jso);
return HeapTupleHeaderGetDatum(tuple);
}
/* populate non-null scalar value from json/jsonb value */
static Datum
populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv)
{
Datum res;
char *str = NULL;
char *json = NULL;
if (jsv->is_json)
{
int len = jsv->val.json.len;
json = jsv->val.json.str;
Assert(json);
/* already done the hard work in the json case */
if ((typid == JSONOID || typid == JSONBOID) &&
jsv->val.json.type == JSON_TOKEN_STRING)
{
/*
* Add quotes around string value (should be already escaped) if
* converting to json/jsonb.
*/
if (len < 0)
len = strlen(json);
str = palloc(len + sizeof(char) * 3);
str[0] = '"';
memcpy(&str[1], json, len);
str[len + 1] = '"';
str[len + 2] = '\0';
}
else if (len >= 0)
{
/* Need to copy non-null-terminated string */
str = palloc(len + 1 * sizeof(char));
memcpy(str, json, len);
str[len] = '\0';
}
else
str = json; /* null-terminated string */
}
else
{
JsonbValue *jbv = jsv->val.jsonb;
if (typid == JSONBOID)
{
Jsonb *jsonb = JsonbValueToJsonb(jbv); /* directly use jsonb */
return JsonbGetDatum(jsonb);
}
/* convert jsonb to string for typio call */
else if (typid == JSONOID && jbv->type != jbvBinary)
{
/*
* Convert scalar jsonb (non-scalars are passed here as jbvBinary)
* to json string, preserving quotes around top-level strings.
*/
Jsonb *jsonb = JsonbValueToJsonb(jbv);
str = JsonbToCString(NULL, &jsonb->root, VARSIZE(jsonb));
}
else if (jbv->type == jbvString) /* quotes are stripped */
str = pnstrdup(jbv->val.string.val, jbv->val.string.len);
else if (jbv->type == jbvBool)
str = pstrdup(jbv->val.boolean ? "true" : "false");
else if (jbv->type == jbvNumeric)
str = DatumGetCString(DirectFunctionCall1(numeric_out,
PointerGetDatum(jbv->val.numeric)));
else if (jbv->type == jbvBinary)
str = JsonbToCString(NULL, jbv->val.binary.data,
jbv->val.binary.len);
else
elog(ERROR, "unrecognized jsonb type: %d", (int) jbv->type);
}
res = InputFunctionCall(&io->typiofunc, str, io->typioparam, typmod);
/* free temporary buffer */
if (str != json)
pfree(str);
return res;
}
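/*
 * populate a domain value from a json/jsonb value, running the result
 * through domain_check() (NULLs included, since domain constraints may
 * reject them)
 */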
static Datum
populate_domain(DomainIOData *io,
Oid typid,
const char *colname,
MemoryContext mcxt,
JsValue *jsv,
bool isnull)
{
Datum res;
if (isnull)
res = (Datum) 0;
else
{
res = populate_record_field(io->base_io,
io->base_typid, io->base_typmod,
colname, mcxt, PointerGetDatum(NULL),
jsv, &isnull);
Assert(!isnull);
}
domain_check(res, isnull, typid, &io->domain_info, mcxt);
return res;
}
/* prepare column metadata cache for the given type */
static void
prepare_column_cache(ColumnIOData *column,
Oid typid,
int32 typmod,
MemoryContext mcxt,
bool json)
{
HeapTuple tup;
Form_pg_type type;
column->typid = typid;
column->typmod = typmod;
tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for type %u", typid);
type = (Form_pg_type) GETSTRUCT(tup);
if (type->typtype == TYPTYPE_DOMAIN)
{
column->typcat = TYPECAT_DOMAIN;
column->io.domain.base_typid = type->typbasetype;
column->io.domain.base_typmod = type->typtypmod;
column->io.domain.base_io = MemoryContextAllocZero(mcxt,
sizeof(ColumnIOData));
column->io.domain.domain_info = NULL;
}
else if (type->typtype == TYPTYPE_COMPOSITE || typid == RECORDOID)
{
column->typcat = TYPECAT_COMPOSITE;
column->io.composite.record_io = NULL;
column->io.composite.tupdesc = NULL;
}
else if (type->typlen == -1 && OidIsValid(type->typelem))
{
column->typcat = TYPECAT_ARRAY;
column->io.array.element_info = MemoryContextAllocZero(mcxt,
sizeof(ColumnIOData));
column->io.array.element_type = type->typelem;
/* array element typemod stored in attribute's typmod */
column->io.array.element_typmod = typmod;
}
else
column->typcat = TYPECAT_SCALAR;
/* don't need input function when converting from jsonb to jsonb */
if (json || typid != JSONBOID)
{
Oid typioproc;
getTypeInputInfo(typid, &typioproc, &column->scalar_io.typioparam);
fmgr_info_cxt(typioproc, &column->scalar_io.typiofunc, mcxt);
}
ReleaseSysCache(tup);
}
/* recursively populate a record field or an array element from a json/jsonb value */
static Datum
populate_record_field(ColumnIOData *col,
Oid typid,
int32 typmod,
const char *colname,
MemoryContext mcxt,
Datum defaultval,
JsValue *jsv,
bool *isnull)
{
TypeCat typcat;
check_stack_depth();
/* prepare column metadata cache for the given type */
if (col->typid != typid || col->typmod != typmod)
prepare_column_cache(col, typid, typmod, mcxt, jsv->is_json);
*isnull = JsValueIsNull(jsv);
typcat = col->typcat;
/* try to convert json string to a non-scalar type through input function */
if (JsValueIsString(jsv) &&
(typcat == TYPECAT_ARRAY || typcat == TYPECAT_COMPOSITE))
typcat = TYPECAT_SCALAR;
/* we must perform domain checks for NULLs */
if (*isnull && typcat != TYPECAT_DOMAIN)
return (Datum) 0;
switch (typcat)
{
case TYPECAT_SCALAR:
return populate_scalar(&col->scalar_io, typid, typmod, jsv);
case TYPECAT_ARRAY:
return populate_array(&col->io.array, colname, mcxt, jsv);
case TYPECAT_COMPOSITE:
return populate_composite(&col->io.composite, typid, typmod,
colname, mcxt,
DatumGetPointer(defaultval)
? DatumGetHeapTupleHeader(defaultval)
: NULL,
jsv);
case TYPECAT_DOMAIN:
return populate_domain(&col->io.domain, typid, colname, mcxt,
jsv, *isnull);
default:
elog(ERROR, "unrecognized type category '%c'", typcat);
return (Datum) 0;
}
}
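/* allocate a zero-initialized RecordIOData with room for ncolumns columns */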
static RecordIOData *
allocate_record_info(MemoryContext mcxt, int ncolumns)
{
RecordIOData *data = (RecordIOData *)
MemoryContextAlloc(mcxt,
offsetof(RecordIOData, columns) +
ncolumns * sizeof(ColumnIOData));
data->record_type = InvalidOid;
data->record_typmod = 0;
data->ncolumns = ncolumns;
MemSet(data->columns, 0, sizeof(ColumnIOData) * ncolumns);
return data;
}
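/* fetch the named field of a JsObject into *jsv; returns true if it was found */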
static bool
JsObjectGetField(JsObject *obj, char *field, JsValue *jsv)
{
jsv->is_json = obj->is_json;
if (jsv->is_json)
{
JsonHashEntry *hashentry = hash_search(obj->val.json_hash, field,
HASH_FIND, NULL);
jsv->val.json.type = hashentry ? hashentry->type : JSON_TOKEN_NULL;
jsv->val.json.str = jsv->val.json.type == JSON_TOKEN_NULL ? NULL :
hashentry->val;
jsv->val.json.len = jsv->val.json.str ? -1 : 0; /* null-terminated */
return hashentry != NULL;
}
else
{
jsv->val.jsonb = !obj->val.jsonb_cont ? NULL :
findJsonbValueFromContainerLen(obj->val.jsonb_cont, JB_FOBJECT,
field, strlen(field));
return jsv->val.jsonb != NULL;
}
}
/* populate a record tuple from json/jsonb value */
static HeapTupleHeader
populate_record(TupleDesc tupdesc,
RecordIOData **record_p,
HeapTupleHeader defaultval,
MemoryContext mcxt,
JsObject *obj)
{
RecordIOData *record = *record_p;
Datum *values;
bool *nulls;
HeapTuple res;
int ncolumns = tupdesc->natts;
int i;
/*
* if the input json is empty, we can only skip the rest if we were passed
* in a non-null record, since otherwise there may be issues with domain
* nulls.
*/
if (defaultval && JsObjectIsEmpty(obj))
return defaultval;
/* (re)allocate metadata cache */
if (record == NULL ||
record->ncolumns != ncolumns)
*record_p = record = allocate_record_info(mcxt, ncolumns);
/* invalidate metadata cache if the record type has changed */
if (record->record_type != tupdesc->tdtypeid ||
record->record_typmod != tupdesc->tdtypmod)
{
MemSet(record, 0, offsetof(RecordIOData, columns) +
ncolumns * sizeof(ColumnIOData));
record->record_type = tupdesc->tdtypeid;
record->record_typmod = tupdesc->tdtypmod;
record->ncolumns = ncolumns;
}
values = (Datum *) palloc(ncolumns * sizeof(Datum));
nulls = (bool *) palloc(ncolumns * sizeof(bool));
if (defaultval)
{
HeapTupleData tuple;
/* Build a temporary HeapTuple control structure */
tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = defaultval;
/* Break down the tuple into fields */
heap_deform_tuple(&tuple, tupdesc, values, nulls);
}
else
{
for (i = 0; i < ncolumns; ++i)
{
values[i] = (Datum) 0;
nulls[i] = true;
}
}
for (i = 0; i < ncolumns; ++i)
{
Form_pg_attribute att = tupdesc->attrs[i];
char *colname = NameStr(att->attname);
JsValue field = {0};
bool found;
/* Ignore dropped columns in datatype */
if (att->attisdropped)
{
nulls[i] = true;
continue;
}
found = JsObjectGetField(obj, colname, &field);
/*
* we can't just skip here if the key wasn't found since we might have
* a domain to deal with. If we were passed in a non-null record
* datum, we assume that the existing values are valid (if they're
* not, then it's not our fault), but if we were passed in a null,
* then every field which we don't populate needs to be run through
* the input function just in case it's a domain type.
*/
if (defaultval && !found)
continue;
values[i] = populate_record_field(&record->columns[i],
att->atttypid,
att->atttypmod,
colname,
mcxt,
nulls[i] ? (Datum) 0 : values[i],
&field,
&nulls[i]);
}
res = heap_form_tuple(tupdesc, values, nulls);
pfree(values);
pfree(nulls);
return res->t_data;
}
static Datum
populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
bool have_record_arg)
{
int json_arg_num = have_record_arg ? 1 : 0;
Oid jtype = get_fn_expr_argtype(fcinfo->flinfo, json_arg_num);
JsValue jsv = {0};
HeapTupleHeader rec = NULL;
Oid tupType;
int32 tupTypmod;
TupleDesc tupdesc = NULL;
Datum rettuple;
JsonbValue jbv;
MemoryContext fnmcxt = fcinfo->flinfo->fn_mcxt;
PopulateRecordCache *cache = fcinfo->flinfo->fn_extra;
Assert(jtype == JSONOID || jtype == JSONBOID);
/*
* We arrange to look up the needed I/O info just once per series of
* calls, assuming the record type doesn't change underneath us.
*/
if (!cache)
fcinfo->flinfo->fn_extra = cache =
MemoryContextAllocZero(fnmcxt, sizeof(*cache));
if (have_record_arg)
{
Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
if (cache->argtype != argtype)
{
if (!type_is_rowtype(argtype))
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("first argument of %s must be a row type",
funcname)));
cache->argtype = argtype;
}
if (PG_ARGISNULL(0))
{
if (PG_ARGISNULL(1))
PG_RETURN_NULL();
/*
* We have no tuple to look at, so the only source of type info is
* the argtype. The lookup_rowtype_tupdesc call below will error
* out if we don't have a known composite type oid here.
*/
tupType = argtype;
tupTypmod = -1;
}
else
{
rec = PG_GETARG_HEAPTUPLEHEADER(0);
if (PG_ARGISNULL(1))
PG_RETURN_POINTER(rec);
/* Extract type info from the tuple itself */
tupType = HeapTupleHeaderGetTypeId(rec);
tupTypmod = HeapTupleHeaderGetTypMod(rec);
}
}
else
{
/* json{b}_to_record case */
if (PG_ARGISNULL(0))
PG_RETURN_NULL();
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("function returning record called in context "
"that cannot accept type record"),
errhint("Try calling the function in the FROM clause "
"using a column definition list.")));
Assert(tupdesc);
/*
* Add tupdesc to the cache and set the appropriate values of
* tupType/tupTypmod for proper cache usage in populate_composite().
*/
cache->io.tupdesc = tupdesc;
tupType = tupdesc->tdtypeid;
tupTypmod = tupdesc->tdtypmod;
}
jsv.is_json = jtype == JSONOID;
if (jsv.is_json)
{
text *json = PG_GETARG_TEXT_PP(json_arg_num);
jsv.val.json.str = VARDATA_ANY(json);
jsv.val.json.len = VARSIZE_ANY_EXHDR(json);
jsv.val.json.type = JSON_TOKEN_INVALID; /* not used in
* populate_composite() */
}
else
{
Jsonb *jb = PG_GETARG_JSONB(json_arg_num);
jsv.val.jsonb = &jbv;
/* fill binary jsonb value pointing to jb */
jbv.type = jbvBinary;
jbv.val.binary.data = &jb->root;
jbv.val.binary.len = VARSIZE(jb) - VARHDRSZ;
}
rettuple = populate_composite(&cache->io, tupType, tupTypmod,
NULL, fnmcxt, rec, &jsv);
if (tupdesc)
{
cache->io.tupdesc = NULL;
ReleaseTupleDesc(tupdesc);
}
PG_RETURN_DATUM(rettuple);
}
/*
* get_json_object_as_hash
*
* decompose a json object into a hash table.
*/
static HTAB *
get_json_object_as_hash(char *json, int len, const char *funcname)
{
HASHCTL ctl;
HTAB *tab;
JHashState *state;
JsonLexContext *lex = makeJsonLexContextCstringLen(json, len, true);
JsonSemAction *sem;
memset(&ctl, 0, sizeof(ctl));
ctl.keysize = NAMEDATALEN;
ctl.entrysize = sizeof(JsonHashEntry);
ctl.hcxt = CurrentMemoryContext;
tab = hash_create("json object hashtable",
100,
&ctl,
HASH_ELEM | HASH_CONTEXT);
state = palloc0(sizeof(JHashState));
sem = palloc0(sizeof(JsonSemAction));
state->function_name = funcname;
state->hash = tab;
state->lex = lex;
sem->semstate = (void *) state;
sem->array_start = hash_array_start;
sem->scalar = hash_scalar;
sem->object_field_start = hash_object_field_start;
sem->object_field_end = hash_object_field_end;
pg_parse_json(lex, sem);
return tab;
}
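/* semantic action: remember the token type and text start of a level-1 field */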
static void
hash_object_field_start(void *state, char *fname, bool isnull)
{
JHashState *_state = (JHashState *) state;
if (_state->lex->lex_level > 1)
return;
/* remember token type */
_state->saved_token_type = _state->lex->token_type;
if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
_state->lex->token_type == JSON_TOKEN_OBJECT_START)
{
/* remember start position of the whole text of the subobject */
_state->save_json_start = _state->lex->token_start;
}
else
{
/* must be a scalar */
_state->save_json_start = NULL;
}
}
static void
hash_object_field_end(void *state, char *fname, bool isnull)
{
JHashState *_state = (JHashState *) state;
JsonHashEntry *hashentry;
bool found;
/*
* Ignore nested fields.
*/
if (_state->lex->lex_level > 1)
return;
/*
* Ignore field names >= NAMEDATALEN - they can't match a record field.
* (Note: without this test, the hash code would truncate the string at
* NAMEDATALEN-1, and could then match against a similarly-truncated
* record field name. That would be a reasonable behavior, but this code
* has previously insisted on exact equality, so we keep this behavior.)
*/
if (strlen(fname) >= NAMEDATALEN)
return;
hashentry = hash_search(_state->hash, fname, HASH_ENTER, &found);
/*
* found being true indicates a duplicate. We don't do anything about
* that; a later field with the same name overrides the earlier field.
*/
hashentry->type = _state->saved_token_type;
Assert(isnull == (hashentry->type == JSON_TOKEN_NULL));
if (_state->save_json_start != NULL)
{
int len = _state->lex->prev_token_terminator - _state->save_json_start;
char *val = palloc((len + 1) * sizeof(char));
memcpy(val, _state->save_json_start, len);
val[len] = '\0';
hashentry->val = val;
}
else
{
/* must have had a scalar instead */
hashentry->val = _state->saved_scalar;
}
}
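/* semantic action: reject a json array at the top level */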
static void
hash_array_start(void *state)
{
JHashState *_state = (JHashState *) state;
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on an array", _state->function_name)));
}
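/* semantic action: reject a bare json scalar, and save level-1 scalar values */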
static void
hash_scalar(void *state, char *token, JsonTokenType tokentype)
{
JHashState *_state = (JHashState *) state;
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on a scalar", _state->function_name)));
if (_state->lex->lex_level == 1)
{
_state->saved_scalar = token;
/* saved_token_type must already be set in hash_object_field_start() */
Assert(_state->saved_token_type == tokentype);
}
}
/*
* SQL function json_populate_recordset
*
* set fields in a set of records from the argument json,
* which must be an array of objects.
*
* similar to json_populate_record, but the tuple-building code
* is pushed down into the semantic action handlers so it's done
* per object in the array.
*/
Datum
jsonb_populate_recordset(PG_FUNCTION_ARGS)
{
return populate_recordset_worker(fcinfo, "jsonb_populate_recordset", true);
}
Datum
jsonb_to_recordset(PG_FUNCTION_ARGS)
{
return populate_recordset_worker(fcinfo, "jsonb_to_recordset", false);
}
Datum
json_populate_recordset(PG_FUNCTION_ARGS)
{
return populate_recordset_worker(fcinfo, "json_populate_recordset", true);
}
Datum
json_to_recordset(PG_FUNCTION_ARGS)
{
return populate_recordset_worker(fcinfo, "json_to_recordset", false);
}
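/* build a tuple from the given JsObject via populate_record() and store it */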
static void
populate_recordset_record(PopulateRecordsetState *state, JsObject *obj)
{
HeapTupleData tuple;
HeapTupleHeader tuphead = populate_record(state->ret_tdesc,
state->my_extra,
state->rec,
state->fn_mcxt,
obj);
tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = tuphead;
tuplestore_puttuple(state->tuple_store, &tuple);
}
/*
* common worker for json_populate_recordset() and json_to_recordset()
*/
static Datum
populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname,
bool have_record_arg)
{
int json_arg_num = have_record_arg ? 1 : 0;
Oid jtype = get_fn_expr_argtype(fcinfo->flinfo, json_arg_num);
ReturnSetInfo *rsi;
MemoryContext old_cxt;
HeapTupleHeader rec;
TupleDesc tupdesc;
PopulateRecordsetState *state;
if (have_record_arg)
{
Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
if (!type_is_rowtype(argtype))
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("first argument of %s must be a row type",
funcname)));
}
rsi = (ReturnSetInfo *) fcinfo->resultinfo;
if (!rsi || !IsA(rsi, ReturnSetInfo) ||
(rsi->allowedModes & SFRM_Materialize) == 0 ||
rsi->expectedDesc == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that "
"cannot accept a set")));
rsi->returnMode = SFRM_Materialize;
/*
* get the tupdesc from the result set info - it must be a record type
* because we already checked that arg1 is a record type, or we're in a
* to_record function which returns a setof record.
*/
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("function returning record called in context "
"that cannot accept type record")));
/* if the json is null send back an empty set */
if (PG_ARGISNULL(json_arg_num))
PG_RETURN_NULL();
if (!have_record_arg || PG_ARGISNULL(0))
rec = NULL;
else
rec = PG_GETARG_HEAPTUPLEHEADER(0);
state = palloc0(sizeof(PopulateRecordsetState));
/* make these in a sufficiently long-lived memory context */
old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
state->ret_tdesc = CreateTupleDescCopy(tupdesc);
BlessTupleDesc(state->ret_tdesc);
state->tuple_store = tuplestore_begin_heap(rsi->allowedModes &
SFRM_Materialize_Random,
false, work_mem);
MemoryContextSwitchTo(old_cxt);
state->function_name = funcname;
state->my_extra = (RecordIOData **) &fcinfo->flinfo->fn_extra;
state->rec = rec;
state->fn_mcxt = fcinfo->flinfo->fn_mcxt;
if (jtype == JSONOID)
{
text *json = PG_GETARG_TEXT_PP(json_arg_num);
JsonLexContext *lex;
JsonSemAction *sem;
sem = palloc0(sizeof(JsonSemAction));
lex = makeJsonLexContext(json, true);
sem->semstate = (void *) state;
sem->array_start = populate_recordset_array_start;
sem->array_element_start = populate_recordset_array_element_start;
sem->scalar = populate_recordset_scalar;
sem->object_field_start = populate_recordset_object_field_start;
sem->object_field_end = populate_recordset_object_field_end;
sem->object_start = populate_recordset_object_start;
sem->object_end = populate_recordset_object_end;
state->lex = lex;
pg_parse_json(lex, sem);
}
else
{
Jsonb *jb = PG_GETARG_JSONB(json_arg_num);
JsonbIterator *it;
JsonbValue v;
bool skipNested = false;
JsonbIteratorToken r;
Assert(jtype == JSONBOID);
if (JB_ROOT_IS_SCALAR(jb) || !JB_ROOT_IS_ARRAY(jb))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on a non-array",
funcname)));
it = JsonbIteratorInit(&jb->root);
while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
{
skipNested = true;
if (r == WJB_ELEM)
{
JsObject obj;
if (v.type != jbvBinary ||
!JsonContainerIsObject(v.val.binary.data))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument of %s must be an array of objects",
funcname)));
obj.is_json = false;
obj.val.jsonb_cont = v.val.binary.data;
populate_recordset_record(state, &obj);
}
}
}
rsi->setResult = state->tuple_store;
rsi->setDesc = state->ret_tdesc;
PG_RETURN_NULL();
}
static void
populate_recordset_object_start(void *state)
{
PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
int lex_level = _state->lex->lex_level;
HASHCTL ctl;
/* Reject object at top level: we must have an array at level 0 */
if (lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on an object",
_state->function_name)));
/* Nested objects require no special processing */
if (lex_level > 1)
return;
/* Object at level 1: set up a new hash table for this object */
memset(&ctl, 0, sizeof(ctl));
ctl.keysize = NAMEDATALEN;
ctl.entrysize = sizeof(JsonHashEntry);
ctl.hcxt = CurrentMemoryContext;
_state->json_hash = hash_create("json object hashtable",
100,
&ctl,
HASH_ELEM | HASH_CONTEXT);
}
static void
populate_recordset_object_end(void *state)
{
PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
JsObject obj;
/* Nested objects require no special processing */
if (_state->lex->lex_level > 1)
return;
obj.is_json = true;
obj.val.json_hash = _state->json_hash;
/* Otherwise, construct and return a tuple based on this level-1 object */
populate_recordset_record(_state, &obj);
/* Done with hash for this object */
hash_destroy(_state->json_hash);
_state->json_hash = NULL;
}
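/* semantic action: check that each element of the top-level array is an object */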
static void
populate_recordset_array_element_start(void *state, bool isnull)
{
PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
if (_state->lex->lex_level == 1 &&
_state->lex->token_type != JSON_TOKEN_OBJECT_START)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument of %s must be an array of objects",
_state->function_name)));
}
static void
populate_recordset_array_start(void *state)
{
/* nothing to do */
}
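/* semantic action: reject a bare json scalar, and save level-2 scalar values */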
static void
populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype)
{
PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
if (_state->lex->lex_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot call %s on a scalar",
_state->function_name)));
if (_state->lex->lex_level == 2)
_state->saved_scalar = token;
}
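/* semantic action: remember the token type and text start of a level-2 field */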
static void
populate_recordset_object_field_start(void *state, char *fname, bool isnull)
{
PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
if (_state->lex->lex_level > 2)
return;
_state->saved_token_type = _state->lex->token_type;
if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
_state->lex->token_type == JSON_TOKEN_OBJECT_START)
{
_state->save_json_start = _state->lex->token_start;
}
else
{
_state->save_json_start = NULL;
}
}
static void
populate_recordset_object_field_end(void *state, char *fname, bool isnull)
{
PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
JsonHashEntry *hashentry;
bool found;
/*
* Ignore nested fields.
*/
if (_state->lex->lex_level > 2)
return;
/*
* Ignore field names >= NAMEDATALEN - they can't match a record field.
* (Note: without this test, the hash code would truncate the string at
* NAMEDATALEN-1, and could then match against a similarly-truncated
* record field name. That would be a reasonable behavior, but this code
* has previously insisted on exact equality, so we keep this behavior.)
*/
if (strlen(fname) >= NAMEDATALEN)
return;
hashentry = hash_search(_state->json_hash, fname, HASH_ENTER, &found);
/*
* found being true indicates a duplicate. We don't do anything about
* that; a later field with the same name overrides the earlier field.
*/
hashentry->type = _state->saved_token_type;
Assert(isnull == (hashentry->type == JSON_TOKEN_NULL));
if (_state->save_json_start != NULL)
{
int len = _state->lex->prev_token_terminator - _state->save_json_start;
char *val = palloc((len + 1) * sizeof(char));
memcpy(val, _state->save_json_start, len);
val[len] = '\0';
hashentry->val = val;
}
else
{
/* must have had a scalar instead */
hashentry->val = _state->saved_scalar;
}
}
/*
* findJsonbValueFromContainer() wrapper that sets up JsonbValue key string.
*/
static JsonbValue *
findJsonbValueFromContainerLen(JsonbContainer *container, uint32 flags,
char *key, uint32 keylen)
{
JsonbValue k;
k.type = jbvString;
k.val.string.val = key;
k.val.string.len = keylen;
return findJsonbValueFromContainer(container, flags, &k);
}
/*
* Semantic actions for json_strip_nulls.
*
* Simply repeat the input on the output unless we encounter
* a null object field. State for this is set when the field
* is started and reset when the scalar action (which must be next)
* is called.
*/
static void
sn_object_start(void *state)
{
StripnullState *_state = (StripnullState *) state;
appendStringInfoCharMacro(_state->strval, '{');
}
static void
sn_object_end(void *state)
{
StripnullState *_state = (StripnullState *) state;
appendStringInfoCharMacro(_state->strval, '}');
}
static void
sn_array_start(void *state)
{
StripnullState *_state = (StripnullState *) state;
appendStringInfoCharMacro(_state->strval, '[');
}
static void
sn_array_end(void *state)
{
StripnullState *_state = (StripnullState *) state;
appendStringInfoCharMacro(_state->strval, ']');
}
static void
sn_object_field_start(void *state, char *fname, bool isnull)
{
StripnullState *_state = (StripnullState *) state;
if (isnull)
{
/*
* The next thing must be a scalar or isnull couldn't be true, so
* there is no danger of this state being carried down into a nested
* object or array. The flag will be reset in the scalar action.
*/
_state->skip_next_null = true;
return;
}
if (_state->strval->data[_state->strval->len - 1] != '{')
appendStringInfoCharMacro(_state->strval, ',');
/*
* Unfortunately we don't have the quoted and escaped string any more, so
* we have to re-escape it.
*/
escape_json(_state->strval, fname);
appendStringInfoCharMacro(_state->strval, ':');
}
static void
sn_array_element_start(void *state, bool isnull)
{
StripnullState *_state = (StripnullState *) state;
if (_state->strval->data[_state->strval->len - 1] != '[')
appendStringInfoCharMacro(_state->strval, ',');
}
static void
sn_scalar(void *state, char *token, JsonTokenType tokentype)
{
StripnullState *_state = (StripnullState *) state;
if (_state->skip_next_null)
{
Assert(tokentype == JSON_TOKEN_NULL);
_state->skip_next_null = false;
return;
}
if (tokentype == JSON_TOKEN_STRING)
escape_json(_state->strval, token);
else
appendStringInfoString(_state->strval, token);
}
/*
* SQL function json_strip_nulls(json) -> json
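*
* For example (illustrative):
*   SELECT json_strip_nulls('{"a": 1, "b": null}');   yields {"a":1}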
*/
Datum
json_strip_nulls(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_PP(0);
StripnullState *state;
JsonLexContext *lex;
JsonSemAction *sem;
lex = makeJsonLexContext(json, true);
state = palloc0(sizeof(StripnullState));
sem = palloc0(sizeof(JsonSemAction));
state->strval = makeStringInfo();
state->skip_next_null = false;
state->lex = lex;
sem->semstate = (void *) state;
sem->object_start = sn_object_start;
sem->object_end = sn_object_end;
sem->array_start = sn_array_start;
sem->array_end = sn_array_end;
sem->scalar = sn_scalar;
sem->array_element_start = sn_array_element_start;
sem->object_field_start = sn_object_field_start;
pg_parse_json(lex, sem);
PG_RETURN_TEXT_P(cstring_to_text_with_len(state->strval->data,
state->strval->len));
}
/*
* SQL function jsonb_strip_nulls(jsonb) -> jsonb
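*
* For example (illustrative):
*   SELECT jsonb_strip_nulls('{"a": 1, "b": null}');   yields {"a": 1}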
*/
Datum
jsonb_strip_nulls(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
JsonbIterator *it;
JsonbParseState *parseState = NULL;
JsonbValue *res = NULL;
JsonbValue v,
k;
JsonbIteratorToken type;
bool last_was_key = false;
if (JB_ROOT_IS_SCALAR(jb))
PG_RETURN_POINTER(jb);
it = JsonbIteratorInit(&jb->root);
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
Assert(!(type == WJB_KEY && last_was_key));
if (type == WJB_KEY)
{
/* stash the key until we know if it has a null value */
k = v;
last_was_key = true;
continue;
}
if (last_was_key)
{
/* if the last element was a key this one can't be */
last_was_key = false;
/* skip this field if value is null */
if (type == WJB_VALUE && v.type == jbvNull)
continue;
/* otherwise, do a delayed push of the key */
(void) pushJsonbValue(&parseState, WJB_KEY, &k);
}
if (type == WJB_VALUE || type == WJB_ELEM)
res = pushJsonbValue(&parseState, type, &v);
else
res = pushJsonbValue(&parseState, type, NULL);
}
Assert(res != NULL);
PG_RETURN_POINTER(JsonbValueToJsonb(res));
}
/*
* Add values from the jsonb to the parse state.
*
* If the parse state container is an object, the jsonb is pushed as
* a value, not a key.
*
* This needs to be done using an iterator because pushJsonbValue doesn't
* like getting jbvBinary values, so we can't just push jb as a whole.
*/
static void
addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb)
{
JsonbIterator *it;
JsonbValue *o = &(*jbps)->contVal;
JsonbValue v;
JsonbIteratorToken type;
it = JsonbIteratorInit(&jb->root);
Assert(o->type == jbvArray || o->type == jbvObject);
if (JB_ROOT_IS_SCALAR(jb))
{
(void) JsonbIteratorNext(&it, &v, false); /* skip array header */
(void) JsonbIteratorNext(&it, &v, false); /* fetch scalar value */
switch (o->type)
{
case jbvArray:
(void) pushJsonbValue(jbps, WJB_ELEM, &v);
break;
case jbvObject:
(void) pushJsonbValue(jbps, WJB_VALUE, &v);
break;
default:
elog(ERROR, "unexpected parent of nested structure");
}
}
else
{
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
if (type == WJB_KEY || type == WJB_VALUE || type == WJB_ELEM)
(void) pushJsonbValue(jbps, type, &v);
else
(void) pushJsonbValue(jbps, type, NULL);
}
}
}
/*
* SQL function jsonb_pretty (jsonb)
*
* Pretty-printed text for the jsonb
*/
Datum
jsonb_pretty(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
StringInfo str = makeStringInfo();
JsonbToCStringIndent(str, &jb->root, VARSIZE(jb));
PG_RETURN_TEXT_P(cstring_to_text_with_len(str->data, str->len));
}
/*
* SQL function jsonb_concat (jsonb, jsonb)
*
* function for || operator
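*
* For example (illustrative):
*   SELECT '{"a": 1}'::jsonb || '{"b": 2}';   yields {"a": 1, "b": 2}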
*/
Datum
jsonb_concat(PG_FUNCTION_ARGS)
{
Jsonb *jb1 = PG_GETARG_JSONB(0);
Jsonb *jb2 = PG_GETARG_JSONB(1);
JsonbParseState *state = NULL;
JsonbValue *res;
JsonbIterator *it1,
*it2;
/*
* If one of the jsonb values is empty, just return the other if it's
* not a scalar and both are of the same kind. If it's a scalar or they
* are of different kinds we need to perform the actual concatenation
* even if one is empty.
*/
if (JB_ROOT_IS_OBJECT(jb1) == JB_ROOT_IS_OBJECT(jb2))
{
if (JB_ROOT_COUNT(jb1) == 0 && !JB_ROOT_IS_SCALAR(jb2))
PG_RETURN_JSONB(jb2);
else if (JB_ROOT_COUNT(jb2) == 0 && !JB_ROOT_IS_SCALAR(jb1))
PG_RETURN_JSONB(jb1);
}
it1 = JsonbIteratorInit(&jb1->root);
it2 = JsonbIteratorInit(&jb2->root);
res = IteratorConcat(&it1, &it2, &state);
Assert(res != NULL);
PG_RETURN_JSONB(JsonbValueToJsonb(res));
}
/*
* SQL function jsonb_delete (jsonb, text)
*
* return a copy of the jsonb with the indicated item
* removed.
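*
* For example (illustrative):
*   SELECT '{"a": 1, "b": 2}'::jsonb - 'a';   yields {"b": 2}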
*/
Datum
jsonb_delete(PG_FUNCTION_ARGS)
{
Jsonb *in = PG_GETARG_JSONB(0);
text *key = PG_GETARG_TEXT_PP(1);
char *keyptr = VARDATA_ANY(key);
int keylen = VARSIZE_ANY_EXHDR(key);
JsonbParseState *state = NULL;
JsonbIterator *it;
JsonbValue v,
*res = NULL;
bool skipNested = false;
JsonbIteratorToken r;
if (JB_ROOT_IS_SCALAR(in))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot delete from scalar")));
if (JB_ROOT_COUNT(in) == 0)
PG_RETURN_JSONB(in);
it = JsonbIteratorInit(&in->root);
while ((r = JsonbIteratorNext(&it, &v, skipNested)) != 0)
{
skipNested = true;
if ((r == WJB_ELEM || r == WJB_KEY) &&
(v.type == jbvString && keylen == v.val.string.len &&
memcmp(keyptr, v.val.string.val, keylen) == 0))
{
/* skip corresponding value as well */
if (r == WJB_KEY)
JsonbIteratorNext(&it, &v, true);
continue;
}
res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
}
Assert(res != NULL);
PG_RETURN_JSONB(JsonbValueToJsonb(res));
}
/*
* SQL function jsonb_delete (jsonb, variadic text[])
*
* return a copy of the jsonb with the indicated items
* removed.
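*
* For example (illustrative):
*   SELECT jsonb_delete('{"a": 1, "b": 2}'::jsonb, 'a', 'b');   yields {}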
*/
Datum
jsonb_delete_array(PG_FUNCTION_ARGS)
{
Jsonb *in = PG_GETARG_JSONB(0);
ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1);
Datum *keys_elems;
bool *keys_nulls;
int keys_len;
JsonbParseState *state = NULL;
JsonbIterator *it;
JsonbValue v,
*res = NULL;
bool skipNested = false;
JsonbIteratorToken r;
if (ARR_NDIM(keys) > 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("wrong number of array subscripts")));
if (JB_ROOT_IS_SCALAR(in))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot delete from scalar")));
if (JB_ROOT_COUNT(in) == 0)
PG_RETURN_JSONB(in);
deconstruct_array(keys, TEXTOID, -1, false, 'i',
&keys_elems, &keys_nulls, &keys_len);
if (keys_len == 0)
PG_RETURN_JSONB(in);
it = JsonbIteratorInit(&in->root);
while ((r = JsonbIteratorNext(&it, &v, skipNested)) != 0)
{
skipNested = true;
if ((r == WJB_ELEM || r == WJB_KEY) && v.type == jbvString)
{
int i;
bool found = false;
for (i = 0; i < keys_len; i++)
{
char *keyptr;
int keylen;
if (keys_nulls[i])
continue;
keyptr = VARDATA_ANY(keys_elems[i]);
keylen = VARSIZE_ANY_EXHDR(keys_elems[i]);
if (keylen == v.val.string.len &&
memcmp(keyptr, v.val.string.val, keylen) == 0)
{
found = true;
break;
}
}
if (found)
{
/* skip corresponding value as well */
if (r == WJB_KEY)
JsonbIteratorNext(&it, &v, true);
continue;
}
}
res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
}
Assert(res != NULL);
PG_RETURN_JSONB(JsonbValueToJsonb(res));
}
/*
* SQL function jsonb_delete (jsonb, int)
*
* return a copy of the jsonb with the indicated item
* removed. Negative int means count back from the
* end of the items.
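*
* For example (illustrative, deleting the last element):
*   SELECT '["a", "b", "c"]'::jsonb - (-1);   yields ["a", "b"]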
*/
Datum
jsonb_delete_idx(PG_FUNCTION_ARGS)
{
Jsonb *in = PG_GETARG_JSONB(0);
int idx = PG_GETARG_INT32(1);
JsonbParseState *state = NULL;
JsonbIterator *it;
uint32 i = 0,
n;
JsonbValue v,
*res = NULL;
JsonbIteratorToken r;
if (JB_ROOT_IS_SCALAR(in))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot delete from scalar")));
if (JB_ROOT_IS_OBJECT(in))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot delete from object using integer index")));
if (JB_ROOT_COUNT(in) == 0)
PG_RETURN_JSONB(in);
it = JsonbIteratorInit(&in->root);
r = JsonbIteratorNext(&it, &v, false);
Assert(r == WJB_BEGIN_ARRAY);
n = v.val.array.nElems;
if (idx < 0)
{
if (-idx > n)
idx = n;
else
idx = n + idx;
}
if (idx >= n)
PG_RETURN_JSONB(in);
pushJsonbValue(&state, r, NULL);
while ((r = JsonbIteratorNext(&it, &v, true)) != 0)
{
if (r == WJB_ELEM)
{
if (i++ == idx)
continue;
}
res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
}
Assert(res != NULL);
PG_RETURN_JSONB(JsonbValueToJsonb(res));
}
/*
* SQL function jsonb_set(jsonb, text[], jsonb, boolean)
*
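* For example (illustrative):
*   SELECT jsonb_set('{"a": 1}', '{a}', '2', true);   yields {"a": 2}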
*/
Datum
jsonb_set(PG_FUNCTION_ARGS)
{
Jsonb *in = PG_GETARG_JSONB(0);
ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
Jsonb *newval = PG_GETARG_JSONB(2);
bool create = PG_GETARG_BOOL(3);
JsonbValue *res = NULL;
Datum *path_elems;
bool *path_nulls;
int path_len;
JsonbIterator *it;
JsonbParseState *st = NULL;
if (ARR_NDIM(path) > 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("wrong number of array subscripts")));
if (JB_ROOT_IS_SCALAR(in))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot set path in scalar")));
if (JB_ROOT_COUNT(in) == 0 && !create)
PG_RETURN_JSONB(in);
deconstruct_array(path, TEXTOID, -1, false, 'i',
&path_elems, &path_nulls, &path_len);
if (path_len == 0)
PG_RETURN_JSONB(in);
it = JsonbIteratorInit(&in->root);
res = setPath(&it, path_elems, path_nulls, path_len, &st,
0, newval, create ? JB_PATH_CREATE : JB_PATH_REPLACE);
Assert(res != NULL);
PG_RETURN_JSONB(JsonbValueToJsonb(res));
}
/*
* SQL function jsonb_delete_path(jsonb, text[])
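*
* For example (illustrative):
*   SELECT jsonb_delete_path('{"a": {"b": 1}}', '{a,b}');   yields {"a": {}}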
*/
Datum
jsonb_delete_path(PG_FUNCTION_ARGS)
{
Jsonb *in = PG_GETARG_JSONB(0);
ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
JsonbValue *res = NULL;
Datum *path_elems;
bool *path_nulls;
int path_len;
JsonbIterator *it;
JsonbParseState *st = NULL;
if (ARR_NDIM(path) > 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("wrong number of array subscripts")));
if (JB_ROOT_IS_SCALAR(in))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot delete path in scalar")));
if (JB_ROOT_COUNT(in) == 0)
PG_RETURN_JSONB(in);
deconstruct_array(path, TEXTOID, -1, false, 'i',
&path_elems, &path_nulls, &path_len);
if (path_len == 0)
PG_RETURN_JSONB(in);
it = JsonbIteratorInit(&in->root);
res = setPath(&it, path_elems, path_nulls, path_len, &st,
0, NULL, JB_PATH_DELETE);
Assert(res != NULL);
PG_RETURN_JSONB(JsonbValueToJsonb(res));
}
/*
* SQL function jsonb_insert(jsonb, text[], jsonb, boolean)
*
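* For example (illustrative, inserting before index 1):
*   SELECT jsonb_insert('[1, 3]', '{1}', '2');   yields [1, 2, 3]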
*/
Datum
jsonb_insert(PG_FUNCTION_ARGS)
{
Jsonb *in = PG_GETARG_JSONB(0);
ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
Jsonb *newval = PG_GETARG_JSONB(2);
bool after = PG_GETARG_BOOL(3);
JsonbValue *res = NULL;
Datum *path_elems;
bool *path_nulls;
int path_len;
JsonbIterator *it;
JsonbParseState *st = NULL;
if (ARR_NDIM(path) > 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("wrong number of array subscripts")));
if (JB_ROOT_IS_SCALAR(in))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot set path in scalar")));
deconstruct_array(path, TEXTOID, -1, false, 'i',
&path_elems, &path_nulls, &path_len);
if (path_len == 0)
PG_RETURN_JSONB(in);
it = JsonbIteratorInit(&in->root);
res = setPath(&it, path_elems, path_nulls, path_len, &st, 0, newval,
after ? JB_PATH_INSERT_AFTER : JB_PATH_INSERT_BEFORE);
Assert(res != NULL);
PG_RETURN_JSONB(JsonbValueToJsonb(res));
}
/*
* Iterate over all jsonb objects and merge them into one.
* The logic of this function is copied from the equivalent hstore
* function, except for the case when both it1 and it2 represent a
* jbvObject. In that case we just append the contents of it2 to it1
* without any verification.
*/
static JsonbValue *
IteratorConcat(JsonbIterator **it1, JsonbIterator **it2,
JsonbParseState **state)
{
JsonbValue v1,
v2,
*res = NULL;
JsonbIteratorToken r1,
r2,
rk1,
rk2;
r1 = rk1 = JsonbIteratorNext(it1, &v1, false);
r2 = rk2 = JsonbIteratorNext(it2, &v2, false);
/*
* Both elements are objects.
*/
if (rk1 == WJB_BEGIN_OBJECT && rk2 == WJB_BEGIN_OBJECT)
{
/*
* Append all tokens from v1 to res, except the last WJB_END_OBJECT
* (because res is not finished yet).
*/
pushJsonbValue(state, r1, NULL);
while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_OBJECT)
pushJsonbValue(state, r1, &v1);
/*
* Append all tokens from v2 to res, including the last WJB_END_OBJECT
* (which completes the concatenation).
*/
while ((r2 = JsonbIteratorNext(it2, &v2, true)) != 0)
res = pushJsonbValue(state, r2, r2 != WJB_END_OBJECT ? &v2 : NULL);
}
/*
* Both elements are arrays (either can be scalar).
*/
else if (rk1 == WJB_BEGIN_ARRAY && rk2 == WJB_BEGIN_ARRAY)
{
pushJsonbValue(state, r1, NULL);
while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_ARRAY)
{
Assert(r1 == WJB_ELEM);
pushJsonbValue(state, r1, &v1);
}
while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_END_ARRAY)
{
Assert(r2 == WJB_ELEM);
pushJsonbValue(state, WJB_ELEM, &v2);
}
res = pushJsonbValue(state, WJB_END_ARRAY, NULL /* signal to sort */ );
}
/* have we got array || object or object || array? */
else if (((rk1 == WJB_BEGIN_ARRAY && !(*it1)->isScalar) && rk2 == WJB_BEGIN_OBJECT) ||
(rk1 == WJB_BEGIN_OBJECT && (rk2 == WJB_BEGIN_ARRAY && !(*it2)->isScalar)))
{
JsonbIterator **it_array = rk1 == WJB_BEGIN_ARRAY ? it1 : it2;
JsonbIterator **it_object = rk1 == WJB_BEGIN_OBJECT ? it1 : it2;
bool prepend = (rk1 == WJB_BEGIN_OBJECT);
pushJsonbValue(state, WJB_BEGIN_ARRAY, NULL);
if (prepend)
{
pushJsonbValue(state, WJB_BEGIN_OBJECT, NULL);
while ((r1 = JsonbIteratorNext(it_object, &v1, true)) != 0)
pushJsonbValue(state, r1, r1 != WJB_END_OBJECT ? &v1 : NULL);
while ((r2 = JsonbIteratorNext(it_array, &v2, true)) != 0)
res = pushJsonbValue(state, r2, r2 != WJB_END_ARRAY ? &v2 : NULL);
}
else
{
while ((r1 = JsonbIteratorNext(it_array, &v1, true)) != WJB_END_ARRAY)
pushJsonbValue(state, r1, &v1);
pushJsonbValue(state, WJB_BEGIN_OBJECT, NULL);
while ((r2 = JsonbIteratorNext(it_object, &v2, true)) != 0)
pushJsonbValue(state, r2, r2 != WJB_END_OBJECT ? &v2 : NULL);
res = pushJsonbValue(state, WJB_END_ARRAY, NULL);
}
}
else
{
/*
* This must be scalar || object or object || scalar, as that's all
* that's left. Both of these make no sense, so error out.
*/
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid concatenation of jsonb objects")));
}
return res;
}
/*
* Do most of the heavy work for jsonb_set/jsonb_insert
*
* If JB_PATH_DELETE bit is set in op_type, the element is to be removed.
*
* If any bit mentioned in JB_PATH_CREATE_OR_INSERT is set in op_type,
* we create the new value if the key or array index does not exist.
*
* Bits JB_PATH_INSERT_BEFORE and JB_PATH_INSERT_AFTER in op_type
* behave as JB_PATH_CREATE if new value is inserted in JsonbObject.
*
* All path elements before the last must already exist
* whatever bits in op_type are set, or nothing is done.
*/
static JsonbValue *
setPath(JsonbIterator **it, Datum *path_elems,
bool *path_nulls, int path_len,
JsonbParseState **st, int level, Jsonb *newval, int op_type)
{
JsonbValue v;
JsonbIteratorToken r;
JsonbValue *res;
check_stack_depth();
if (path_nulls[level])
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("path element at position %d is null",
level + 1)));
r = JsonbIteratorNext(it, &v, false);
switch (r)
{
case WJB_BEGIN_ARRAY:
(void) pushJsonbValue(st, r, NULL);
setPathArray(it, path_elems, path_nulls, path_len, st, level,
newval, v.val.array.nElems, op_type);
r = JsonbIteratorNext(it, &v, false);
Assert(r == WJB_END_ARRAY);
res = pushJsonbValue(st, r, NULL);
break;
case WJB_BEGIN_OBJECT:
(void) pushJsonbValue(st, r, NULL);
setPathObject(it, path_elems, path_nulls, path_len, st, level,
newval, v.val.object.nPairs, op_type);
r = JsonbIteratorNext(it, &v, true);
Assert(r == WJB_END_OBJECT);
res = pushJsonbValue(st, r, NULL);
break;
case WJB_ELEM:
case WJB_VALUE:
res = pushJsonbValue(st, r, &v);
break;
default:
elog(ERROR, "unrecognized iterator result: %d", (int) r);
res = NULL; /* keep compiler quiet */
break;
}
return res;
}
/*
* Object walker for setPath
*/
static void
setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
int path_len, JsonbParseState **st, int level,
Jsonb *newval, uint32 npairs, int op_type)
{
JsonbValue v;
int i;
JsonbValue k;
bool done = false;
if (level >= path_len || path_nulls[level])
done = true;
/* empty object is a special case for create */
if ((npairs == 0) && (op_type & JB_PATH_CREATE_OR_INSERT) &&
(level == path_len - 1))
{
JsonbValue newkey;
newkey.type = jbvString;
newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[level]);
newkey.val.string.val = VARDATA_ANY(path_elems[level]);
(void) pushJsonbValue(st, WJB_KEY, &newkey);
addJsonbToParseState(st, newval);
}
for (i = 0; i < npairs; i++)
{
JsonbIteratorToken r = JsonbIteratorNext(it, &k, true);
Assert(r == WJB_KEY);
if (!done &&
k.val.string.len == VARSIZE_ANY_EXHDR(path_elems[level]) &&
memcmp(k.val.string.val, VARDATA_ANY(path_elems[level]),
k.val.string.len) == 0)
{
if (level == path_len - 1)
{
/*
* When called from jsonb_insert(), redefining an existing value is
* forbidden.
*/
if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot replace existing key"),
errhint("Try using the function jsonb_set "
"to replace key value.")));
r = JsonbIteratorNext(it, &v, true); /* skip value */
if (!(op_type & JB_PATH_DELETE))
{
(void) pushJsonbValue(st, WJB_KEY, &k);
addJsonbToParseState(st, newval);
}
done = true;
}
else
{
(void) pushJsonbValue(st, r, &k);
setPath(it, path_elems, path_nulls, path_len,
st, level + 1, newval, op_type);
}
}
else
{
if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done &&
level == path_len - 1 && i == npairs - 1)
{
JsonbValue newkey;
newkey.type = jbvString;
newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[level]);
newkey.val.string.val = VARDATA_ANY(path_elems[level]);
(void) pushJsonbValue(st, WJB_KEY, &newkey);
addJsonbToParseState(st, newval);
}
(void) pushJsonbValue(st, r, &k);
r = JsonbIteratorNext(it, &v, false);
(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
{
int walking_level = 1;
while (walking_level != 0)
{
r = JsonbIteratorNext(it, &v, false);
if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
++walking_level;
if (r == WJB_END_ARRAY || r == WJB_END_OBJECT)
--walking_level;
(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
}
}
}
}
}
/*
* Array walker for setPath
*/
static void
setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
int path_len, JsonbParseState **st, int level,
Jsonb *newval, uint32 nelems, int op_type)
{
JsonbValue v;
int idx,
i;
bool done = false;
/* pick correct index */
if (level < path_len && !path_nulls[level])
{
char *c = TextDatumGetCString(path_elems[level]);
long lindex;
char *badp;
errno = 0;
lindex = strtol(c, &badp, 10);
if (errno != 0 || badp == c || *badp != '\0' || lindex > INT_MAX ||
lindex < INT_MIN)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("path element at position %d is not an integer: \"%s\"",
level + 1, c)));
idx = lindex;
}
else
idx = nelems;
if (idx < 0)
{
if (-idx > nelems)
idx = INT_MIN;
else
idx = nelems + idx;
}
if (idx > 0 && idx > nelems)
idx = nelems;
/*
* If we're creating or inserting, and idx == INT_MIN, prepend the new
* value to the array.  Also prepend if the array is empty, in which case
* we don't really care what idx is.
*/
if ((idx == INT_MIN || nelems == 0) && (level == path_len - 1) &&
(op_type & JB_PATH_CREATE_OR_INSERT))
{
Assert(newval != NULL);
addJsonbToParseState(st, newval);
done = true;
}
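/*
 * For example, at the SQL level jsonb_set('[]'::jsonb, '{0}', '"x"')
 * takes this path and yields ["x"], while jsonb_set('[0,1]'::jsonb,
 * '{-99}', '"x"') arrives here with idx == INT_MIN and prepends,
 * yielding ["x", 0, 1].
 */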
/* iterate over the array elements */
for (i = 0; i < nelems; i++)
{
JsonbIteratorToken r;
if (i == idx && level < path_len)
{
if (level == path_len - 1)
{
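/*
 * Worked examples at the SQL level: jsonb_insert('[0,1]', '{1}', '"x"')
 * inserts before element 1, giving [0, "x", 1]; the same call with
 * insert_after => true gives [0, 1, "x"]; and jsonb_set('[0,1]', '{1}',
 * '"x"') replaces the element, giving [0, "x"].
 */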
r = JsonbIteratorNext(it, &v, true); /* skip */
if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_CREATE))
addJsonbToParseState(st, newval);
/*
 * Keep the current value only in the case of JB_PATH_INSERT_BEFORE or
 * JB_PATH_INSERT_AFTER; otherwise it is deleted or replaced.
 */
if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_INSERT_BEFORE))
(void) pushJsonbValue(st, r, &v);
if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_REPLACE))
addJsonbToParseState(st, newval);
done = true;
}
else
(void) setPath(it, path_elems, path_nulls, path_len,
st, level + 1, newval, op_type);
}
else
{
r = JsonbIteratorNext(it, &v, false);
(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
{
int walking_level = 1;
while (walking_level != 0)
{
r = JsonbIteratorNext(it, &v, false);
if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
++walking_level;
if (r == WJB_END_ARRAY || r == WJB_END_OBJECT)
--walking_level;
(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
}
}
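/*
 * If we were asked to create or insert at this level but no existing
 * element matched idx (it pointed at or past the end of the array),
 * append the new value after the last element; e.g.
 * jsonb_set('[0,1]', '{99}', '"x"') yields [0, 1, "x"].
 */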
if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done &&
level == path_len - 1 && i == nelems - 1)
{
addJsonbToParseState(st, newval);
}
}
}
}
/*
* Iterate over jsonb string values or elements, and pass them together with an
* iteration state to a specified JsonIterateStringValuesAction.
*/
void
iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
{
JsonbIterator *it;
JsonbValue v;
JsonbIteratorToken type;
it = JsonbIteratorInit(&jb->root);
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
{
action(state, v.val.string.val, v.val.string.len);
}
}
}
/*
* Iterate over json string values or elements, and pass them together with an
* iteration state to a specified JsonIterateStringValuesAction.
*/
void
iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
IterateJsonStringValuesState *state = palloc0(sizeof(IterateJsonStringValuesState));
state->lex = lex;
state->action = action;
state->action_state = action_state;
sem->semstate = (void *) state;
sem->scalar = iterate_string_values_scalar;
pg_parse_json(lex, sem);
}
/*
* An auxiliary function for iterate_json_string_values to invoke a specified
* JsonIterateStringValuesAction.
*/
static void
iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
{
IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
if (tokentype == JSON_TOKEN_STRING)
(*_state->action) (_state->action_state, token, strlen(token));
}
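/*
 * Example (an illustrative sketch, not part of PostgreSQL proper): a
 * JsonIterateStringValuesAction that sums the lengths of all string
 * values seen.  The function name and "total" state are hypothetical.
 * It could be wired up as:
 *
 *		long		total = 0;
 *
 *		iterate_jsonb_string_values(jb, &total, sum_string_lengths);
 */
#ifdef NOT_USED
static void
sum_string_lengths(void *state, char *elem_value, int elem_len)
{
	long	   *total = (long *) state;

	/* elem_value is not necessarily NUL-terminated, so rely on elem_len */
	*total += elem_len;
}
#endif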
/*
* Iterate over a jsonb, and apply a specified JsonTransformStringValuesAction
* to every string value or element. Any necessary context for a
* JsonTransformStringValuesAction can be passed in the action_state variable.
* Returns a copy of the original jsonb with its string values transformed.
*/
Jsonb *
transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
JsonTransformStringValuesAction transform_action)
{
JsonbIterator *it;
JsonbValue v,
*res = NULL;
JsonbIteratorToken type;
JsonbParseState *st = NULL;
text *out;
bool is_scalar = false;
it = JsonbIteratorInit(&jsonb->root);
is_scalar = it->isScalar;
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
{
out = transform_action(action_state, v.val.string.val, v.val.string.len);
v.val.string.val = VARDATA_ANY(out);
v.val.string.len = VARSIZE_ANY_EXHDR(out);
res = pushJsonbValue(&st, type, type < WJB_BEGIN_ARRAY ? &v : NULL);
}
else
{
res = pushJsonbValue(&st, type, (type == WJB_KEY ||
type == WJB_VALUE ||
type == WJB_ELEM) ? &v : NULL);
}
}
if (res->type == jbvArray)
res->val.array.rawScalar = is_scalar;
return JsonbValueToJsonb(res);
}
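/*
 * Example (an illustrative sketch, not part of PostgreSQL proper): a
 * JsonTransformStringValuesAction that upper-cases ASCII letters in each
 * string value.  The function name is hypothetical.
 */
#ifdef NOT_USED
static text *
uppercase_ascii_action(void *state, char *elem_value, int elem_len)
{
	text	   *result = cstring_to_text_with_len(elem_value, elem_len);
	char	   *p = VARDATA(result);
	int			i;

	/* work on the copy; elem_value need not be NUL-terminated */
	for (i = 0; i < elem_len; i++)
		p[i] = pg_toupper((unsigned char) p[i]);

	return result;
}
#endif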
/*
* Iterate over a json, and apply a specified JsonTransformStringValuesAction
* to every string value or element. Any necessary context for a
* JsonTransformStringValuesAction can be passed in the action_state variable.
* The function returns a text value containing a copy of the original json
* with its string values transformed.
*/
text *
transform_json_string_values(text *json, void *action_state,
JsonTransformStringValuesAction transform_action)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
TransformJsonStringValuesState *state = palloc0(sizeof(TransformJsonStringValuesState));
state->lex = lex;
state->strval = makeStringInfo();
state->action = transform_action;
state->action_state = action_state;
sem->semstate = (void *) state;
sem->scalar = transform_string_values_scalar;
sem->object_start = transform_string_values_object_start;
sem->object_end = transform_string_values_object_end;
sem->array_start = transform_string_values_array_start;
sem->array_end = transform_string_values_array_end;
sem->array_element_start = transform_string_values_array_element_start;
sem->object_field_start = transform_string_values_object_field_start;
pg_parse_json(lex, sem);
return cstring_to_text_with_len(state->strval->data, state->strval->len);
}
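/*
 * Note that both transform functions pass only string values and string
 * array elements through the action; object keys are re-emitted unchanged
 * (WJB_KEY above, object_field_start below).  A hypothetical call,
 * reusing the action sketched above:
 *
 *		text	   *out = transform_json_string_values(json, NULL,
 *											uppercase_ascii_action);
 */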
/*
* Set of auxiliary functions for transform_json_string_values to invoke a
* specified JsonTransformStringValuesAction for all string values, leaving
* everything else untouched.
*/
static void
transform_string_values_object_start(void *state)
{
TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
appendStringInfoCharMacro(_state->strval, '{');
}
static void
transform_string_values_object_end(void *state)
{
TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
appendStringInfoCharMacro(_state->strval, '}');
}
static void
transform_string_values_array_start(void *state)
{
TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
appendStringInfoCharMacro(_state->strval, '[');
}
static void
transform_string_values_array_end(void *state)
{
TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
appendStringInfoCharMacro(_state->strval, ']');
}
static void
transform_string_values_object_field_start(void *state, char *fname, bool isnull)
{
TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
if (_state->strval->data[_state->strval->len - 1] != '{')
appendStringInfoCharMacro(_state->strval, ',');
/*
* Unfortunately we don't have the quoted and escaped string any more, so
* we have to re-escape it.
*/
escape_json(_state->strval, fname);
appendStringInfoCharMacro(_state->strval, ':');
}
static void
transform_string_values_array_element_start(void *state, bool isnull)
{
TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
if (_state->strval->data[_state->strval->len - 1] != '[')
appendStringInfoCharMacro(_state->strval, ',');
}
static void
transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
{
TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
if (tokentype == JSON_TOKEN_STRING)
{
text *out = (*_state->action) (_state->action_state, token, strlen(token));
escape_json(_state->strval, text_to_cstring(out));
}
else
appendStringInfoString(_state->strval, token);
}