/*------------------------------------------------------------------------- * * jsonb_gin.c * GIN support functions for jsonb * * Copyright (c) 2014-2016, PostgreSQL Global Development Group * * * IDENTIFICATION * src/backend/utils/adt/jsonb_gin.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/gin.h" #include "access/hash.h" #include "access/stratnum.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "utils/builtins.h" #include "utils/jsonb.h" typedef struct PathHashStack { uint32 hash; struct PathHashStack *parent; } PathHashStack; static Datum make_text_key(char flag, const char *str, int len); static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key); /* * * jsonb_ops GIN opclass support functions * */ Datum gin_compare_jsonb(PG_FUNCTION_ARGS) { text *arg1 = PG_GETARG_TEXT_PP(0); text *arg2 = PG_GETARG_TEXT_PP(1); int32 result; char *a1p, *a2p; int len1, len2; a1p = VARDATA_ANY(arg1); a2p = VARDATA_ANY(arg2); len1 = VARSIZE_ANY_EXHDR(arg1); len2 = VARSIZE_ANY_EXHDR(arg2); /* Compare text as bttextcmp does, but always using C collation */ result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); PG_RETURN_INT32(result); } Datum gin_extract_jsonb(PG_FUNCTION_ARGS) { Jsonb *jb = (Jsonb *) PG_GETARG_JSONB(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); int total = 2 * JB_ROOT_COUNT(jb); JsonbIterator *it; JsonbValue v; JsonbIteratorToken r; int i = 0; Datum *entries; /* If the root level is empty, we certainly have no keys */ if (total == 0) { *nentries = 0; PG_RETURN_POINTER(NULL); } /* Otherwise, use 2 * root count as initial estimate of result size */ entries = (Datum *) palloc(sizeof(Datum) * total); it = JsonbIteratorInit(&jb->root); while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) { /* Since we recurse into the object, we might need more space */ if (i >= total) { total *= 2; entries = (Datum *) repalloc(entries, sizeof(Datum) * total); } switch (r) { case WJB_KEY: entries[i++] = make_scalar_key(&v, true); break; case WJB_ELEM: /* Pretend string array elements are keys, see jsonb.h */ entries[i++] = make_scalar_key(&v, (v.type == jbvString)); break; case WJB_VALUE: entries[i++] = make_scalar_key(&v, false); break; default: /* we can ignore structural items */ break; } } *nentries = i; PG_RETURN_POINTER(entries); } Datum gin_extract_jsonb_query(PG_FUNCTION_ARGS) { int32 *nentries = (int32 *) PG_GETARG_POINTER(1); StrategyNumber strategy = PG_GETARG_UINT16(2); int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); Datum *entries; if (strategy == JsonbContainsStrategyNumber) { /* Query is a jsonb, so just apply gin_extract_jsonb... */ entries = (Datum *) DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb, PG_GETARG_DATUM(0), PointerGetDatum(nentries))); /* ...although "contains {}" requires a full index scan */ if (*nentries == 0) *searchMode = GIN_SEARCH_MODE_ALL; } else if (strategy == JsonbExistsStrategyNumber) { /* Query is a text string, which we treat as a key */ text *query = PG_GETARG_TEXT_PP(0); *nentries = 1; entries = (Datum *) palloc(sizeof(Datum)); entries[0] = make_text_key(JGINFLAG_KEY, VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query)); } else if (strategy == JsonbExistsAnyStrategyNumber || strategy == JsonbExistsAllStrategyNumber) { /* Query is a text array; each element is treated as a key */ ArrayType *query = PG_GETARG_ARRAYTYPE_P(0); Datum *key_datums; bool *key_nulls; int key_count; int i, j; deconstruct_array(query, TEXTOID, -1, false, 'i', &key_datums, &key_nulls, &key_count); entries = (Datum *) palloc(sizeof(Datum) * key_count); for (i = 0, j = 0; i < key_count; i++) { /* Nulls in the array are ignored */ if (key_nulls[i]) continue; entries[j++] = make_text_key(JGINFLAG_KEY, VARDATA_ANY(key_datums[i]), VARSIZE_ANY_EXHDR(key_datums[i])); } *nentries = j; /* ExistsAll with no keys should match everything */ if (j == 0 && strategy == JsonbExistsAllStrategyNumber) *searchMode = GIN_SEARCH_MODE_ALL; } else { elog(ERROR, "unrecognized strategy number: %d", strategy); entries = NULL; /* keep compiler quiet */ } PG_RETURN_POINTER(entries); } Datum gin_consistent_jsonb(PG_FUNCTION_ARGS) { bool *check = (bool *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); /* Jsonb *query = PG_GETARG_JSONB(2); */ int32 nkeys = PG_GETARG_INT32(3); /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ bool *recheck = (bool *) PG_GETARG_POINTER(5); bool res = true; int32 i; if (strategy == JsonbContainsStrategyNumber) { /* * We must always recheck, since we can't tell from the index whether * the positions of the matched items match the structure of the query * object. (Even if we could, we'd also have to worry about hashed * keys and the index's failure to distinguish keys from string array * elements.) However, the tuple certainly doesn't match unless it * contains all the query keys. */ *recheck = true; for (i = 0; i < nkeys; i++) { if (!check[i]) { res = false; break; } } } else if (strategy == JsonbExistsStrategyNumber) { /* * Although the key is certainly present in the index, we must recheck * because (1) the key might be hashed, and (2) the index match might * be for a key that's not at top level of the JSON object. For (1), * we could look at the query key to see if it's hashed and not * recheck if not, but the index lacks enough info to tell about (2). */ *recheck = true; res = true; } else if (strategy == JsonbExistsAnyStrategyNumber) { /* As for plain exists, we must recheck */ *recheck = true; res = true; } else if (strategy == JsonbExistsAllStrategyNumber) { /* As for plain exists, we must recheck */ *recheck = true; /* ... but unless all the keys are present, we can say "false" */ for (i = 0; i < nkeys; i++) { if (!check[i]) { res = false; break; } } } else elog(ERROR, "unrecognized strategy number: %d", strategy); PG_RETURN_BOOL(res); } Datum gin_triconsistent_jsonb(PG_FUNCTION_ARGS) { GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); /* Jsonb *query = PG_GETARG_JSONB(2); */ int32 nkeys = PG_GETARG_INT32(3); /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ GinTernaryValue res = GIN_MAYBE; int32 i; /* * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this * corresponds to always forcing recheck in the regular consistent * function, for the reasons listed there. */ if (strategy == JsonbContainsStrategyNumber || strategy == JsonbExistsAllStrategyNumber) { /* All extracted keys must be present */ for (i = 0; i < nkeys; i++) { if (check[i] == GIN_FALSE) { res = GIN_FALSE; break; } } } else if (strategy == JsonbExistsStrategyNumber || strategy == JsonbExistsAnyStrategyNumber) { /* At least one extracted key must be present */ res = GIN_FALSE; for (i = 0; i < nkeys; i++) { if (check[i] == GIN_TRUE || check[i] == GIN_MAYBE) { res = GIN_MAYBE; break; } } } else elog(ERROR, "unrecognized strategy number: %d", strategy); PG_RETURN_GIN_TERNARY_VALUE(res); } /* * * jsonb_path_ops GIN opclass support functions * * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON * value; but the JSON key(s) leading to each value are also included in its * hash computation. This means we can only support containment queries, * but the index can distinguish, for example, {"foo": 42} from {"bar": 42} * since different hashes will be generated. * */ Datum gin_extract_jsonb_path(PG_FUNCTION_ARGS) { Jsonb *jb = PG_GETARG_JSONB(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); int total = 2 * JB_ROOT_COUNT(jb); JsonbIterator *it; JsonbValue v; JsonbIteratorToken r; PathHashStack tail; PathHashStack *stack; int i = 0; Datum *entries; /* If the root level is empty, we certainly have no keys */ if (total == 0) { *nentries = 0; PG_RETURN_POINTER(NULL); } /* Otherwise, use 2 * root count as initial estimate of result size */ entries = (Datum *) palloc(sizeof(Datum) * total); /* We keep a stack of partial hashes corresponding to parent key levels */ tail.parent = NULL; tail.hash = 0; stack = &tail; it = JsonbIteratorInit(&jb->root); while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) { PathHashStack *parent; /* Since we recurse into the object, we might need more space */ if (i >= total) { total *= 2; entries = (Datum *) repalloc(entries, sizeof(Datum) * total); } switch (r) { case WJB_BEGIN_ARRAY: case WJB_BEGIN_OBJECT: /* Push a stack level for this object */ parent = stack; stack = (PathHashStack *) palloc(sizeof(PathHashStack)); /* * We pass forward hashes from outer nesting levels so that * the hashes for nested values will include outer keys as * well as their own keys. * * Nesting an array within another array will not alter * innermost scalar element hash values, but that seems * inconsequential. */ stack->hash = parent->hash; stack->parent = parent; break; case WJB_KEY: /* mix this key into the current outer hash */ JsonbHashScalarValue(&v, &stack->hash); /* hash is now ready to incorporate the value */ break; case WJB_ELEM: case WJB_VALUE: /* mix the element or value's hash into the prepared hash */ JsonbHashScalarValue(&v, &stack->hash); /* and emit an index entry */ entries[i++] = UInt32GetDatum(stack->hash); /* reset hash for next key, value, or sub-object */ stack->hash = stack->parent->hash; break; case WJB_END_ARRAY: case WJB_END_OBJECT: /* Pop the stack */ parent = stack->parent; pfree(stack); stack = parent; /* reset hash for next key, value, or sub-object */ if (stack->parent) stack->hash = stack->parent->hash; else stack->hash = 0; break; default: elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); } } *nentries = i; PG_RETURN_POINTER(entries); } Datum gin_extract_jsonb_query_path(PG_FUNCTION_ARGS) { int32 *nentries = (int32 *) PG_GETARG_POINTER(1); StrategyNumber strategy = PG_GETARG_UINT16(2); int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); Datum *entries; if (strategy != JsonbContainsStrategyNumber) elog(ERROR, "unrecognized strategy number: %d", strategy); /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */ entries = (Datum *) DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path, PG_GETARG_DATUM(0), PointerGetDatum(nentries))); /* ... although "contains {}" requires a full index scan */ if (*nentries == 0) *searchMode = GIN_SEARCH_MODE_ALL; PG_RETURN_POINTER(entries); } Datum gin_consistent_jsonb_path(PG_FUNCTION_ARGS) { bool *check = (bool *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); /* Jsonb *query = PG_GETARG_JSONB(2); */ int32 nkeys = PG_GETARG_INT32(3); /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ bool *recheck = (bool *) PG_GETARG_POINTER(5); bool res = true; int32 i; if (strategy != JsonbContainsStrategyNumber) elog(ERROR, "unrecognized strategy number: %d", strategy); /* * jsonb_path_ops is necessarily lossy, not only because of hash * collisions but also because it doesn't preserve complete information * about the structure of the JSON object. Besides, there are some * special rules around the containment of raw scalars in arrays that are * not handled here. So we must always recheck a match. However, if not * all of the keys are present, the tuple certainly doesn't match. */ *recheck = true; for (i = 0; i < nkeys; i++) { if (!check[i]) { res = false; break; } } PG_RETURN_BOOL(res); } Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS) { GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); /* Jsonb *query = PG_GETARG_JSONB(2); */ int32 nkeys = PG_GETARG_INT32(3); /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ GinTernaryValue res = GIN_MAYBE; int32 i; if (strategy != JsonbContainsStrategyNumber) elog(ERROR, "unrecognized strategy number: %d", strategy); /* * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this * corresponds to always forcing recheck in the regular consistent * function, for the reasons listed there. */ for (i = 0; i < nkeys; i++) { if (check[i] == GIN_FALSE) { res = GIN_FALSE; break; } } PG_RETURN_GIN_TERNARY_VALUE(res); } /* * Construct a jsonb_ops GIN key from a flag byte and a textual representation * (which need not be null-terminated). This function is responsible * for hashing overlength text representations; it will add the * JGINFLAG_HASHED bit to the flag value if it does that. */ static Datum make_text_key(char flag, const char *str, int len) { text *item; char hashbuf[10]; if (len > JGIN_MAXLENGTH) { uint32 hashval; hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len)); snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval); str = hashbuf; len = 8; flag |= JGINFLAG_HASHED; } /* * Now build the text Datum. For simplicity we build a 4-byte-header * varlena text Datum here, but we expect it will get converted to short * header format when stored in the index. */ item = (text *) palloc(VARHDRSZ + len + 1); SET_VARSIZE(item, VARHDRSZ + len + 1); *VARDATA(item) = flag; memcpy(VARDATA(item) + 1, str, len); return PointerGetDatum(item); } /* * Create a textual representation of a JsonbValue that will serve as a GIN * key in a jsonb_ops index. is_key is true if the JsonbValue is a key, * or if it is a string array element (since we pretend those are keys, * see jsonb.h). */ static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key) { Datum item; char *cstr; switch (scalarVal->type) { case jbvNull: Assert(!is_key); item = make_text_key(JGINFLAG_NULL, "", 0); break; case jbvBool: Assert(!is_key); item = make_text_key(JGINFLAG_BOOL, scalarVal->val.boolean ? "t" : "f", 1); break; case jbvNumeric: Assert(!is_key); /* * A normalized textual representation, free of trailing zeroes, * is required so that numerically equal values will produce equal * strings. * * It isn't ideal that numerics are stored in a relatively bulky * textual format. However, it's a notationally convenient way of * storing a "union" type in the GIN B-Tree, and indexing Jsonb * strings takes precedence. */ cstr = numeric_normalize(scalarVal->val.numeric); item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr)); pfree(cstr); break; case jbvString: item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR, scalarVal->val.string.val, scalarVal->val.string.len); break; default: elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type); item = 0; /* keep compiler quiet */ break; } return item; }