/*-------------------------------------------------------------------------
*
* jsonb_util.c
* converting between Jsonb and JsonbValues, and iterating.
*
* Copyright (c) 2014-2020, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* src/backend/utils/adt/jsonb_util.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
#include "common/jsonapi.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/datetime.h"
#include "utils/json.h"
#include "utils/jsonb.h"
#include "utils/memutils.h"
#include "utils/varlena.h"
/*
* Maximum number of elements in an array (or key/value pairs in an object).
* This is limited by two things: the size of the JEntry array must fit
* in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
* reserved for that in the JsonbContainer.header field.
*
* (The total size of an array's or object's elements is also limited by
* JENTRY_OFFLENMASK, but we're not concerned about that here.)
*/
#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
static void fillJsonbValue(JsonbContainer *container, int index,
char *base_addr, uint32 offset,
JsonbValue *result);
static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
static Jsonb *convertToJsonb(JsonbValue *val);
static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
static int reserveFromBuffer(StringInfo buffer, int len);
static void appendToBuffer(StringInfo buffer, const char *data, int len);
static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
static short padBufferToInt(StringInfo buffer);
static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
static JsonbIterator *freeAndGetParent(JsonbIterator *it);
static JsonbParseState *pushState(JsonbParseState **pstate);
static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
static int lengthCompareJsonbStringValue(const void *a, const void *b);
static int lengthCompareJsonbString(const char *val1, int len1,
const char *val2, int len2);
static int lengthCompareJsonbPair(const void *a, const void *b, void *arg);
static void uniqueifyJsonbObject(JsonbValue *object);
static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
JsonbIteratorToken seq,
JsonbValue *scalarVal);
/*
* Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
*
* There isn't a JsonbToJsonbValue(), because generally we find it more
* convenient to directly iterate through the Jsonb representation and only
* really convert nested scalar values. JsonbIteratorNext() does this, so that
* clients of the iteration code don't have to directly deal with the binary
* representation (JsonbDeepContains() is a notable exception, although all
* exceptions are internal to this module). In general, functions that accept
* a JsonbValue argument are concerned with the manipulation of scalar values,
* or simple containers of scalar values, where it would be inconvenient to
* deal with a great amount of other state.
*/
Jsonb *
JsonbValueToJsonb(JsonbValue *val)
{
Jsonb *out;
if (IsAJsonbScalar(val))
{
/* Scalar value */
JsonbParseState *pstate = NULL;
JsonbValue *res;
JsonbValue scalarArray;
scalarArray.type = jbvArray;
scalarArray.val.array.rawScalar = true;
scalarArray.val.array.nElems = 1;
pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
pushJsonbValue(&pstate, WJB_ELEM, val);
res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
out = convertToJsonb(res);
}
else if (val->type == jbvObject || val->type == jbvArray)
{
out = convertToJsonb(val);
}
else
{
Assert(val->type == jbvBinary);
out = palloc(VARHDRSZ + val->val.binary.len);
SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
}
return out;
}
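
/*
 * For illustration, a minimal sketch of building a Jsonb datum for the
 * scalar 42 through this function (throwaway example; error handling
 * omitted):
 *
 *     JsonbValue  jbv;
 *     Jsonb      *jb;
 *
 *     jbv.type = jbvNumeric;
 *     jbv.val.numeric = DatumGetNumeric(
 *         DirectFunctionCall1(int4_numeric, Int32GetDatum(42)));
 *     jb = JsonbValueToJsonb(&jbv);
 *
 * The scalar is wrapped in a one-element "raw scalar" pseudo array, as the
 * code above shows.
 */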
/*
* Get the offset of the variable-length portion of a Jsonb node within
* the variable-length-data part of its container. The node is identified
* by index within the container's JEntry array.
*/
uint32
getJsonbOffset(const JsonbContainer *jc, int index)
{
uint32 offset = 0;
int i;
/*
* Start offset of this entry is equal to the end offset of the previous
* entry. Walk backwards to the most recent entry stored as an end
* offset, returning that offset plus any lengths in between.
*/
for (i = index - 1; i >= 0; i--)
{
offset += JBE_OFFLENFLD(jc->children[i]);
if (JBE_HAS_OFF(jc->children[i]))
break;
}
return offset;
}
/*
* Get the length of the variable-length portion of a Jsonb node.
* The node is identified by index within the container's JEntry array.
*/
uint32
getJsonbLength(const JsonbContainer *jc, int index)
{
uint32 off;
uint32 len;
/*
* If the length is stored directly in the JEntry, just return it.
* Otherwise, get the begin offset of the entry, and subtract that from
* the stored end+1 offset.
*/
if (JBE_HAS_OFF(jc->children[index]))
{
off = getJsonbOffset(jc, index);
len = JBE_OFFLENFLD(jc->children[index]) - off;
}
else
len = JBE_OFFLENFLD(jc->children[index]);
return len;
}
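
/*
 * To make the offset/length scheme concrete, consider a hypothetical
 * container whose first four JEntrys have OFFLENFLD values {5, 7, 20, 4},
 * with only children[2] carrying the JENTRY_HAS_OFF flag (i.e. it stores
 * an end offset; the others store lengths).  Then:
 *
 *     getJsonbOffset(jc, 3) stops at children[2] and returns 20;
 *     getJsonbOffset(jc, 2) sums the lengths of children[1] and
 *         children[0], returning 7 + 5 = 12;
 *     getJsonbLength(jc, 2) returns the stored end offset minus the
 *         start offset, 20 - 12 = 8.
 *
 * In the worst case the backwards walk examines a bounded number of
 * entries (on the order of JB_OFFSET_STRIDE), a constant independent of
 * the container's total size.
 */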
/*
* BT comparator worker function. Returns an integer less than, equal to, or
* greater than zero, indicating whether a is less than, equal to, or greater
* than b, consistent with the requirements for a B-Tree operator class.
*
* Strings are compared lexically, in contrast with other places where we use
* much simpler comparison logic for searching through strings. Since this is
* called from B-Tree support function 1, we're careful about not leaking
* memory here.
*/
int
compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
{
JsonbIterator *ita,
*itb;
int res = 0;
ita = JsonbIteratorInit(a);
itb = JsonbIteratorInit(b);
do
{
JsonbValue va,
vb;
JsonbIteratorToken ra,
rb;
ra = JsonbIteratorNext(&ita, &va, false);
rb = JsonbIteratorNext(&itb, &vb, false);
if (ra == rb)
{
if (ra == WJB_DONE)
{
/* Decisively equal */
break;
}
if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
{
/*
* There is no array or object to compare at this stage of
* processing. jbvArray/jbvObject values are compared
* initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
* tokens.
*/
continue;
}
if (va.type == vb.type)
{
switch (va.type)
{
case jbvString:
case jbvNull:
case jbvNumeric:
case jbvBool:
res = compareJsonbScalarValue(&va, &vb);
break;
case jbvArray:
/*
* This could be a "raw scalar" pseudo array. That's
* a special case here though, since we still want the
* general type-based comparisons to apply, and as far
* as we're concerned a pseudo array is just a scalar.
*/
if (va.val.array.rawScalar != vb.val.array.rawScalar)
res = (va.val.array.rawScalar) ? -1 : 1;
if (va.val.array.nElems != vb.val.array.nElems)
res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
break;
case jbvObject:
if (va.val.object.nPairs != vb.val.object.nPairs)
res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
break;
case jbvBinary:
elog(ERROR, "unexpected jbvBinary value");
break;
case jbvDatetime:
elog(ERROR, "unexpected jbvDatetime value");
break;
}
}
else
{
/* Type-defined order */
res = (va.type > vb.type) ? 1 : -1;
}
}
else
{
/*
* It's safe to assume that the types differed, and that the va
* and vb values passed were set.
*
* If the two values were of the same container type, then there'd
* have been a chance to observe the variation in the number of
* elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
* either two heterogeneously-typed containers, or a container and
* some scalar type.
*
* We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
* cases here, because we would have seen the corresponding
* WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
* concluded that they don't match.
*/
Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
Assert(va.type != vb.type);
Assert(va.type != jbvBinary);
Assert(vb.type != jbvBinary);
/* Type-defined order */
res = (va.type > vb.type) ? 1 : -1;
}
}
while (res == 0);
while (ita != NULL)
{
JsonbIterator *i = ita->parent;
pfree(ita);
ita = i;
}
while (itb != NULL)
{
JsonbIterator *i = itb->parent;
pfree(itb);
itb = i;
}
return res;
}
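
/*
 * The "type-defined order" above falls out of the jbvType enum declaration
 * order, which produces the documented jsonb btree ordering
 * Object > Array > Boolean > Numeric > String > Null.  For example,
 * '1'::jsonb sorts after '"a"'::jsonb because jbvNumeric > jbvString.
 */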
/*
* Find value in object (i.e. the "value" part of some key/value pair in an
* object), or find a matching element if we're looking through an array. Do
* so on the basis of equality of the object keys only, or alternatively
* element values only, with a caller-supplied value "key". The "flags"
* argument allows the caller to specify which container types are of interest.
*
* This exported utility function exists to facilitate various cases concerned
* with "containment". If asked to look through an object, the caller had
* better pass a Jsonb String, because object keys can only be strings.
* Otherwise, for an array, any type of JsonbValue will do.
*
* In order to proceed with the search, it is necessary for callers to have
* both specified an interest in exactly one particular container type with an
* appropriate flag, as well as having the pointed-to Jsonb container be of
* one of those same container types at the top level. (Actually, we just do
* whichever makes sense to save callers the trouble of figuring it out - at
* most one can make sense, because the container either points to an array
* (possibly a "raw scalar" pseudo array) or an object.)
*
* Note that we can return a jbvBinary JsonbValue if this is called on an
* object, but we never do so on an array. If the caller asks to look through
* a container type that is not of the type pointed to by the container,
* immediately fall through and return NULL. If we cannot find the value,
* return NULL. Otherwise, return palloc()'d copy of value.
*/
JsonbValue *
findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
JsonbValue *key)
{
JEntry *children = container->children;
int count = JsonContainerSize(container);
Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
/* Quick out without a palloc cycle if object/array is empty */
if (count <= 0)
return NULL;
if ((flags & JB_FARRAY) && JsonContainerIsArray(container))
{
JsonbValue *result = palloc(sizeof(JsonbValue));
char *base_addr = (char *) (children + count);
uint32 offset = 0;
int i;
for (i = 0; i < count; i++)
{
fillJsonbValue(container, i, base_addr, offset, result);
if (key->type == result->type)
{
if (equalsJsonbScalarValue(key, result))
return result;
}
JBE_ADVANCE_OFFSET(offset, children[i]);
}
pfree(result);
}
else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container))
{
/* Object key passed by caller must be a string */
Assert(key->type == jbvString);
return getKeyJsonValueFromContainer(container, key->val.string.val,
key->val.string.len, NULL);
}
/* Not found */
return NULL;
}
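
/*
 * A minimal sketch of a caller probing an object for a key, roughly what
 * the jsonb ? operator does (error handling omitted):
 *
 *     JsonbValue  key;
 *     JsonbValue *v;
 *
 *     key.type = jbvString;
 *     key.val.string.val = "foo";
 *     key.val.string.len = 3;
 *     v = findJsonbValueFromContainer(&jb->root, JB_FOBJECT, &key);
 *
 * A non-NULL v is a palloc'd copy of the matched value, which the caller
 * is responsible for freeing (or letting the memory context clean up).
 */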
/*
* Find value by key in Jsonb object and fetch it into 'res', which is also
* returned.
*
* 'res' can be passed in as NULL, in which case it's newly palloc'ed here.
*/
JsonbValue *
getKeyJsonValueFromContainer(JsonbContainer *container,
const char *keyVal, int keyLen, JsonbValue *res)
{
JEntry *children = container->children;
int count = JsonContainerSize(container);
char *baseAddr;
uint32 stopLow,
stopHigh;
Assert(JsonContainerIsObject(container));
/* Quick out without a palloc cycle if object is empty */
if (count <= 0)
return NULL;
/*
* Binary search the container. Since we know this is an object, account
* for *Pairs* of Jentrys
*/
baseAddr = (char *) (children + count * 2);
stopLow = 0;
stopHigh = count;
while (stopLow < stopHigh)
{
uint32 stopMiddle;
int difference;
const char *candidateVal;
int candidateLen;
stopMiddle = stopLow + (stopHigh - stopLow) / 2;
candidateVal = baseAddr + getJsonbOffset(container, stopMiddle);
candidateLen = getJsonbLength(container, stopMiddle);
difference = lengthCompareJsonbString(candidateVal, candidateLen,
keyVal, keyLen);
if (difference == 0)
{
/* Found our key, return corresponding value */
int index = stopMiddle + count;
if (!res)
res = palloc(sizeof(JsonbValue));
fillJsonbValue(container, index, baseAddr,
getJsonbOffset(container, index),
res);
return res;
}
else
{
if (difference < 0)
stopLow = stopMiddle + 1;
else
stopHigh = stopMiddle;
}
}
/* Not found */
return NULL;
}
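
/*
 * To illustrate the layout the binary search above relies on: a
 * hypothetical object {"a": 1, "b": 2} stores four JEntrys ordered as
 * key "a", key "b", value 1, value 2 ("all the keys, then all the
 * values").  With count = 2, the key at sorted position i has JEntry
 * index i and its value has index i + count, which is why the code
 * fetches the value at stopMiddle + count after a key match.
 */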
/*
* Get i-th value of a Jsonb array.
*
* Returns palloc()'d copy of the value, or NULL if it does not exist.
*/
JsonbValue *
getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
{
JsonbValue *result;
char *base_addr;
uint32 nelements;
if (!JsonContainerIsArray(container))
elog(ERROR, "not a jsonb array");
nelements = JsonContainerSize(container);
base_addr = (char *) &container->children[nelements];
if (i >= nelements)
return NULL;
result = palloc(sizeof(JsonbValue));
fillJsonbValue(container, i, base_addr,
getJsonbOffset(container, i),
result);
return result;
}
/*
* A helper function to fill in a JsonbValue to represent an element of an
* array, or a key or value of an object.
*
* The node's JEntry is at container->children[index], and its variable-length
* data is at base_addr + offset. We make the caller determine the offset
* since in many cases the caller can amortize that work across multiple
* children. When it can't, it can just call getJsonbOffset().
*
* A nested array or object will be returned as jbvBinary, ie. it won't be
* expanded.
*/
static void
fillJsonbValue(JsonbContainer *container, int index,
char *base_addr, uint32 offset,
JsonbValue *result)
{
JEntry entry = container->children[index];
if (JBE_ISNULL(entry))
{
result->type = jbvNull;
}
else if (JBE_ISSTRING(entry))
{
result->type = jbvString;
result->val.string.val = base_addr + offset;
result->val.string.len = getJsonbLength(container, index);
Assert(result->val.string.len >= 0);
}
else if (JBE_ISNUMERIC(entry))
{
result->type = jbvNumeric;
result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
}
else if (JBE_ISBOOL_TRUE(entry))
{
result->type = jbvBool;
result->val.boolean = true;
}
else if (JBE_ISBOOL_FALSE(entry))
{
result->type = jbvBool;
result->val.boolean = false;
}
else
{
Assert(JBE_ISCONTAINER(entry));
result->type = jbvBinary;
/* Remove alignment padding from data pointer and length */
result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
result->val.binary.len = getJsonbLength(container, index) -
(INTALIGN(offset) - offset);
}
}
/*
* Push JsonbValue into JsonbParseState.
*
* Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
* JsonbValue to a Jsonb.
*
* Initial state of *JsonbParseState is NULL, since it'll be allocated here
* originally (caller will get JsonbParseState back by reference).
*
* Only sequential tokens pertaining to non-container types should pass a
* JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a
* "raw scalar" pseudo array to append it - the actual scalar should be passed
* next and it will be added as the only member of the array.
*
* Values of type jbvBinary, which are rolled up arrays and objects,
* are unpacked before being added to the result.
*/
JsonbValue *
pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
JsonbValue *jbval)
{
JsonbIterator *it;
JsonbValue *res = NULL;
JsonbValue v;
JsonbIteratorToken tok;
if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
jbval->type != jbvBinary)
{
/* drop through */
return pushJsonbValueScalar(pstate, seq, jbval);
}
/* unpack the binary and add each piece to the pstate */
it = JsonbIteratorInit(jbval->val.binary.data);
while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
res = pushJsonbValueScalar(pstate, tok,
tok < WJB_BEGIN_ARRAY ? &v : NULL);
return res;
}
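
/*
 * A minimal sketch of driving this function by hand to build the object
 * {"a": true} (error handling omitted):
 *
 *     JsonbParseState *pstate = NULL;
 *     JsonbValue  k,
 *                 v,
 *                *res;
 *
 *     k.type = jbvString;
 *     k.val.string.val = "a";
 *     k.val.string.len = 1;
 *     v.type = jbvBool;
 *     v.val.boolean = true;
 *
 *     pushJsonbValue(&pstate, WJB_BEGIN_OBJECT, NULL);
 *     pushJsonbValue(&pstate, WJB_KEY, &k);
 *     pushJsonbValue(&pstate, WJB_VALUE, &v);
 *     res = pushJsonbValue(&pstate, WJB_END_OBJECT, NULL);
 *
 * The returned res can then be handed to JsonbValueToJsonb() to produce
 * an on-disk datum.
 */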
/*
* Do the actual pushing, with only scalar or pseudo-scalar-array values
* accepted.
*/
static JsonbValue *
pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
JsonbValue *scalarVal)
{
JsonbValue *result = NULL;
switch (seq)
{
case WJB_BEGIN_ARRAY:
Assert(!scalarVal || scalarVal->val.array.rawScalar);
*pstate = pushState(pstate);
result = &(*pstate)->contVal;
(*pstate)->contVal.type = jbvArray;
(*pstate)->contVal.val.array.nElems = 0;
(*pstate)->contVal.val.array.rawScalar = (scalarVal &&
scalarVal->val.array.rawScalar);
if (scalarVal && scalarVal->val.array.nElems > 0)
{
/* Assume that this array is still really a scalar */
Assert(scalarVal->type == jbvArray);
(*pstate)->size = scalarVal->val.array.nElems;
}
else
{
(*pstate)->size = 4;
}
(*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
(*pstate)->size);
break;
case WJB_BEGIN_OBJECT:
Assert(!scalarVal);
*pstate = pushState(pstate);
result = &(*pstate)->contVal;
(*pstate)->contVal.type = jbvObject;
(*pstate)->contVal.val.object.nPairs = 0;
(*pstate)->size = 4;
(*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
(*pstate)->size);
break;
case WJB_KEY:
Assert(scalarVal->type == jbvString);
appendKey(*pstate, scalarVal);
break;
case WJB_VALUE:
Assert(IsAJsonbScalar(scalarVal));
appendValue(*pstate, scalarVal);
break;
case WJB_ELEM:
Assert(IsAJsonbScalar(scalarVal));
appendElement(*pstate, scalarVal);
break;
case WJB_END_OBJECT:
uniqueifyJsonbObject(&(*pstate)->contVal);
/* FALLTHROUGH */
case WJB_END_ARRAY:
/* Steps here common to WJB_END_OBJECT case */
Assert(!scalarVal);
result = &(*pstate)->contVal;
/*
* Pop stack and push current array/object as value in parent
* array/object
*/
*pstate = (*pstate)->next;
if (*pstate)
{
switch ((*pstate)->contVal.type)
{
case jbvArray:
appendElement(*pstate, result);
break;
case jbvObject:
appendValue(*pstate, result);
break;
default:
elog(ERROR, "invalid jsonb container type");
}
}
break;
default:
elog(ERROR, "unrecognized jsonb sequential processing token");
}
return result;
}
/*
* pushJsonbValue() worker: Iteration-like forming of Jsonb
*/
static JsonbParseState *
pushState(JsonbParseState **pstate)
{
JsonbParseState *ns = palloc(sizeof(JsonbParseState));
ns->next = *pstate;
return ns;
}
/*
* pushJsonbValue() worker: Append a pair key to state when generating a Jsonb
*/
static void
appendKey(JsonbParseState *pstate, JsonbValue *string)
{
JsonbValue *object = &pstate->contVal;
Assert(object->type == jbvObject);
Assert(string->type == jbvString);
if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
JSONB_MAX_PAIRS)));
if (object->val.object.nPairs >= pstate->size)
{
pstate->size *= 2;
object->val.object.pairs = repalloc(object->val.object.pairs,
sizeof(JsonbPair) * pstate->size);
}
object->val.object.pairs[object->val.object.nPairs].key = *string;
object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
}
/*
* pushJsonbValue() worker: Append a pair value to state when generating a
* Jsonb
*/
static void
appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
{
JsonbValue *object = &pstate->contVal;
Assert(object->type == jbvObject);
object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
}
/*
* pushJsonbValue() worker: Append an element to state when generating a Jsonb
*/
static void
appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
{
JsonbValue *array = &pstate->contVal;
Assert(array->type == jbvArray);
if (array->val.array.nElems >= JSONB_MAX_ELEMS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
JSONB_MAX_ELEMS)));
if (array->val.array.nElems >= pstate->size)
{
pstate->size *= 2;
array->val.array.elems = repalloc(array->val.array.elems,
sizeof(JsonbValue) * pstate->size);
}
array->val.array.elems[array->val.array.nElems++] = *scalarVal;
}
/*
* Given a JsonbContainer, expand to JsonbIterator to iterate over items
* fully expanded to in-memory representation for manipulation.
*
* See JsonbIteratorNext() for notes on memory management.
*/
JsonbIterator *
JsonbIteratorInit(JsonbContainer *container)
{
return iteratorFromContainer(container, NULL);
}
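
/*
 * A typical iteration loop, sketched; tokens arrive in document order
 * (e.g. WJB_BEGIN_OBJECT, then alternating WJB_KEY/WJB_VALUE pairs, then
 * WJB_END_OBJECT):
 *
 *     JsonbIterator *it = JsonbIteratorInit(&jb->root);
 *     JsonbValue  v;
 *     JsonbIteratorToken tok;
 *
 *     while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
 *     {
 *         if (tok == WJB_KEY)
 *             ... v is an object key, always a jbvString ...
 *         else if (tok == WJB_VALUE || tok == WJB_ELEM)
 *             ... v is a scalar value or array element ...
 *     }
 */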
/*
* Get next JsonbValue while iterating
*
* Caller should initially pass their own, original iterator. They may get
* back a child iterator palloc()'d here instead. The function can be relied
* on to free those child iterators, lest the memory allocated for highly
* nested objects become unreasonable, but only if callers don't end iteration
* early (by breaking upon having found something in a search, for example).
*
* Callers in such a scenario, that are particularly sensitive to leaking
* memory in a long-lived context may walk the ancestral tree from the final
* iterator we left them with to its oldest ancestor, pfree()ing as they go.
 * They do not have to free any other memory previously allocated for
 * iterators that is not accessible as a direct ancestor of the iterator
 * they were last passed back.
*
* Returns "Jsonb sequential processing" token value. Iterator "state"
* reflects the current stage of the process in a less granular fashion, and is
* mostly used here to track things internally with respect to particular
* iterators.
*
* Clients of this function should not have to handle any jbvBinary values
* (since recursive calls will deal with this), provided skipNested is false.
* It is our job to expand the jbvBinary representation without bothering them
* with it. However, clients should not take it upon themselves to touch array
* or Object element/pair buffers, since their element/pair pointers are
* garbage. Also, *val will not be set when returning WJB_END_ARRAY or
* WJB_END_OBJECT, on the assumption that it's only useful to access values
* when recursing in.
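 *
 * For example, iterating over {"a": [1]} with skipNested = false yields
 * WJB_BEGIN_OBJECT, WJB_KEY ("a"), WJB_BEGIN_ARRAY, WJB_ELEM (1),
 * WJB_END_ARRAY, WJB_END_OBJECT, and finally WJB_DONE.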
*/
JsonbIteratorToken
JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
{
if (*it == NULL)
return WJB_DONE;
/*
* When stepping into a nested container, we jump back here to start
* processing the child. We will not recurse further in one call, because
* processing the child will always begin in JBI_ARRAY_START or
* JBI_OBJECT_START state.
*/
recurse:
switch ((*it)->state)
{
case JBI_ARRAY_START:
/* Set v to array on first array call */
val->type = jbvArray;
val->val.array.nElems = (*it)->nElems;
/*
* v->val.array.elems is not actually set, because we aren't doing
* a full conversion
*/
val->val.array.rawScalar = (*it)->isScalar;
(*it)->curIndex = 0;
(*it)->curDataOffset = 0;
(*it)->curValueOffset = 0; /* not actually used */
/* Set state for next call */
(*it)->state = JBI_ARRAY_ELEM;
return WJB_BEGIN_ARRAY;
case JBI_ARRAY_ELEM:
if ((*it)->curIndex >= (*it)->nElems)
{
/*
* All elements within array already processed. Report this
* to caller, and give it back original parent iterator (which
* independently tracks iteration progress at its level of
* nesting).
*/
*it = freeAndGetParent(*it);
return WJB_END_ARRAY;
}
fillJsonbValue((*it)->container, (*it)->curIndex,
(*it)->dataProper, (*it)->curDataOffset,
val);
JBE_ADVANCE_OFFSET((*it)->curDataOffset,
(*it)->children[(*it)->curIndex]);
(*it)->curIndex++;
if (!IsAJsonbScalar(val) && !skipNested)
{
/* Recurse into container. */
*it = iteratorFromContainer(val->val.binary.data, *it);
goto recurse;
}
else
{
/*
* Scalar item in array, or a container and caller didn't want
* us to recurse into it.
*/
return WJB_ELEM;
}
case JBI_OBJECT_START:
/* Set v to object on first object call */
val->type = jbvObject;
val->val.object.nPairs = (*it)->nElems;
/*
* v->val.object.pairs is not actually set, because we aren't
* doing a full conversion
*/
(*it)->curIndex = 0;
(*it)->curDataOffset = 0;
(*it)->curValueOffset = getJsonbOffset((*it)->container,
(*it)->nElems);
/* Set state for next call */
(*it)->state = JBI_OBJECT_KEY;
return WJB_BEGIN_OBJECT;
case JBI_OBJECT_KEY:
if ((*it)->curIndex >= (*it)->nElems)
{
/*
* All pairs within object already processed. Report this to
* caller, and give it back original containing iterator
* (which independently tracks iteration progress at its level
* of nesting).
*/
*it = freeAndGetParent(*it);
return WJB_END_OBJECT;
}
else
{
/* Return key of a key/value pair. */
fillJsonbValue((*it)->container, (*it)->curIndex,
(*it)->dataProper, (*it)->curDataOffset,
val);
if (val->type != jbvString)
elog(ERROR, "unexpected jsonb type as object key");
/* Set state for next call */
(*it)->state = JBI_OBJECT_VALUE;
return WJB_KEY;
}
case JBI_OBJECT_VALUE:
/* Set state for next call */
(*it)->state = JBI_OBJECT_KEY;
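			/* Value JEntrys are stored after all of the key JEntrys */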
fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
(*it)->dataProper, (*it)->curValueOffset,
val);
JBE_ADVANCE_OFFSET((*it)->curDataOffset,
(*it)->children[(*it)->curIndex]);
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
(*it)->children[(*it)->curIndex + (*it)->nElems]);
(*it)->curIndex++;
/*
* Value may be a container, in which case we recurse with new,
* child iterator (unless the caller asked not to, by passing
* skipNested).
*/
if (!IsAJsonbScalar(val) && !skipNested)
{
*it = iteratorFromContainer(val->val.binary.data, *it);
goto recurse;
}
else
return WJB_VALUE;
}
elog(ERROR, "invalid iterator state");
return -1;
}
/*
* Initialize an iterator for iterating all elements in a container.
*/
static JsonbIterator *
iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
{
JsonbIterator *it;
it = palloc0(sizeof(JsonbIterator));
it->container = container;
it->parent = parent;
it->nElems = JsonContainerSize(container);
/* Array starts just after header */
it->children = container->children;
switch (container->header & (JB_FARRAY | JB_FOBJECT))
{
case JB_FARRAY:
it->dataProper =
(char *) it->children + it->nElems * sizeof(JEntry);
it->isScalar = JsonContainerIsScalar(container);
/* This is either a "raw scalar", or an array */
Assert(!it->isScalar || it->nElems == 1);
it->state = JBI_ARRAY_START;
break;
case JB_FOBJECT:
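			/*
			 * An object stores nElems key JEntrys followed by nElems value
			 * JEntrys, so the data proper begins 2 * nElems JEntrys in.
			 */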
it->dataProper =
(char *) it->children + it->nElems * sizeof(JEntry) * 2;
it->state = JBI_OBJECT_START;
break;
default:
elog(ERROR, "unknown type of jsonb container");
}
return it;
}
/*
* JsonbIteratorNext() worker: Return parent, while freeing memory for current
* iterator
*/
static JsonbIterator *
freeAndGetParent(JsonbIterator *it)
{
JsonbIterator *v = it->parent;
pfree(it);
return v;
}
/*
* Worker for "contains" operator's function
*
* Formally speaking, containment is top-down, unordered subtree isomorphism.
*
* Takes iterators that belong to some container type. These iterators
* "belong" to those values in the sense that they've just been initialized in
* respect of them by the caller (perhaps in a nested fashion).
*
* "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
* We determine if mContained is contained within val.
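 *
 * For example, '{"a": {"b": 1, "c": 2}}' contains '{"a": {"b": 1}}', and
 * '[1, [2, 3]]' contains '[[2]]', but '[1, 2]' does not contain '[[1]]'.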
*/
bool
JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
{
JsonbValue vval,
vcontained;
JsonbIteratorToken rval,
rcont;
/*
* Guard against stack overflow due to overly complex Jsonb.
*
* Functions called here independently take this precaution, but that
* might not be sufficient since this is also a recursive function.
*/
check_stack_depth();
rval = JsonbIteratorNext(val, &vval, false);
rcont = JsonbIteratorNext(mContained, &vcontained, false);
if (rval != rcont)
{
/*
* The differing return values can immediately be taken as indicating
* two differing container types at this nesting level, which is
* sufficient reason to give up entirely (but it should be the case
* that they're both some container type).
*/
Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
return false;
}
else if (rcont == WJB_BEGIN_OBJECT)
{
Assert(vval.type == jbvObject);
Assert(vcontained.type == jbvObject);
/*
* If the lhs has fewer pairs than the rhs, it can't possibly contain
* the rhs. (This conclusion is safe only because we de-duplicate
* keys in all Jsonb objects; thus there can be no corresponding
* optimization in the array case.) The case probably won't arise
* often, but since it's such a cheap check we may as well make it.
*/
if (vval.val.object.nPairs < vcontained.val.object.nPairs)
return false;
/* Work through rhs "is it contained within?" object */
for (;;)
{
JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
JsonbValue lhsValBuf;
rcont = JsonbIteratorNext(mContained, &vcontained, false);
/*
* When we get through caller's rhs "is it contained within?"
* object without failing to find one of its values, it's
* contained.
*/
if (rcont == WJB_END_OBJECT)
return true;
Assert(rcont == WJB_KEY);
Assert(vcontained.type == jbvString);
/* First, find value by key... */
lhsVal =
getKeyJsonValueFromContainer((*val)->container,
vcontained.val.string.val,
vcontained.val.string.len,
&lhsValBuf);
if (!lhsVal)
return false;
/*
* ...at this stage it is apparent that there is at least a key
* match for this rhs pair.
*/
rcont = JsonbIteratorNext(mContained, &vcontained, true);
Assert(rcont == WJB_VALUE);
/*
* Compare rhs pair's value with lhs pair's value just found using
* key
*/
if (lhsVal->type != vcontained.type)
{
return false;
}
else if (IsAJsonbScalar(lhsVal))
{
if (!equalsJsonbScalarValue(lhsVal, &vcontained))
return false;
}
else
{
/* Nested container value (object or array) */
JsonbIterator *nestval,
*nestContained;
Assert(lhsVal->type == jbvBinary);
Assert(vcontained.type == jbvBinary);
nestval = JsonbIteratorInit(lhsVal->val.binary.data);
nestContained = JsonbIteratorInit(vcontained.val.binary.data);
/*
* Match "value" side of rhs datum object's pair recursively.
* It's a nested structure.
*
* Note that nesting still has to "match up" at the right
* nesting sub-levels. However, there need only be zero or
* more matching pairs (or elements) at each nesting level
* (provided the *rhs* pairs/elements *all* match on each
* level), which enables searching nested structures for a
* single String or other primitive type sub-datum quite
* effectively (provided the user constructed the rhs nested
* structure such that we "know where to look").
*
* In other words, the mapping of container nodes in the rhs
* "vcontained" Jsonb to internal nodes on the lhs is
* injective, and parent-child edges on the rhs must be mapped
* to parent-child edges on the lhs to satisfy the condition
* of containment (plus of course the mapped nodes must be
* equal).
*/
if (!JsonbDeepContains(&nestval, &nestContained))
return false;
}
}
}
else if (rcont == WJB_BEGIN_ARRAY)
{
JsonbValue *lhsConts = NULL;
uint32 nLhsElems = vval.val.array.nElems;
Assert(vval.type == jbvArray);
Assert(vcontained.type == jbvArray);
/*
* Handle distinction between "raw scalar" pseudo arrays, and real
* arrays.
*
* A raw scalar may contain another raw scalar, and an array may
* contain a raw scalar, but a raw scalar may not contain an array. We
* don't do something like this for the object case, since objects can
* only contain pairs, never raw scalars (a pair is represented by an
* rhs object argument with a single contained pair).
*/
if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
return false;
/* Work through rhs "is it contained within?" array */
for (;;)
{
rcont = JsonbIteratorNext(mContained, &vcontained, true);
/*
* When we get through caller's rhs "is it contained within?"
* array without failing to find one of its values, it's
* contained.
*/
if (rcont == WJB_END_ARRAY)
return true;
Assert(rcont == WJB_ELEM);
if (IsAJsonbScalar(&vcontained))
{
if (!findJsonbValueFromContainer((*val)->container,
JB_FARRAY,
&vcontained))
return false;
}
else
{
uint32 i;
/*
* If this is first container found in rhs array (at this
* depth), initialize temp lhs array of containers
*/
if (lhsConts == NULL)
{
uint32 j = 0;
/* Make room for all possible values */
lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
for (i = 0; i < nLhsElems; i++)
{
/* Store all lhs elements in temp array */
rcont = JsonbIteratorNext(val, &vval, true);
Assert(rcont == WJB_ELEM);
if (vval.type == jbvBinary)
lhsConts[j++] = vval;
}
/* No container elements in temp array, so give up now */
if (j == 0)
return false;
/* We may have only partially filled array */
nLhsElems = j;
}
/* XXX: Nested array containment is O(N^2) */
for (i = 0; i < nLhsElems; i++)
{
/* Nested container value (object or array) */
JsonbIterator *nestval,
*nestContained;
bool contains;
nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
nestContained = JsonbIteratorInit(vcontained.val.binary.data);
contains = JsonbDeepContains(&nestval, &nestContained);
if (nestval)
pfree(nestval);
if (nestContained)
pfree(nestContained);
if (contains)
break;
}
/*
* Report rhs container value is not contained if couldn't
* match rhs container to *some* lhs cont
*/
if (i == nLhsElems)
return false;
}
}
}
else
{
elog(ERROR, "invalid jsonb container type");
}
elog(ERROR, "unexpectedly fell off end of jsonb container");
return false;
}
/*
* Hash a JsonbValue scalar value, mixing the hash value into an existing
* hash provided by the caller.
*
* Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
* flags.
*/
void
JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
{
uint32 tmp;
/* Compute hash value for scalarVal */
switch (scalarVal->type)
{
case jbvNull:
tmp = 0x01;
break;
case jbvString:
tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
scalarVal->val.string.len));
break;
case jbvNumeric:
/* Must hash equal numerics to equal hash codes */
tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
NumericGetDatum(scalarVal->val.numeric)));
break;
case jbvBool:
tmp = scalarVal->val.boolean ? 0x02 : 0x04;
break;
default:
elog(ERROR, "invalid jsonb scalar type");
tmp = 0; /* keep compiler quiet */
break;
}
/*
* Combine hash values of successive keys, values and elements by rotating
* the previous value left 1 bit, then XOR'ing in the new
* key/value/element's hash value.
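 *
 * The rotation makes this combining step order-sensitive, so arrays whose
 * elements differ only in order hash differently.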
*/
*hash = (*hash << 1) | (*hash >> 31);
*hash ^= tmp;
}
/*
* Hash a value to a 64-bit value, with a seed. Otherwise, similar to
* JsonbHashScalarValue.
*/
void
JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
uint64 seed)
{
uint64 tmp;
switch (scalarVal->type)
{
case jbvNull:
tmp = seed + 0x01;
break;
case jbvString:
tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
scalarVal->val.string.len,
seed));
break;
case jbvNumeric:
tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
NumericGetDatum(scalarVal->val.numeric),
UInt64GetDatum(seed)));
break;
case jbvBool:
if (seed)
tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
BoolGetDatum(scalarVal->val.boolean),
UInt64GetDatum(seed)));
else
tmp = scalarVal->val.boolean ? 0x02 : 0x04;
break;
default:
elog(ERROR, "invalid jsonb scalar type");
break;
}
*hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
*hash ^= tmp;
}
/*
 * Are two scalar JsonbValues a and b, of the same type, equal?
*/
static bool
equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
{
if (aScalar->type == bScalar->type)
{
switch (aScalar->type)
{
case jbvNull:
return true;
case jbvString:
return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
case jbvNumeric:
return DatumGetBool(DirectFunctionCall2(numeric_eq,
PointerGetDatum(aScalar->val.numeric),
PointerGetDatum(bScalar->val.numeric)));
case jbvBool:
return aScalar->val.boolean == bScalar->val.boolean;
default:
elog(ERROR, "invalid jsonb scalar type");
}
}
elog(ERROR, "jsonb scalar type mismatch");
return false;
}
/*
* Compare two scalar JsonbValues, returning -1, 0, or 1.
*
* Strings are compared using the default collation. Used by B-tree
* operators, where a lexical sort order is generally expected.
*/
static int
compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
{
if (aScalar->type == bScalar->type)
{
switch (aScalar->type)
{
case jbvNull:
return 0;
case jbvString:
return varstr_cmp(aScalar->val.string.val,
aScalar->val.string.len,
bScalar->val.string.val,
bScalar->val.string.len,
DEFAULT_COLLATION_OID);
case jbvNumeric:
return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
PointerGetDatum(aScalar->val.numeric),
PointerGetDatum(bScalar->val.numeric)));
case jbvBool:
if (aScalar->val.boolean == bScalar->val.boolean)
return 0;
else if (aScalar->val.boolean > bScalar->val.boolean)
return 1;
else
return -1;
default:
elog(ERROR, "invalid jsonb scalar type");
}
}
elog(ERROR, "jsonb scalar type mismatch");
return -1;
}
/*
* Functions for manipulating the resizable buffer used by convertJsonb and
* its subroutines.
*/
/*
* Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
* Returns the offset to the reserved area. The caller is expected to fill
* the reserved area later with copyToBuffer().
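 *
 * Typical usage:
 *
 *		offset = reserveFromBuffer(buffer, len);
 *		... compute the data ...
 *		copyToBuffer(buffer, offset, data, len);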
*/
static int
reserveFromBuffer(StringInfo buffer, int len)
{
int offset;
/* Make more room if needed */
enlargeStringInfo(buffer, len);
/* remember current offset */
offset = buffer->len;
/* reserve the space */
buffer->len += len;
/*
* Keep a trailing null in place, even though it's not useful for us; it
* seems best to preserve the invariants of StringInfos.
*/
buffer->data[buffer->len] = '\0';
return offset;
}
/*
* Copy 'len' bytes to a previously reserved area in buffer.
*/
static void
copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
{
memcpy(buffer->data + offset, data, len);
}
/*
* A shorthand for reserveFromBuffer + copyToBuffer.
*/
static void
appendToBuffer(StringInfo buffer, const char *data, int len)
{
int offset;
offset = reserveFromBuffer(buffer, len);
copyToBuffer(buffer, offset, data, len);
}
/*
* Append padding, so that the length of the StringInfo is int-aligned.
* Returns the number of padding bytes appended.
*/
static short
padBufferToInt(StringInfo buffer)
{
int padlen,
p,
offset;
padlen = INTALIGN(buffer->len) - buffer->len;
offset = reserveFromBuffer(buffer, padlen);
/* padlen must be small, so this is probably faster than a memset */
for (p = 0; p < padlen; p++)
buffer->data[offset + p] = '\0';
return padlen;
}
/*
* Given a JsonbValue, convert to Jsonb. The result is palloc'd.
*/
static Jsonb *
convertToJsonb(JsonbValue *val)
{
StringInfoData buffer;
JEntry jentry;
Jsonb *res;
/* Should not already have binary representation */
Assert(val->type != jbvBinary);
/* Allocate an output buffer. It will be enlarged as needed */
initStringInfo(&buffer);
/* Make room for the varlena header */
reserveFromBuffer(&buffer, VARHDRSZ);
convertJsonbValue(&buffer, &jentry, val, 0);
/*
* Note: the JEntry of the root is discarded. Therefore the root
* JsonbContainer struct must contain enough information to tell what kind
* of value it is.
*/
res = (Jsonb *) buffer.data;
SET_VARSIZE(res, buffer.len);
return res;
}
/*
* Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
*
* The JEntry header for this node is returned in *header. It is filled in
* with the length of this value and appropriate type bits. If we wish to
* store an end offset rather than a length, it is the caller's responsibility
* to adjust for that.
*
* If the value is an array or an object, this recurses. 'level' is only used
* for debugging purposes.
*/
static void
convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
{
check_stack_depth();
if (!val)
return;
/*
* A JsonbValue passed as val should never have a type of jbvBinary, and
* neither should any of its sub-components. Those values will be produced
* by convertJsonbArray and convertJsonbObject, the results of which will
* not be passed back to this function as an argument.
*/
if (IsAJsonbScalar(val))
convertJsonbScalar(buffer, header, val);
else if (val->type == jbvArray)
convertJsonbArray(buffer, header, val, level);
else if (val->type == jbvObject)
convertJsonbObject(buffer, header, val, level);
else
elog(ERROR, "unknown type of jsonb container to convert");
}
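/*
 * Subroutine of convertJsonb: serialize a JsonbValue array (or "raw scalar"
 * pseudo-array) into buffer.
 */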
static void
convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
{
int base_offset;
int jentry_offset;
int i;
int totallen;
uint32 header;
int nElems = val->val.array.nElems;
/* Remember where in the buffer this array starts. */
base_offset = buffer->len;
/* Align to 4-byte boundary (any padding counts as part of my data) */
padBufferToInt(buffer);
/*
* Construct the header Jentry and store it in the beginning of the
* variable-length payload.
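 *
 * The low bits of the container header hold the element count; the high
 * bits hold the flag bits (JB_FARRAY, plus JB_FSCALAR for a raw scalar).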
*/
header = nElems | JB_FARRAY;
if (val->val.array.rawScalar)
{
Assert(nElems == 1);
Assert(level == 0);
header |= JB_FSCALAR;
}
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
/* Reserve space for the JEntries of the elements. */
jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
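/*
 * The reserved region is only a placeholder at this point; each element's
 * JEntry is backfilled into it with copyToBuffer() below, once the
 * element's data has been appended and its length is known.
 */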
totallen = 0;
for (i = 0; i < nElems; i++)
{
JsonbValue *elem = &val->val.array.elems[i];
int len;
JEntry meta;
/*
* Convert element, producing a JEntry and appending its
* variable-length data to buffer
*/
convertJsonbValue(buffer, &meta, elem, level + 1);
len = JBE_OFFLENFLD(meta);
totallen += len;
/*
* Bail out if total variable-length data exceeds what will fit in a
* JEntry length field. We check this in each iteration, not just
* once at the end, to forestall possible integer overflow.
*/
if (totallen > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
JENTRY_OFFLENMASK)));
/*
* Convert each JB_OFFSET_STRIDE'th length to an offset.
*/
if ((i % JB_OFFSET_STRIDE) == 0)
meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
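/*
 * Note that i == 0 satisfies the test above, so the first JEntry always
 * carries an offset.  At this point "totallen" is the end offset of
 * element i, which is exactly what getJsonbOffset() expects to find in an
 * offset-bearing entry when it walks the JEntry array backwards.
 */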
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
jentry_offset += sizeof(JEntry);
}
/* Total data size is everything we've appended to buffer */
totallen = buffer->len - base_offset;
/* Check length again, since we didn't include the metadata above */
if (totallen > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
JENTRY_OFFLENMASK)));
/* Initialize the header of this node in the container's JEntry array */
*pheader = JENTRY_ISCONTAINER | totallen;
}
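
/*
 * Illustrative sketch, not part of the original file: with the mixed
 * length/offset scheme built above, a reader recovers the start offset of
 * element "index" by walking backwards over at most JB_OFFSET_STRIDE
 * JEntrys, summing lengths until it reaches an entry stored as an offset.
 * This mirrors getJsonbOffset() earlier in this file; the function name
 * here is hypothetical.
 */
#ifdef NOT_USED
static uint32
exampleGetElementOffset(const JsonbContainer *jc, int index)
{
uint32 offset = 0;
int i;
/* Walk backwards, accumulating lengths, until an offset entry is hit */
for (i = index - 1; i >= 0; i--)
{
offset += JBE_OFFLENFLD(jc->children[i]);
if (JBE_HAS_OFF(jc->children[i]))
break;
}
return offset;
}
#endif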
static void
convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
{
int base_offset;
int jentry_offset;
int i;
int totallen;
uint32 header;
int nPairs = val->val.object.nPairs;
/* Remember where in the buffer this object starts. */
base_offset = buffer->len;
/* Align to 4-byte boundary (any padding counts as part of my data) */
padBufferToInt(buffer);
/*
 * Construct the container header word and store it at the beginning of
 * the variable-length payload.
 */
header = nPairs | JB_FOBJECT;
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
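/*
 * The low JB_CMASK bits of the header word hold the pair count; the high
 * bits hold the container flags (JB_FOBJECT here, JB_FARRAY and
 * JB_FSCALAR for arrays).  That bit split is what limits JSONB_MAX_PAIRS.
 */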
/* Reserve space for the JEntries of the keys and values. */
jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
/*
* Iterate over the keys, then over the values, since that is the ordering
* we want in the on-disk representation.
*/
totallen = 0;
for (i = 0; i < nPairs; i++)
{
JsonbPair *pair = &val->val.object.pairs[i];
int len;
JEntry meta;
/*
* Convert key, producing a JEntry and appending its variable-length
* data to buffer
*/
convertJsonbScalar(buffer, &meta, &pair->key);
len = JBE_OFFLENFLD(meta);
totallen += len;
/*
* Bail out if total variable-length data exceeds what will fit in a
* JEntry length field. We check this in each iteration, not just
* once at the end, to forestall possible integer overflow.
*/
if (totallen > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
JENTRY_OFFLENMASK)));
/*
* Convert each JB_OFFSET_STRIDE'th length to an offset.
*/
if ((i % JB_OFFSET_STRIDE) == 0)
meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
jentry_offset += sizeof(JEntry);
}
for (i = 0; i < nPairs; i++)
{
JsonbPair *pair = &val->val.object.pairs[i];
int len;
JEntry meta;
/*
* Convert value, producing a JEntry and appending its variable-length
* data to buffer
*/
convertJsonbValue(buffer, &meta, &pair->value, level + 1);
len = JBE_OFFLENFLD(meta);
totallen += len;
/*
* Bail out if total variable-length data exceeds what will fit in a
* JEntry length field. We check this in each iteration, not just
* once at the end, to forestall possible integer overflow.
*/
if (totallen > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
JENTRY_OFFLENMASK)));
/*
* Convert each JB_OFFSET_STRIDE'th length to an offset.
*/
if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
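/*
 * Note that (i + nPairs) is this value's index in the combined JEntry
 * array, where all the key JEntrys precede all the value JEntrys; the
 * stride positions are therefore computed over keys and values together.
 */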
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
jentry_offset += sizeof(JEntry);
}
/* Total data size is everything we've appended to buffer */
totallen = buffer->len - base_offset;
/* Check length again, since we didn't include the metadata above */
if (totallen > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
JENTRY_OFFLENMASK)));
/* Initialize the header of this node in the container's JEntry array */
*pheader = JENTRY_ISCONTAINER | totallen;
}
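/*
 * Illustrative sketch (assumed helper, not part of the original file): how a
 * reader reconstructs an element's start offset from the mixed length/offset
 * JEntry array written above.  This mirrors the logic of getJsonbOffset()
 * elsewhere in this file; the name example_getJsonbOffset is hypothetical.
 */
static inline uint32
example_getJsonbOffset(const JsonbContainer *jc, int index)
{
	uint32		offset = 0;
	int			i;

	/*
	 * The start of entry 'index' equals the end of the previous entry.  Walk
	 * backwards, summing stored lengths, until an entry stored as an offset
	 * (every JB_OFFSET_STRIDE'th one) anchors the sum to an absolute
	 * position; the walk examines at most JB_OFFSET_STRIDE JEntrys, keeping
	 * access O(1) with respect to container size.
	 */
	for (i = index - 1; i >= 0; i--)
	{
		offset += JBE_OFFLENFLD(jc->children[i]);
		if (JBE_HAS_OFF(jc->children[i]))
			break;
	}

	return offset;
}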
static void
convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
{
int numlen;
short padlen;
switch (scalarVal->type)
{
case jbvNull:
*jentry = JENTRY_ISNULL;
break;
case jbvString:
appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
*jentry = scalarVal->val.string.len;
break;
case jbvNumeric:
/* replace numeric NaN with string "NaN" */
if (numeric_is_nan(scalarVal->val.numeric))
{
appendToBuffer(buffer, "NaN", 3);
*jentry = 3;
break;
}
numlen = VARSIZE_ANY(scalarVal->val.numeric);
padlen = padBufferToInt(buffer);
appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
*jentry = JENTRY_ISNUMERIC | (padlen + numlen);
break;
case jbvBool:
*jentry = (scalarVal->val.boolean) ?
JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
break;
case jbvDatetime:
{
char buf[MAXDATELEN + 1];
size_t len;
JsonEncodeDateTime(buf,
scalarVal->val.datetime.value,
scalarVal->val.datetime.typid,
&scalarVal->val.datetime.tz);
len = strlen(buf);
appendToBuffer(buffer, buf, len);
*jentry = len;
}
break;
default:
elog(ERROR, "invalid jsonb scalar type");
}
}
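/*
 * Worked example (illustrative): for a jbvString scalar "hi", the code above
 * appends the two bytes "hi" to the buffer and sets *jentry to 2; the string
 * type bits are all zero (JENTRY_ISSTRING), so the JEntry is just the length.
 * A jbvNumeric scalar is first aligned: padBufferToInt() appends zero bytes
 * up to an int boundary, and those pad bytes are counted in the stored
 * length, which is why *jentry holds padlen + numlen.
 */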
/*
 * Compare two jbvString JsonbValue values, a and b.
 *
 * This is a special qsort() comparator used to sort strings in certain
 * internal contexts where it is sufficient to have a well-defined sort order.
 * In particular, object pair keys are sorted according to this criterion to
 * facilitate cheap binary searches where we don't care about lexical sort
 * order.
 *
 * a and b are compared first by length; only when the lengths are equal is
 * the tie broken by a binary (memcmp) comparison of the string contents.
 */
static int
lengthCompareJsonbStringValue(const void *a, const void *b)
{
const JsonbValue *va = (const JsonbValue *) a;
const JsonbValue *vb = (const JsonbValue *) b;
Assert(va->type == jbvString);
Assert(vb->type == jbvString);
return lengthCompareJsonbString(va->val.string.val, va->val.string.len,
vb->val.string.val, vb->val.string.len);
}
/*
* Subroutine for lengthCompareJsonbStringValue
*
* This is also useful separately to implement binary search on
* JsonbContainers.
*/
static int
lengthCompareJsonbString(const char *val1, int len1, const char *val2, int len2)
{
if (len1 == len2)
return memcmp(val1, val2, len1);
else
return len1 > len2 ? 1 : -1;
}
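/*
 * Example of the resulting order (illustrative): "zz" sorts before "aaa",
 * since lengths are compared first (2 < 3); only equal-length strings such
 * as "abc" vs. "abd" fall through to the memcmp() tie-breaker.
 */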
/*
 * qsort_arg() comparator to compare JsonbPair values.
 *
 * Third argument 'binequal' may point to a bool.  If it's non-NULL, *binequal
 * is set to true when a and b have binary-equal keys; it is never set to
 * false, so callers that care must initialize it.  Some callers have an
 * interest in whether the two values are equal or merely equivalent.
 *
 * N.B.: String comparisons here are "length-wise".
 *
 * Pairs with equal keys are ordered such that the order field is respected.
 */
static int
lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
{
const JsonbPair *pa = (const JsonbPair *) a;
const JsonbPair *pb = (const JsonbPair *) b;
int res;
res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
if (res == 0 && binequal)
*((bool *) binequal) = true;
/*
 * Keep the sort stable for pairs with equal keys: the pair pushed later
 * (higher order) sorts first, so the de-duplication pass in
 * uniqueifyJsonbObject(), which keeps the first pair of each equal-key
 * run, retains the value most recently assigned to the key.
 */
if (res == 0)
res = (pa->order > pb->order) ? -1 : 1;
return res;
}
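/*
 * Illustrative consequence of the tie-break above: given the input
 * {"a": 1, "a": 2}, the pair ("a", 2) carries the higher order field (it was
 * pushed later) and therefore sorts first.  uniqueifyJsonbObject() below
 * keeps only the first pair of each equal-key run, so the duplicate key
 * resolves to {"a": 2} -- i.e. the last value assigned to a key wins.
 */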
/*
* Sort and unique-ify pairs in JsonbValue object
*/
static void
uniqueifyJsonbObject(JsonbValue *object)
{
bool hasNonUniq = false;
Assert(object->type == jbvObject);
if (object->val.object.nPairs > 1)
qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
lengthCompareJsonbPair, &hasNonUniq);
if (hasNonUniq)
{
JsonbPair *ptr = object->val.object.pairs + 1,
*res = object->val.object.pairs;
while (ptr - object->val.object.pairs < object->val.object.nPairs)
{
/* Avoid copying over duplicate */
if (lengthCompareJsonbStringValue(ptr, res) != 0)
{
res++;
if (ptr != res)
memcpy(res, ptr, sizeof(JsonbPair));
}
ptr++;
}
object->val.object.nPairs = res + 1 - object->val.object.pairs;
}
}
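/*
 * Sketch of the de-duplication pass on sorted keys [a, a, b, c, c]
 * (illustrative): 'res' stays on the first "a" while 'ptr' skips its
 * duplicate; "b" and the first "c" are then copied down one slot each,
 * leaving [a, b, c] and nPairs = 3.
 */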