Optimize partial TOAST decompression

Commit 4d0e994eed added support for partial TOAST decompression, so the
decompression is interrupted after producing the requested prefix. For
prefixes and slices near the beginning of the entry, this may save a lot
of decompression work.

That however only deals with decompression - the whole compressed entry
was still fetched and re-assembled, even though the decompression used
only a small fraction of it. This commit improves that by computing how
much compressed data may be needed to decompress the requested prefix,
and then fetches only the necessary part.

We always need to fetch a bit more compressed data than the requested
(uncompressed) prefix, because the prefix may not be compressible at all
and pglz itself adds a bit of overhead. That means this optimization is
most effective when the requested prefix is much smaller than the whole
compressed entry.

Author: Binguo Bao
Reviewed-by: Andrey Borodin, Tomas Vondra, Paul Ramsey
Discussion: https://www.postgresql.org/message-id/flat/CAL-OGkthU9Gs7TZchf5OWaL-Gsi=hXqufTxKv9qpNG73d5na_g@mail.gmail.com
This commit is contained in:
Tomas Vondra 2019-10-01 14:13:44 +02:00
parent 002962dc72
commit 11a078cf87
4 changed files with 86 additions and 8 deletions

View File

@ -196,6 +196,8 @@ heap_tuple_untoast_attr(struct varlena *attr)
*
* Public entry point to get back part of a toasted value
* from compression or external storage.
*
* Note: When slicelength is negative, return suffix of the value.
* ----------
*/
struct varlena *
@ -217,8 +219,30 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
/* fetch it back (compressed marker will get set automatically) */
preslice = toast_fetch_datum(attr);
/*
* For compressed values, we need to fetch enough slices to decompress
* at least the requested part (when a prefix is requested). Otherwise,
* just fetch all slices.
*/
if (slicelength > 0 && sliceoffset >= 0)
{
int32 max_size;
/*
* Determine maximum amount of compressed data needed for a prefix
* of a given length (after decompression).
*/
max_size = pglz_maximum_compressed_size(sliceoffset + slicelength,
TOAST_COMPRESS_SIZE(attr));
/*
* Fetch enough compressed slices (compressed marker will get set
* automatically).
*/
preslice = toast_fetch_datum_slice(attr, 0, max_size);
}
else
preslice = toast_fetch_datum(attr);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
@ -476,7 +500,9 @@ toast_fetch_datum(struct varlena *attr)
* Reconstruct a segment of a Datum from the chunks saved
* in the toast relation
*
* Note that this function only supports non-compressed external datums.
* Note that this function supports non-compressed external datums
* and compressed external datums (in which case the requested slice
* has to be a prefix, i.e. sliceoffset has to be 0).
* ----------
*/
static struct varlena *
@ -517,10 +543,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
* It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
* we can't return a compressed datum which is meaningful to toast later
* It's nonsense to fetch slices of a compressed datum unless the slice
* is a prefix -- this isn't lo_*; we can't return a compressed datum
* which is meaningful to toast later.
*/
Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
attrsize = toast_pointer.va_extsize;
totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
@ -531,12 +558,23 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
length = 0;
}
/*
* When fetching a prefix of a compressed external datum, account for the
* rawsize field tracking the amount of raw data, which is stored at the
* beginning as an int32 value.
*/
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && length > 0)
length = length + sizeof(int32);
if (((sliceoffset + length) > attrsize) || length < 0)
length = attrsize - sliceoffset;
result = (struct varlena *) palloc(length + VARHDRSZ);
SET_VARSIZE(result, length + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
else
SET_VARSIZE(result, length + VARHDRSZ);
if (length == 0)
return result; /* Can save a lot of work at this point! */
@ -720,7 +758,7 @@ toast_decompress_datum(struct varlena *attr)
SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
TOAST_COMPRESS_SIZE(attr),
VARDATA(result),
TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
elog(ERROR, "compressed data is corrupted");

View File

@ -771,3 +771,40 @@ pglz_decompress(const char *source, int32 slen, char *dest,
*/
return (char *) dp - dest;
}
/* ----------
 * pglz_maximum_compressed_size -
 *
 *		Calculate the maximum compressed size for a given amount of raw data.
 *		Return the maximum size, or total compressed size if maximum size is
 *		larger than total compressed size.
 *
 *		rawsize is the length of the (uncompressed) prefix the caller wants
 *		to obtain; total_compressed_size is the size of the whole compressed
 *		value, which serves as an upper limit for the result.
 *
 * We can't use PGLZ_MAX_OUTPUT for this purpose, because that's used to size
 * the compression buffer (and abort the compression). It does not really say
 * what's the maximum compressed size for an input of a given length, and it
 * may happen that while the whole value is compressible (and thus fits into
 * PGLZ_MAX_OUTPUT nicely), the prefix is not compressible at all.
 * ----------
 */
int32
pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size)
{
	int64		compressed_size;

	/*
	 * pglz uses one control bit per byte, so we need (rawsize * 9) bits. We
	 * care about bytes though, so we add 7 to make sure we include the last
	 * incomplete byte (integer division rounds down).
	 *
	 * Keep the whole computation in int64. A cast binds tighter than the
	 * division, so writing "(int32) (...) / 8" would narrow the intermediate
	 * product to 32 bits before dividing, which defeats the purpose of
	 * widening it in the first place.
	 */
	compressed_size = ((int64) rawsize * 9 + 7) / 8;

	/*
	 * Maximum compressed size can't be larger than total compressed size.
	 */
	compressed_size = Min(compressed_size, (int64) total_compressed_size);

	/*
	 * The value is now no larger than total_compressed_size, so for a
	 * non-negative rawsize narrowing it back to int32 cannot lose anything.
	 */
	return (int32) compressed_size;
}

View File

@ -31,6 +31,7 @@ typedef struct toast_compress_header
*/
#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header))
#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
#define TOAST_COMPRESS_SIZE(ptr) ((int32) VARSIZE(ptr) - TOAST_COMPRESS_HDRSZ)
#define TOAST_COMPRESS_RAWDATA(ptr) \
(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \

View File

@ -87,5 +87,7 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest,
const PGLZ_Strategy *strategy);
extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
int32 rawsize, bool check_complete);
extern int32 pglz_maximum_compressed_size(int32 rawsize,
int32 total_compressed_size);
#endif /* _PG_LZCOMPRESS_H_ */