/*-------------------------------------------------------------------------
 *
 * toast_compression.c
 *	  Functions for toast compression.
 *
 * Copyright (c) 2021-2024, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/common/toast_compression.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#ifdef USE_LZ4
#include <lz4.h>
#endif

#include "access/detoast.h"
#include "access/toast_compression.h"
#include "common/pg_lzcompress.h"
#include "varatt.h"

/* GUC */
int			default_toast_compression = TOAST_PGLZ_COMPRESSION;

/*
 * Report that lz4 was requested but is unavailable in this build.  Raised at
 * ERROR level, so statements following the macro are only there to keep the
 * compiler quiet.
 */
#define NO_LZ4_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("compression method lz4 not supported"), \
			 errdetail("This functionality requires the server to be built with lz4 support.")))

/*
 * Compress a varlena using PGLZ.
 *
 * value: the varlena whose payload (VARDATA_ANY / VARSIZE_ANY_EXHDR) is to
 * be compressed.
 *
 * Returns a newly palloc'd varlena whose size has been set with
 * SET_VARSIZE_COMPRESSED, or NULL when either the payload size lies outside
 * the [min_input_size, max_input_size] range of PGLZ_strategy_default or
 * pglz_compress() reports failure by returning a negative length.
 */
struct varlena *
pglz_compress_datum(const struct varlena *value)
{
	int32		valsize,
				len;
	struct varlena *tmp = NULL;

	valsize = VARSIZE_ANY_EXHDR(value);

	/*
	 * No point in wasting a palloc cycle if value size is outside the allowed
	 * range for compression.
	 */
	if (valsize < PGLZ_strategy_default->min_input_size ||
		valsize > PGLZ_strategy_default->max_input_size)
		return NULL;

	/*
	 * Figure out the maximum possible size of the pglz output, add the bytes
	 * that will be needed for varlena overhead, and allocate that amount.
	 */
	tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
									VARHDRSZ_COMPRESSED);

	/* The compressed bytes are written just past the compressed header. */
	len = pglz_compress(VARDATA_ANY(value),
						valsize,
						(char *) tmp + VARHDRSZ_COMPRESSED,
						NULL);
	if (len < 0)
	{
		/* Compression failed; release the scratch buffer before bailing. */
		pfree(tmp);
		return NULL;
	}

	SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED);

	return tmp;
}

/*
 * Decompress a varlena that was compressed using PGLZ.
*/ struct varlena * pglz_decompress_datum(const struct varlena *value) { struct varlena *result; int32 rawsize; /* allocate memory for the uncompressed data */ result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); /* decompress the data */ rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, VARSIZE(value) - VARHDRSZ_COMPRESSED, VARDATA(result), VARDATA_COMPRESSED_GET_EXTSIZE(value), true); if (rawsize < 0) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("compressed pglz data is corrupt"))); SET_VARSIZE(result, rawsize + VARHDRSZ); return result; } /* * Decompress part of a varlena that was compressed using PGLZ. */ struct varlena * pglz_decompress_datum_slice(const struct varlena *value, int32 slicelength) { struct varlena *result; int32 rawsize; /* allocate memory for the uncompressed data */ result = (struct varlena *) palloc(slicelength + VARHDRSZ); /* decompress the data */ rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, VARSIZE(value) - VARHDRSZ_COMPRESSED, VARDATA(result), slicelength, false); if (rawsize < 0) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("compressed pglz data is corrupt"))); SET_VARSIZE(result, rawsize + VARHDRSZ); return result; } /* * Compress a varlena using LZ4. * * Returns the compressed varlena, or NULL if compression fails. */ struct varlena * lz4_compress_datum(const struct varlena *value) { #ifndef USE_LZ4 NO_LZ4_SUPPORT(); return NULL; /* keep compiler quiet */ #else int32 valsize; int32 len; int32 max_size; struct varlena *tmp = NULL; valsize = VARSIZE_ANY_EXHDR(value); /* * Figure out the maximum possible size of the LZ4 output, add the bytes * that will be needed for varlena overhead, and allocate that amount. 
*/ max_size = LZ4_compressBound(valsize); tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED); len = LZ4_compress_default(VARDATA_ANY(value), (char *) tmp + VARHDRSZ_COMPRESSED, valsize, max_size); if (len <= 0) elog(ERROR, "lz4 compression failed"); /* data is incompressible so just free the memory and return NULL */ if (len > valsize) { pfree(tmp); return NULL; } SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED); return tmp; #endif } /* * Decompress a varlena that was compressed using LZ4. */ struct varlena * lz4_decompress_datum(const struct varlena *value) { #ifndef USE_LZ4 NO_LZ4_SUPPORT(); return NULL; /* keep compiler quiet */ #else int32 rawsize; struct varlena *result; /* allocate memory for the uncompressed data */ result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); /* decompress the data */ rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED, VARDATA(result), VARSIZE(value) - VARHDRSZ_COMPRESSED, VARDATA_COMPRESSED_GET_EXTSIZE(value)); if (rawsize < 0) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("compressed lz4 data is corrupt"))); SET_VARSIZE(result, rawsize + VARHDRSZ); return result; #endif } /* * Decompress part of a varlena that was compressed using LZ4. 
*/ struct varlena * lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) { #ifndef USE_LZ4 NO_LZ4_SUPPORT(); return NULL; /* keep compiler quiet */ #else int32 rawsize; struct varlena *result; /* slice decompression not supported prior to 1.8.3 */ if (LZ4_versionNumber() < 10803) return lz4_decompress_datum(value); /* allocate memory for the uncompressed data */ result = (struct varlena *) palloc(slicelength + VARHDRSZ); /* decompress the data */ rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED, VARDATA(result), VARSIZE(value) - VARHDRSZ_COMPRESSED, slicelength, slicelength); if (rawsize < 0) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("compressed lz4 data is corrupt"))); SET_VARSIZE(result, rawsize + VARHDRSZ); return result; #endif } /* * Extract compression ID from a varlena. * * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed. */ ToastCompressionId toast_get_compression_id(struct varlena *attr) { ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; /* * If it is stored externally then fetch the compression method id from * the external toast pointer. If compressed inline, fetch it from the * toast compression header. */ if (VARATT_IS_EXTERNAL_ONDISK(attr)) { struct varatt_external toast_pointer; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer); } else if (VARATT_IS_COMPRESSED(attr)) cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); return cmid; } /* * CompressionNameToMethod - Get compression method from compression name * * Search in the available built-in methods. If the compression not found * in the built-in methods then return InvalidCompressionMethod. 
*/ char CompressionNameToMethod(const char *compression) { if (strcmp(compression, "pglz") == 0) return TOAST_PGLZ_COMPRESSION; else if (strcmp(compression, "lz4") == 0) { #ifndef USE_LZ4 NO_LZ4_SUPPORT(); #endif return TOAST_LZ4_COMPRESSION; } return InvalidCompressionMethod; } /* * GetCompressionMethodName - Get compression method name */ const char * GetCompressionMethodName(char method) { switch (method) { case TOAST_PGLZ_COMPRESSION: return "pglz"; case TOAST_LZ4_COMPRESSION: return "lz4"; default: elog(ERROR, "invalid compression method %c", method); return NULL; /* keep compiler quiet */ } }