diff --git a/configure b/configure index 3fd4cecbeb..8176e99756 100755 --- a/configure +++ b/configure @@ -699,6 +699,9 @@ with_gnu_ld LD LDFLAGS_SL LDFLAGS_EX +LZ4_LIBS +LZ4_CFLAGS +with_lz4 with_zlib with_system_tzdata with_libxslt @@ -864,6 +867,7 @@ with_libxml with_libxslt with_system_tzdata with_zlib +with_lz4 with_gnu_ld with_ssl with_openssl @@ -891,6 +895,8 @@ ICU_LIBS XML2_CONFIG XML2_CFLAGS XML2_LIBS +LZ4_CFLAGS +LZ4_LIBS LDFLAGS_EX LDFLAGS_SL PERL @@ -1569,6 +1575,7 @@ Optional Packages: --with-system-tzdata=DIR use system time zone data in DIR --without-zlib do not use Zlib + --with-lz4 build with LZ4 support --with-gnu-ld assume the C compiler uses GNU ld [default=no] --with-ssl=LIB use LIB for SSL/TLS support (openssl) --with-openssl obsolete spelling of --with-ssl=openssl @@ -1596,6 +1603,8 @@ Some influential environment variables: XML2_CONFIG path to xml2-config utility XML2_CFLAGS C compiler flags for XML2, overriding pkg-config XML2_LIBS linker flags for XML2, overriding pkg-config + LZ4_CFLAGS C compiler flags for LZ4, overriding pkg-config + LZ4_LIBS linker flags for LZ4, overriding pkg-config LDFLAGS_EX extra linker flags for linking executables only LDFLAGS_SL extra linker flags for linking shared libraries only PERL Perl program @@ -8563,6 +8572,137 @@ fi +# +# LZ4 +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with LZ4 support" >&5 +$as_echo_n "checking whether to build with LZ4 support... " >&6; } + + + +# Check whether --with-lz4 was given. +if test "${with_lz4+set}" = set; then : + withval=$with_lz4; + case $withval in + yes) + +$as_echo "#define USE_LZ4 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-lz4 option" "$LINENO" 5 + ;; + esac + +else + with_lz4=no + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_lz4" >&5 +$as_echo "$with_lz4" >&6; } + + +if test "$with_lz4" = yes; then + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for liblz4" >&5 +$as_echo_n "checking for liblz4... " >&6; } + +if test -n "$LZ4_CFLAGS"; then + pkg_cv_LZ4_CFLAGS="$LZ4_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"liblz4\""; } >&5 + ($PKG_CONFIG --exists --print-errors "liblz4") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_LZ4_CFLAGS=`$PKG_CONFIG --cflags "liblz4" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$LZ4_LIBS"; then + pkg_cv_LZ4_LIBS="$LZ4_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"liblz4\""; } >&5 + ($PKG_CONFIG --exists --print-errors "liblz4") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_LZ4_LIBS=`$PKG_CONFIG --libs "liblz4" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + LZ4_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "liblz4" 2>&1` + else + LZ4_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "liblz4" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$LZ4_PKG_ERRORS" >&5 + + as_fn_error $? "Package requirements (liblz4) were not met: + +$LZ4_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +Alternatively, you may set the environment variables LZ4_CFLAGS +and LZ4_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details." "$LINENO" 5 +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +Alternatively, you may set the environment variables LZ4_CFLAGS +and LZ4_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details. + +To get pkg-config, see . +See \`config.log' for more details" "$LINENO" 5; } +else + LZ4_CFLAGS=$pkg_cv_LZ4_CFLAGS + LZ4_LIBS=$pkg_cv_LZ4_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +fi + LIBS="$LZ4_LIBS $LIBS" + CFLAGS="$LZ4_CFLAGS $CFLAGS" +fi + # # Assignments # @@ -13379,6 +13519,36 @@ Use --without-zlib to disable zlib support." "$LINENO" 5 fi +fi + +if test "$with_lz4" = yes; then + for ac_header in lz4/lz4.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "lz4/lz4.h" "ac_cv_header_lz4_lz4_h" "$ac_includes_default" +if test "x$ac_cv_header_lz4_lz4_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LZ4_LZ4_H 1 +_ACEOF + +else + for ac_header in lz4.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "lz4.h" "ac_cv_header_lz4_h" "$ac_includes_default" +if test "x$ac_cv_header_lz4_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LZ4_H 1 +_ACEOF + +else + as_fn_error $? "lz4.h header file is required for LZ4" "$LINENO" 5 +fi + +done + +fi + +done + fi if test "$with_gssapi" = yes ; then diff --git a/configure.ac b/configure.ac index 2f1585adc0..54efbb22a3 100644 --- a/configure.ac +++ b/configure.ac @@ -986,6 +986,21 @@ PGAC_ARG_BOOL(with, zlib, yes, [do not use Zlib]) AC_SUBST(with_zlib) +# +# LZ4 +# +AC_MSG_CHECKING([whether to build with LZ4 support]) +PGAC_ARG_BOOL(with, lz4, no, [build with LZ4 support], + [AC_DEFINE([USE_LZ4], 1, [Define to 1 to build with LZ4 support. (--with-lz4)])]) +AC_MSG_RESULT([$with_lz4]) +AC_SUBST(with_lz4) + +if test "$with_lz4" = yes; then + PKG_CHECK_MODULES(LZ4, liblz4) + LIBS="$LZ4_LIBS $LIBS" + CFLAGS="$LZ4_CFLAGS $CFLAGS" +fi + # # Assignments # @@ -1410,6 +1425,11 @@ failure. It is possible the compiler isn't looking in the proper directory. 
Use --without-zlib to disable zlib support.])]) fi +if test "$with_lz4" = yes; then + AC_CHECK_HEADERS(lz4/lz4.h, [], + [AC_CHECK_HEADERS(lz4.h, [], [AC_MSG_ERROR([lz4.h header file is required for LZ4])])]) +fi + if test "$with_gssapi" = yes ; then AC_CHECK_HEADERS(gssapi/gssapi.h, [], [AC_CHECK_HEADERS(gssapi.h, [], [AC_MSG_ERROR([gssapi.h header file is required for GSSAPI])])]) diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index e614c12a14..6f972e630a 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1069,7 +1069,7 @@ check_tuple_attribute(HeapCheckContext *ctx) */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); - ctx->attrsize = toast_pointer.va_extsize; + ctx->attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); ctx->endchunk = (ctx->attrsize - 1) / TOAST_MAX_CHUNK_SIZE; ctx->totalchunks = ctx->endchunk + 1; diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 5c9f4af1d5..68d1960698 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1355,6 +1355,18 @@ + + + attcompression char + + + The current compression method of the column. If it is an invalid + compression method ('\0') then column data will not + be compressed. Otherwise, 'p' = pglz compression or + 'l' = lz4 compression. + + + attacl aclitem[] diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9492a3c6b9..68fe6a95b4 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25992,8 +25992,8 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); The functions shown in calculate the disk space usage of database objects, or assist in presentation - of usage results. - All these functions return sizes measured in bytes. If an OID that does + or understanding of usage results. bigint results + are measured in bytes. If an OID that does not represent an existing object is passed to one of these functions, NULL is returned. @@ -26028,6 +26028,20 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); + + + + pg_column_compression + + pg_column_compression ( "any" ) + integer + + + Shows the compression algorithm that was used to compress a + an individual variable-length value. + + + diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 3c091b8041..80a8efaa27 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -54,6 +54,7 @@ ALTER TABLE [ IF EXISTS ] name ALTER [ COLUMN ] column_name SET ( attribute_option = value [, ... ] ) ALTER [ COLUMN ] column_name RESET ( attribute_option [, ... 
] ) ALTER [ COLUMN ] column_name SET STORAGE { PLAIN | EXTERNAL | EXTENDED | MAIN } + ALTER [ COLUMN ] column_name SET COMPRESSION compression_method ADD table_constraint [ NOT VALID ] ADD table_constraint_using_index ALTER CONSTRAINT constraint_name [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ] @@ -103,6 +104,7 @@ WITH ( MODULUS numeric_literal, REM GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ] | UNIQUE index_parameters | PRIMARY KEY index_parameters | + COMPRESSION compression_method | REFERENCES reftable [ ( refcolumn ) ] [ MATCH FULL | MATCH PARTIAL | MATCH SIMPLE ] [ ON DELETE referential_action ] [ ON UPDATE referential_action ] } [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ] @@ -383,6 +385,20 @@ WITH ( MODULUS numeric_literal, REM + + + SET COMPRESSION compression_method + + + + This sets the compression method for a column. The supported compression + methods are pglz and lz4. + lz4 is available only if --with-lz4 + was used when building PostgreSQL. + + + + ADD table_constraint [ NOT VALID ] diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 1fe4fb6e36..c6c248f1e9 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -22,7 +22,7 @@ PostgreSQL documentation CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name ( [ - { column_name data_type [ COLLATE collation ] [ column_constraint [ ... ] ] + { column_name data_type [ COLLATE collation ] [ COMPRESSION compression_method ] [ column_constraint [ ... ] ] | table_constraint | LIKE source_table [ like_option ... ] } [, ... ] @@ -288,6 +288,26 @@ WITH ( MODULUS numeric_literal, REM + + COMPRESSION compression_method + + + The COMPRESSION clause sets the compression method + for a column. Compression is supported only for variable-width data + types, and is used only for columns whose storage type is main or + extended. (See for information on + column storage types.) Setting this property for a partitioned table + has no direct effect, because such tables have no storage of their own, + but the configured value is inherited by newly-created partitions. + The supported compression methods are pglz and + lz4. lz4 is available only if + --with-lz4 was used when building + PostgreSQL. The default + is pglz. + + + + INHERITS ( parent_table [, ... ] ) @@ -605,6 +625,17 @@ WITH ( MODULUS numeric_literal, REM + + INCLUDING COMPRESSION + + + Compression method of the columns will be copied. The default + behavior is to exclude compression methods, resulting in columns + having the default compression method. + + + + INCLUDING CONSTRAINTS diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 13c1edfa4d..01ec9b8b0a 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -3863,6 +3863,17 @@ bar + + HIDE_TOAST_COMPRESSION + + + If this variable is set to true, column + compression method details are not displayed. This is mainly + useful for regression tests. 
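
The user-facing pieces documented above can be exercised together; the following is a minimal sketch (table and column names are illustrative, and lz4 is accepted only on a server built --with-lz4):

-- Create a column that compresses with LZ4.
CREATE TABLE cmdata (f1 text COMPRESSION lz4);

-- Values large enough to be compressed report the method in use.
INSERT INTO cmdata VALUES (repeat('1234567890', 1000));
SELECT pg_column_compression(f1) FROM cmdata;   -- lz4

-- Switch the column's setting; only values stored afterwards are affected.
ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION pglz;
INSERT INTO cmdata VALUES (repeat('1234567890', 1000));
SELECT pg_column_compression(f1) FROM cmdata;   -- lz4, then pglz

Note that changing the column setting does not rewrite existing rows; they keep whatever compression method they were written with.
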
+ + + + HIDE_TABLEAM diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c index a7eb1c9473..0ab5712c71 100644 --- a/src/backend/access/brin/brin_tuple.c +++ b/src/backend/access/brin/brin_tuple.c @@ -213,7 +213,10 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, (atttype->typstorage == TYPSTORAGE_EXTENDED || atttype->typstorage == TYPSTORAGE_MAIN)) { - Datum cvalue = toast_compress_datum(value); + Form_pg_attribute att = TupleDescAttr(brdesc->bd_tupdesc, + keyno); + Datum cvalue = toast_compress_datum(value, + att->attcompression); if (DatumGetPointer(cvalue) != NULL) { diff --git a/src/backend/access/common/Makefile b/src/backend/access/common/Makefile index 5a007d63f1..b9aff0ccfd 100644 --- a/src/backend/access/common/Makefile +++ b/src/backend/access/common/Makefile @@ -25,6 +25,7 @@ OBJS = \ scankey.o \ session.o \ syncscan.o \ + toast_compression.o \ toast_internals.o \ tupconvert.o \ tupdesc.o diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index d1cdbaf648..2fef40c2e9 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -240,14 +240,20 @@ detoast_attr_slice(struct varlena *attr, */ if (slicelimit >= 0) { - int32 max_size; + int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); /* * Determine maximum amount of compressed data needed for a prefix * of a given length (after decompression). + * + * At least for now, if it's LZ4 data, we'll have to fetch the + * whole thing, because there doesn't seem to be an API call to + * determine how much compressed data we need to be sure of being + * able to decompress the required slice. */ - max_size = pglz_maximum_compressed_size(slicelimit, - toast_pointer.va_extsize); + if (VARATT_EXTERNAL_GET_COMPRESSION(toast_pointer) == + TOAST_PGLZ_COMPRESSION_ID) + max_size = pglz_maximum_compressed_size(slicelimit, max_size); /* * Fetch enough compressed slices (compressed marker will get set @@ -347,7 +353,7 @@ toast_fetch_datum(struct varlena *attr) /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); - attrsize = toast_pointer.va_extsize; + attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); result = (struct varlena *) palloc(attrsize + VARHDRSZ); @@ -408,7 +414,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, */ Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset); - attrsize = toast_pointer.va_extsize; + attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); if (sliceoffset >= attrsize) { @@ -418,8 +424,8 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, /* * When fetching a prefix of a compressed external datum, account for the - * rawsize tracking amount of raw data, which is stored at the beginning - * as an int32 value). + * space required by va_tcinfo, which is stored at the beginning as an + * int32 value. 
*/ if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0) slicelength = slicelength + sizeof(int32); @@ -464,21 +470,24 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, static struct varlena * toast_decompress_datum(struct varlena *attr) { - struct varlena *result; + ToastCompressionId cmid; Assert(VARATT_IS_COMPRESSED(attr)); - result = (struct varlena *) - palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); - SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); - - if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), - TOAST_COMPRESS_SIZE(attr), - VARDATA(result), - TOAST_COMPRESS_RAWSIZE(attr), true) < 0) - elog(ERROR, "compressed data is corrupted"); - - return result; + /* + * Fetch the compression method id stored in the compression header and + * decompress the data using the appropriate decompression routine. + */ + cmid = TOAST_COMPRESS_METHOD(attr); + switch (cmid) + { + case TOAST_PGLZ_COMPRESSION_ID: + return pglz_decompress_datum(attr); + case TOAST_LZ4_COMPRESSION_ID: + return lz4_decompress_datum(attr); + default: + elog(ERROR, "invalid compression method id %d", cmid); + } } @@ -492,22 +501,24 @@ toast_decompress_datum(struct varlena *attr) static struct varlena * toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) { - struct varlena *result; - int32 rawsize; + ToastCompressionId cmid; Assert(VARATT_IS_COMPRESSED(attr)); - result = (struct varlena *) palloc(slicelength + VARHDRSZ); - - rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), - VARSIZE(attr) - TOAST_COMPRESS_HDRSZ, - VARDATA(result), - slicelength, false); - if (rawsize < 0) - elog(ERROR, "compressed data is corrupted"); - - SET_VARSIZE(result, rawsize + VARHDRSZ); - return result; + /* + * Fetch the compression method id stored in the compression header and + * decompress the data slice using the appropriate decompression routine. + */ + cmid = TOAST_COMPRESS_METHOD(attr); + switch (cmid) + { + case TOAST_PGLZ_COMPRESSION_ID: + return pglz_decompress_datum_slice(attr, slicelength); + case TOAST_LZ4_COMPRESSION_ID: + return lz4_decompress_datum_slice(attr, slicelength); + default: + elog(ERROR, "invalid compression method id %d", cmid); + } } /* ---------- @@ -589,7 +600,7 @@ toast_datum_size(Datum value) struct varatt_external toast_pointer; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); - result = toast_pointer.va_extsize; + result = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index b72a138497..1f6b7b77d4 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -103,7 +103,8 @@ index_form_tuple(TupleDesc tupleDescriptor, (att->attstorage == TYPSTORAGE_EXTENDED || att->attstorage == TYPSTORAGE_MAIN)) { - Datum cvalue = toast_compress_datum(untoasted_values[i]); + Datum cvalue = toast_compress_datum(untoasted_values[i], + att->attcompression); if (DatumGetPointer(cvalue) != NULL) { diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c new file mode 100644 index 0000000000..a6f8b79a9e --- /dev/null +++ b/src/backend/access/common/toast_compression.c @@ -0,0 +1,313 @@ +/*------------------------------------------------------------------------- + * + * toast_compression.c + * Functions for toast compression. 
+ * + * Copyright (c) 2021, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/access/common/toast_compression.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#ifdef USE_LZ4 +#include +#endif + +#include "access/detoast.h" +#include "access/toast_compression.h" +#include "common/pg_lzcompress.h" +#include "fmgr.h" +#include "utils/builtins.h" + +/* Compile-time default */ +char *default_toast_compression = DEFAULT_TOAST_COMPRESSION; + +/* + * Compress a varlena using PGLZ. + * + * Returns the compressed varlena, or NULL if compression fails. + */ +struct varlena * +pglz_compress_datum(const struct varlena *value) +{ + int32 valsize, + len; + struct varlena *tmp = NULL; + + valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); + + /* + * No point in wasting a palloc cycle if value size is outside the allowed + * range for compression. + */ + if (valsize < PGLZ_strategy_default->min_input_size || + valsize > PGLZ_strategy_default->max_input_size) + return NULL; + + /* + * Figure out the maximum possible size of the pglz output, add the bytes + * that will be needed for varlena overhead, and allocate that amount. + */ + tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) + + VARHDRSZ_COMPRESS); + + len = pglz_compress(VARDATA_ANY(value), + valsize, + (char *) tmp + VARHDRSZ_COMPRESS, + NULL); + if (len < 0) + { + pfree(tmp); + return NULL; + } + + SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESS); + + return tmp; +} + +/* + * Decompress a varlena that was compressed using PGLZ. + */ +struct varlena * +pglz_decompress_datum(const struct varlena *value) +{ + struct varlena *result; + int32 rawsize; + + /* allocate memory for the uncompressed data */ + result = (struct varlena *) palloc(VARRAWSIZE_4B_C(value) + VARHDRSZ); + + /* decompress the data */ + rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESS, + VARSIZE(value) - VARHDRSZ_COMPRESS, + VARDATA(result), + VARRAWSIZE_4B_C(value), true); + if (rawsize < 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("compressed pglz data is corrupt"))); + + SET_VARSIZE(result, rawsize + VARHDRSZ); + + return result; +} + +/* + * Decompress part of a varlena that was compressed using PGLZ. + */ +struct varlena * +pglz_decompress_datum_slice(const struct varlena *value, + int32 slicelength) +{ + struct varlena *result; + int32 rawsize; + + /* allocate memory for the uncompressed data */ + result = (struct varlena *) palloc(slicelength + VARHDRSZ); + + /* decompress the data */ + rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESS, + VARSIZE(value) - VARHDRSZ_COMPRESS, + VARDATA(result), + slicelength, false); + if (rawsize < 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("compressed pglz data is corrupt"))); + + SET_VARSIZE(result, rawsize + VARHDRSZ); + + return result; +} + +/* + * Compress a varlena using LZ4. + * + * Returns the compressed varlena, or NULL if compression fails. + */ +struct varlena * +lz4_compress_datum(const struct varlena *value) +{ +#ifndef USE_LZ4 + NO_LZ4_SUPPORT(); +#else + int32 valsize; + int32 len; + int32 max_size; + struct varlena *tmp = NULL; + + valsize = VARSIZE_ANY_EXHDR(value); + + /* + * Figure out the maximum possible size of the LZ4 output, add the bytes + * that will be needed for varlena overhead, and allocate that amount. 
+ */ + max_size = LZ4_compressBound(valsize); + tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESS); + + len = LZ4_compress_default(VARDATA_ANY(value), + (char *) tmp + VARHDRSZ_COMPRESS, + valsize, max_size); + if (len <= 0) + elog(ERROR, "lz4 compression failed"); + + /* data is incompressible so just free the memory and return NULL */ + if (len > valsize) + { + pfree(tmp); + return NULL; + } + + SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESS); + + return tmp; +#endif +} + +/* + * Decompress a varlena that was compressed using LZ4. + */ +struct varlena * +lz4_decompress_datum(const struct varlena *value) +{ +#ifndef USE_LZ4 + NO_LZ4_SUPPORT(); +#else + int32 rawsize; + struct varlena *result; + + /* allocate memory for the uncompressed data */ + result = (struct varlena *) palloc(VARRAWSIZE_4B_C(value) + VARHDRSZ); + + /* decompress the data */ + rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESS, + VARDATA(result), + VARSIZE(value) - VARHDRSZ_COMPRESS, + VARRAWSIZE_4B_C(value)); + if (rawsize < 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("compressed lz4 data is corrupt"))); + + + SET_VARSIZE(result, rawsize + VARHDRSZ); + + return result; +#endif +} + +/* + * Decompress part of a varlena that was compressed using LZ4. + */ +struct varlena * +lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) +{ +#ifndef USE_LZ4 + NO_LZ4_SUPPORT(); +#else + int32 rawsize; + struct varlena *result; + + /* slice decompression not supported prior to 1.8.3 */ + if (LZ4_versionNumber() < 10803) + return lz4_decompress_datum(value); + + /* allocate memory for the uncompressed data */ + result = (struct varlena *) palloc(slicelength + VARHDRSZ); + + /* decompress the data */ + rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESS, + VARDATA(result), + VARSIZE(value) - VARHDRSZ_COMPRESS, + slicelength, + slicelength); + if (rawsize < 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("compressed lz4 data is corrupt"))); + + SET_VARSIZE(result, rawsize + VARHDRSZ); + + return result; +#endif +} + +/* + * Extract compression ID from a varlena. + * + * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed. + */ +ToastCompressionId +toast_get_compression_id(struct varlena *attr) +{ + ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; + + /* + * If it is stored externally then fetch the compression method id from the + * external toast pointer. If compressed inline, fetch it from the toast + * compression header. + */ + if (VARATT_IS_EXTERNAL_ONDISK(attr)) + { + struct varatt_external toast_pointer; + + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); + + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + cmid = VARATT_EXTERNAL_GET_COMPRESSION(toast_pointer); + } + else if (VARATT_IS_COMPRESSED(attr)) + cmid = VARCOMPRESS_4B_C(attr); + + return cmid; +} + +/* + * Validate a new value for the default_toast_compression GUC. 
+ */ +bool +check_default_toast_compression(char **newval, void **extra, GucSource source) +{ + if (**newval == '\0') + { + GUC_check_errdetail("%s cannot be empty.", + "default_toast_compression"); + return false; + } + + if (strlen(*newval) >= NAMEDATALEN) + { + GUC_check_errdetail("%s is too long (maximum %d characters).", + "default_toast_compression", NAMEDATALEN - 1); + return false; + } + + if (!CompressionMethodIsValid(CompressionNameToMethod(*newval))) + { + /* + * When source == PGC_S_TEST, don't throw a hard error for a + * nonexistent compression method, only a NOTICE. See comments in + * guc.h. + */ + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("compression method \"%s\" does not exist", + *newval))); + } + else + { + GUC_check_errdetail("Compression method \"%s\" does not exist.", + *newval); + return false; + } + } + + return true; +} diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 9b9da0f41b..c81ce17822 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -44,46 +44,54 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid); * ---------- */ Datum -toast_compress_datum(Datum value) +toast_compress_datum(Datum value, char cmethod) { - struct varlena *tmp; - int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); - int32 len; + struct varlena *tmp = NULL; + int32 valsize; + ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value))); Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); + Assert(CompressionMethodIsValid(cmethod)); + + valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); + /* - * No point in wasting a palloc cycle if value size is out of the allowed - * range for compression + * Call appropriate compression routine for the compression method. */ - if (valsize < PGLZ_strategy_default->min_input_size || - valsize > PGLZ_strategy_default->max_input_size) + switch (cmethod) + { + case TOAST_PGLZ_COMPRESSION: + tmp = pglz_compress_datum((const struct varlena *) value); + cmid = TOAST_PGLZ_COMPRESSION_ID; + break; + case TOAST_LZ4_COMPRESSION: + tmp = lz4_compress_datum((const struct varlena *) value); + cmid = TOAST_LZ4_COMPRESSION_ID; + break; + default: + elog(ERROR, "invalid compression method %c", cmethod); + } + + if (tmp == NULL) return PointerGetDatum(NULL); - tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) + - TOAST_COMPRESS_HDRSZ); - /* - * We recheck the actual size even if pglz_compress() reports success, - * because it might be satisfied with having saved as little as one byte - * in the compressed data --- which could turn into a net loss once you - * consider header and alignment padding. Worst case, the compressed - * format might require three padding bytes (plus header, which is - * included in VARSIZE(tmp)), whereas the uncompressed format would take - * only one header byte and no padding if the value is short enough. So - * we insist on a savings of more than 2 bytes to ensure we have a gain. + * We recheck the actual size even if compression reports success, because + * it might be satisfied with having saved as little as one byte in the + * compressed data --- which could turn into a net loss once you consider + * header and alignment padding. 
Worst case, the compressed format might + * require three padding bytes (plus header, which is included in + * VARSIZE(tmp)), whereas the uncompressed format would take only one + * header byte and no padding if the value is short enough. So we insist + * on a savings of more than 2 bytes to ensure we have a gain. */ - len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)), - valsize, - TOAST_COMPRESS_RAWDATA(tmp), - PGLZ_strategy_default); - if (len >= 0 && - len + TOAST_COMPRESS_HDRSZ < valsize - 2) + if (VARSIZE(tmp) < valsize - 2) { - TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize); - SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ); /* successful compression */ + Assert(cmid != TOAST_INVALID_COMPRESSION_ID); + TOAST_COMPRESS_SET_SIZE_AND_METHOD(tmp, valsize, cmid); return PointerGetDatum(tmp); } else @@ -152,19 +160,21 @@ toast_save_datum(Relation rel, Datum value, &num_indexes); /* - * Get the data pointer and length, and compute va_rawsize and va_extsize. + * Get the data pointer and length, and compute va_rawsize and va_extinfo. * * va_rawsize is the size of the equivalent fully uncompressed datum, so * we have to adjust for short headers. * - * va_extsize is the actual size of the data payload in the toast records. + * va_extinfo stored the actual size of the data payload in the toast + * records and the compression method in first 2 bits if data is + * compressed. */ if (VARATT_IS_SHORT(dval)) { data_p = VARDATA_SHORT(dval); data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT; toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */ - toast_pointer.va_extsize = data_todo; + toast_pointer.va_extinfo = data_todo; } else if (VARATT_IS_COMPRESSED(dval)) { @@ -172,7 +182,10 @@ toast_save_datum(Relation rel, Datum value, data_todo = VARSIZE(dval) - VARHDRSZ; /* rawsize in a compressed datum is just the size of the payload */ toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ; - toast_pointer.va_extsize = data_todo; + + /* set external size and compression method */ + VARATT_EXTERNAL_SET_SIZE_AND_COMPRESSION(toast_pointer, data_todo, + VARCOMPRESS_4B_C(dval)); /* Assert that the numbers look like it's compressed */ Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); } @@ -181,7 +194,7 @@ toast_save_datum(Relation rel, Datum value, data_p = VARDATA(dval); data_todo = VARSIZE(dval) - VARHDRSZ; toast_pointer.va_rawsize = VARSIZE(dval); - toast_pointer.va_extsize = data_todo; + toast_pointer.va_extinfo = data_todo; } /* diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 902f59440c..cb76465050 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/toast_compression.h" #include "access/tupdesc_details.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" @@ -664,6 +665,11 @@ TupleDescInitEntry(TupleDesc desc, att->attstorage = typeForm->typstorage; att->attcollation = typeForm->typcollation; + if (IsStorageCompressible(typeForm->typstorage)) + att->attcompression = GetDefaultToastCompression(); + else + att->attcompression = InvalidCompressionMethod; + ReleaseSysCache(tuple); } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index bd5faf0c1f..7b475f2950 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -19,6 +19,7 @@ */ #include "postgres.h" +#include "access/detoast.h" #include 
"access/genam.h" #include "access/heapam.h" #include "access/heaptoast.h" @@ -26,6 +27,7 @@ #include "access/rewriteheap.h" #include "access/syncscan.h" #include "access/tableam.h" +#include "access/toast_compression.h" #include "access/tsmapi.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -2469,6 +2471,44 @@ reform_and_rewrite_tuple(HeapTuple tuple, { if (TupleDescAttr(newTupDesc, i)->attisdropped) isnull[i] = true; + + /* + * Use this opportunity to force recompression of any data that's + * compressed with some TOAST compression method other than the one + * configured for the column. We don't actually need to perform the + * compression here; we just need to decompress. That will trigger + * recompression later on. + */ + else if (!isnull[i] && TupleDescAttr(newTupDesc, i)->attlen == -1) + { + struct varlena *new_value; + ToastCompressionId cmid; + char cmethod; + + new_value = (struct varlena *) DatumGetPointer(values[i]); + cmid = toast_get_compression_id(new_value); + + /* nothing to be done for uncompressed data */ + if (cmid == TOAST_INVALID_COMPRESSION_ID) + continue; + + /* convert compression id to compression method */ + switch (cmid) + { + case TOAST_PGLZ_COMPRESSION_ID: + cmethod = TOAST_PGLZ_COMPRESSION; + break; + case TOAST_LZ4_COMPRESSION_ID: + cmethod = TOAST_LZ4_COMPRESSION; + break; + default: + elog(ERROR, "invalid compression method id %d", cmid); + } + + /* if compression method doesn't match then detoast the value */ + if (TupleDescAttr(newTupDesc, i)->attcompression != cmethod) + values[i] = PointerGetDatum(detoast_attr(new_value)); + } } copiedTuple = heap_form_tuple(newTupDesc, values, isnull); diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c index fb36151ce5..53f78f9c3e 100644 --- a/src/backend/access/table/toast_helper.c +++ b/src/backend/access/table/toast_helper.c @@ -54,6 +54,7 @@ toast_tuple_init(ToastTupleContext *ttc) ttc->ttc_attr[i].tai_colflags = 0; ttc->ttc_attr[i].tai_oldexternal = NULL; + ttc->ttc_attr[i].tai_compression = att->attcompression; if (ttc->ttc_oldvalues != NULL) { @@ -226,9 +227,11 @@ void toast_tuple_try_compression(ToastTupleContext *ttc, int attribute) { Datum *value = &ttc->ttc_values[attribute]; - Datum new_value = toast_compress_datum(*value); + Datum new_value; ToastAttrInfo *attr = &ttc->ttc_attr[attribute]; + new_value = toast_compress_datum(*value, attr->tai_compression); + if (DatumGetPointer(new_value) != NULL) { /* successful compression */ diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 41da0c5059..99e5968ea4 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -21,6 +21,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/tableam.h" +#include "access/toast_compression.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "bootstrap/bootstrap.h" @@ -733,6 +734,10 @@ DefineAttr(char *name, char *type, int attnum, int nullness) attrtypes[attnum]->attcacheoff = -1; attrtypes[attnum]->atttypmod = -1; attrtypes[attnum]->attislocal = true; + if (IsStorageCompressible(attrtypes[attnum]->attstorage)) + attrtypes[attnum]->attcompression = GetDefaultToastCompression(); + else + attrtypes[attnum]->attcompression = InvalidCompressionMethod; if (nullness == BOOTCOL_NULL_FORCE_NOT_NULL) { diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index b159958112..9586c29ad0 100644 --- a/src/backend/catalog/genbki.pl +++ 
b/src/backend/catalog/genbki.pl @@ -906,6 +906,9 @@ sub morph_row_for_pgattr $row->{attcollation} = $type->{typcollation} ne '0' ? $C_COLLATION_OID : 0; + $row->{attcompression} = + $type->{typstorage} ne 'p' && $type->{typstorage} ne 'e' ? 'p' : '\0'; + if (defined $attr->{forcenotnull}) { $row->{attnotnull} = 't'; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 9abc4a1f55..d0ec44bb40 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -36,6 +36,7 @@ #include "access/sysattr.h" #include "access/table.h" #include "access/tableam.h" +#include "access/toast_compression.h" #include "access/transam.h" #include "access/xact.h" #include "access/xlog.h" @@ -789,6 +790,7 @@ InsertPgAttributeTuples(Relation pg_attribute_rel, slot[slotCount]->tts_values[Anum_pg_attribute_attislocal - 1] = BoolGetDatum(attrs->attislocal); slot[slotCount]->tts_values[Anum_pg_attribute_attinhcount - 1] = Int32GetDatum(attrs->attinhcount); slot[slotCount]->tts_values[Anum_pg_attribute_attcollation - 1] = ObjectIdGetDatum(attrs->attcollation); + slot[slotCount]->tts_values[Anum_pg_attribute_attcompression - 1] = CharGetDatum(attrs->attcompression); if (attoptions && attoptions[natts] != (Datum) 0) slot[slotCount]->tts_values[Anum_pg_attribute_attoptions - 1] = attoptions[natts]; else @@ -1715,6 +1717,8 @@ RemoveAttributeById(Oid relid, AttrNumber attnum) /* Unset this so no one tries to look up the generation expression */ attStruct->attgenerated = '\0'; + attStruct->attcompression = InvalidCompressionMethod; + /* * Change the column name to something that isn't likely to conflict */ diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 4ef61b5efd..397d70d226 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -348,6 +348,7 @@ ConstructTupleDescriptor(Relation heapRelation, to->attbyval = from->attbyval; to->attstorage = from->attstorage; to->attalign = from->attalign; + to->attcompression = from->attcompression; } else { diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index d7b806020d..933a0734d1 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/heapam.h" +#include "access/toast_compression.h" #include "access/xact.h" #include "catalog/binary_upgrade.h" #include "catalog/catalog.h" @@ -220,6 +221,11 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, TupleDescAttr(tupdesc, 1)->attstorage = TYPSTORAGE_PLAIN; TupleDescAttr(tupdesc, 2)->attstorage = TYPSTORAGE_PLAIN; + /* Toast field should not be compressed */ + TupleDescAttr(tupdesc, 0)->attcompression = InvalidCompressionMethod; + TupleDescAttr(tupdesc, 1)->attcompression = InvalidCompressionMethod; + TupleDescAttr(tupdesc, 2)->attcompression = InvalidCompressionMethod; + /* * Toast tables for regular relations go in pg_toast; those for temp * relations go into the per-backend temp-toast-table namespace. 
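
The reform_and_rewrite_tuple() change above only decompresses values whose stored method differs from the column's configured one; the rewrite machinery then recompresses them with the configured method when the new tuple is toasted. A sketch of the observable effect (illustrative names, LZ4-enabled build assumed):

CREATE TABLE cmmove (f1 text COMPRESSION pglz);
INSERT INTO cmmove VALUES (repeat('1234567890', 1000));

ALTER TABLE cmmove ALTER COLUMN f1 SET COMPRESSION lz4;
SELECT DISTINCT pg_column_compression(f1) FROM cmmove;  -- still pglz

-- VACUUM FULL rewrites the table; mismatched values are decompressed
-- during the rewrite and recompressed with the column's new method.
VACUUM FULL cmmove;
SELECT DISTINCT pg_column_compression(f1) FROM cmmove;  -- lz4
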
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index ffb1308a0c..ab89935ba7 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -23,6 +23,7 @@ #include "access/relscan.h" #include "access/sysattr.h" #include "access/tableam.h" +#include "access/toast_compression.h" #include "access/xact.h" #include "access/xlog.h" #include "catalog/catalog.h" @@ -527,6 +528,8 @@ static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKM static void ATExecGenericOptions(Relation rel, List *options); static void ATExecSetRowSecurity(Relation rel, bool rls); static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls); +static ObjectAddress ATExecSetCompression(AlteredTableInfo *tab, Relation rel, + const char *column, Node *newValue, LOCKMODE lockmode); static void index_copy_data(Relation rel, RelFileNode newrnode); static const char *storage_name(char c); @@ -558,6 +561,7 @@ static void refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, static List *GetParentedForeignKeyRefs(Relation partition); static void ATDetachCheckNoForeignKeyRefs(Relation partition); static void ATExecAlterCollationRefreshVersion(Relation rel, List *coll); +static char GetAttributeCompression(Form_pg_attribute att, char *compression); /* ---------------------------------------------------------------- @@ -852,6 +856,18 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, if (colDef->generated) attr->attgenerated = colDef->generated; + + /* + * lookup attribute's compression method and store it in the + * attr->attcompression. + */ + if (relkind == RELKIND_RELATION || + relkind == RELKIND_PARTITIONED_TABLE || + relkind == RELKIND_MATVIEW) + attr->attcompression = + GetAttributeCompression(attr, colDef->compression); + else + attr->attcompression = InvalidCompressionMethod; } /* @@ -2396,6 +2412,22 @@ MergeAttributes(List *schema, List *supers, char relpersistence, storage_name(def->storage), storage_name(attribute->attstorage)))); + /* Copy/check compression parameter */ + if (CompressionMethodIsValid(attribute->attcompression)) + { + const char *compression = + GetCompressionMethodName(attribute->attcompression); + + if (def->compression == NULL) + def->compression = pstrdup(compression); + else if (strcmp(def->compression, compression) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a compression method conflict", + attributeName), + errdetail("%s versus %s", def->compression, compression))); + } + def->inhcount++; /* Merge of NOT NULL constraints = OR 'em together */ def->is_not_null |= attribute->attnotnull; @@ -2430,6 +2462,11 @@ MergeAttributes(List *schema, List *supers, char relpersistence, def->collOid = attribute->attcollation; def->constraints = NIL; def->location = -1; + if (CompressionMethodIsValid(attribute->attcompression)) + def->compression = pstrdup(GetCompressionMethodName( + attribute->attcompression)); + else + def->compression = NULL; inhSchema = lappend(inhSchema, def); newattmap->attnums[parent_attno - 1] = ++child_attno; } @@ -2675,6 +2712,19 @@ MergeAttributes(List *schema, List *supers, char relpersistence, storage_name(def->storage), storage_name(newdef->storage)))); + /* Copy compression parameter */ + if (def->compression == NULL) + def->compression = newdef->compression; + else if (newdef->compression != NULL) + { + if (strcmp(def->compression, newdef->compression) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column 
\"%s\" has a compression method conflict", + attributeName), + errdetail("%s versus %s", def->compression, newdef->compression))); + } + /* Mark the column as locally defined */ def->is_local = true; /* Merge of NOT NULL constraints = OR 'em together */ @@ -3961,6 +4011,7 @@ AlterTableGetLockLevel(List *cmds) case AT_DropIdentity: case AT_SetIdentity: case AT_DropExpression: + case AT_SetCompression: cmd_lockmode = AccessExclusiveLock; break; @@ -4283,6 +4334,12 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, /* No command-specific prep needed */ pass = AT_PASS_MISC; break; + case AT_SetCompression: /* ALTER COLUMN SET COMPRESSION */ + ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; case AT_DropColumn: /* DROP COLUMN */ ATSimplePermissions(rel, ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE); @@ -4626,6 +4683,10 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, case AT_SetStorage: /* ALTER COLUMN SET STORAGE */ address = ATExecSetStorage(rel, cmd->name, cmd->def, lockmode); break; + case AT_SetCompression: + address = ATExecSetCompression(tab, rel, cmd->name, cmd->def, + lockmode); + break; case AT_DropColumn: /* DROP COLUMN */ address = ATExecDropColumn(wqueue, rel, cmd->name, cmd->behavior, false, false, @@ -6340,6 +6401,18 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, attribute.attislocal = colDef->is_local; attribute.attinhcount = colDef->inhcount; attribute.attcollation = collOid; + + /* + * lookup attribute's compression method and store it in the + * attr->attcompression. + */ + if (rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + attribute.attcompression = GetAttributeCompression(&attribute, + colDef->compression); + else + attribute.attcompression = InvalidCompressionMethod; + /* attribute.attacl is handled by InsertPgAttributeTuples() */ ReleaseSysCache(typeTuple); @@ -7712,6 +7785,68 @@ ATExecSetOptions(Relation rel, const char *colName, Node *options, return address; } +/* + * Helper function for ATExecSetStorage and ATExecSetCompression + * + * Set the attcompression and/or attstorage for the respective index attribute + * if the respective input values are valid. 
+ */ +static void +SetIndexStorageProperties(Relation rel, Relation attrelation, + AttrNumber attnum, char newcompression, + char newstorage, LOCKMODE lockmode) +{ + HeapTuple tuple; + ListCell *lc; + Form_pg_attribute attrtuple; + + foreach(lc, RelationGetIndexList(rel)) + { + Oid indexoid = lfirst_oid(lc); + Relation indrel; + AttrNumber indattnum = 0; + + indrel = index_open(indexoid, lockmode); + + for (int i = 0; i < indrel->rd_index->indnatts; i++) + { + if (indrel->rd_index->indkey.values[i] == attnum) + { + indattnum = i + 1; + break; + } + } + + if (indattnum == 0) + { + index_close(indrel, lockmode); + continue; + } + + tuple = SearchSysCacheCopyAttNum(RelationGetRelid(indrel), indattnum); + + if (HeapTupleIsValid(tuple)) + { + attrtuple = (Form_pg_attribute) GETSTRUCT(tuple); + + if (CompressionMethodIsValid(newcompression)) + attrtuple->attcompression = newcompression; + + if (newstorage != '\0') + attrtuple->attstorage = newstorage; + + CatalogTupleUpdate(attrelation, &tuple->t_self, tuple); + + InvokeObjectPostAlterHook(RelationRelationId, + RelationGetRelid(rel), + attrtuple->attnum); + + heap_freetuple(tuple); + } + + index_close(indrel, lockmode); + } +} /* * ALTER TABLE ALTER COLUMN SET STORAGE * @@ -7727,7 +7862,6 @@ ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE loc Form_pg_attribute attrtuple; AttrNumber attnum; ObjectAddress address; - ListCell *lc; Assert(IsA(newValue, String)); storagemode = strVal(newValue); @@ -7791,47 +7925,9 @@ ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE loc * Apply the change to indexes as well (only for simple index columns, * matching behavior of index.c ConstructTupleDescriptor()). */ - foreach(lc, RelationGetIndexList(rel)) - { - Oid indexoid = lfirst_oid(lc); - Relation indrel; - AttrNumber indattnum = 0; - - indrel = index_open(indexoid, lockmode); - - for (int i = 0; i < indrel->rd_index->indnatts; i++) - { - if (indrel->rd_index->indkey.values[i] == attnum) - { - indattnum = i + 1; - break; - } - } - - if (indattnum == 0) - { - index_close(indrel, lockmode); - continue; - } - - tuple = SearchSysCacheCopyAttNum(RelationGetRelid(indrel), indattnum); - - if (HeapTupleIsValid(tuple)) - { - attrtuple = (Form_pg_attribute) GETSTRUCT(tuple); - attrtuple->attstorage = newstorage; - - CatalogTupleUpdate(attrelation, &tuple->t_self, tuple); - - InvokeObjectPostAlterHook(RelationRelationId, - RelationGetRelid(rel), - attrtuple->attnum); - - heap_freetuple(tuple); - } - - index_close(indrel, lockmode); - } + SetIndexStorageProperties(rel, attrelation, attnum, + InvalidCompressionMethod, + newstorage, lockmode); table_close(attrelation, RowExclusiveLock); @@ -11859,6 +11955,23 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, ReleaseSysCache(typeTuple); + /* Setup attribute compression */ + if (rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * No compression for plain/external storage, otherwise, default + * compression method if it is not already set, refer comments atop + * attcompression parameter in pg_attribute.h. 
+ */ + if (!IsStorageCompressible(tform->typstorage)) + attTup->attcompression = InvalidCompressionMethod; + else if (!CompressionMethodIsValid(attTup->attcompression)) + attTup->attcompression = GetDefaultToastCompression(); + } + else + attTup->attcompression = InvalidCompressionMethod; + CatalogTupleUpdate(attrelation, &heapTup->t_self, heapTup); table_close(attrelation, RowExclusiveLock); @@ -14939,6 +15052,89 @@ ATExecGenericOptions(Relation rel, List *options) heap_freetuple(tuple); } +/* + * ALTER TABLE ALTER COLUMN SET COMPRESSION + * + * Return value is the address of the modified column + */ +static ObjectAddress +ATExecSetCompression(AlteredTableInfo *tab, + Relation rel, + const char *column, + Node *newValue, + LOCKMODE lockmode) +{ + Relation attrel; + HeapTuple tuple; + Form_pg_attribute atttableform; + AttrNumber attnum; + char *compression; + char typstorage; + Oid cmoid; + Datum values[Natts_pg_attribute]; + bool nulls[Natts_pg_attribute]; + bool replace[Natts_pg_attribute]; + ObjectAddress address; + + Assert(IsA(newValue, String)); + compression = strVal(newValue); + + attrel = table_open(AttributeRelationId, RowExclusiveLock); + + tuple = SearchSysCacheAttName(RelationGetRelid(rel), column); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + column, RelationGetRelationName(rel)))); + + /* prevent them from altering a system attribute */ + atttableform = (Form_pg_attribute) GETSTRUCT(tuple); + attnum = atttableform->attnum; + if (attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter system column \"%s\"", column))); + + typstorage = get_typstorage(atttableform->atttypid); + + /* prevent from setting compression methods for uncompressible type */ + if (!IsStorageCompressible(typstorage)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column data type %s does not support compression", + format_type_be(atttableform->atttypid)))); + + /* initialize buffers for new tuple values */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replace, false, sizeof(replace)); + + /* get the attribute compression method. */ + cmoid = GetAttributeCompression(atttableform, compression); + + atttableform->attcompression = cmoid; + CatalogTupleUpdate(attrel, &tuple->t_self, tuple); + + InvokeObjectPostAlterHook(RelationRelationId, + RelationGetRelid(rel), + atttableform->attnum); + + ReleaseSysCache(tuple); + + /* apply changes to the index column as well */ + SetIndexStorageProperties(rel, attrel, attnum, cmoid, '\0', lockmode); + table_close(attrel, RowExclusiveLock); + + /* make changes visible */ + CommandCounterIncrement(); + + ObjectAddressSubSet(address, RelationRelationId, + RelationGetRelid(rel), atttableform->attnum); + return address; +} + + /* * Preparation phase for SET LOGGED/UNLOGGED * @@ -17641,3 +17837,36 @@ ATExecAlterCollationRefreshVersion(Relation rel, List *coll) index_update_collation_versions(rel->rd_id, get_collation_oid(coll, false)); CacheInvalidateRelcache(rel); } + +/* + * resolve column compression specification to compression method. 
+ */ +static char +GetAttributeCompression(Form_pg_attribute att, char *compression) +{ + char typstorage = get_typstorage(att->atttypid); + char cmethod; + + /* + * No compression for plain/external storage, refer comments atop + * attcompression parameter in pg_attribute.h + */ + if (!IsStorageCompressible(typstorage)) + { + if (compression == NULL) + return InvalidCompressionMethod; + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column data type %s does not support compression", + format_type_be(att->atttypid)))); + } + + /* fallback to default compression if it's not specified */ + if (compression == NULL) + cmethod = GetDefaultToastCompression(); + else + cmethod = CompressionNameToMethod(compression); + + return cmethod; +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index bda379ba91..2c20541e92 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2988,6 +2988,7 @@ _copyColumnDef(const ColumnDef *from) COPY_STRING_FIELD(colname); COPY_NODE_FIELD(typeName); + COPY_STRING_FIELD(compression); COPY_SCALAR_FIELD(inhcount); COPY_SCALAR_FIELD(is_local); COPY_SCALAR_FIELD(is_not_null); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index bc5e9e52fe..3e980c457c 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2601,6 +2601,7 @@ _equalColumnDef(const ColumnDef *a, const ColumnDef *b) { COMPARE_STRING_FIELD(colname); COMPARE_NODE_FIELD(typeName); + COMPARE_STRING_FIELD(compression); COMPARE_SCALAR_FIELD(inhcount); COMPARE_SCALAR_FIELD(is_local); COMPARE_SCALAR_FIELD(is_not_null); diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 49357ac5c2..38226530c6 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -3897,6 +3897,8 @@ raw_expression_tree_walker(Node *node, if (walker(coldef->typeName, context)) return true; + if (walker(coldef->compression, context)) + return true; if (walker(coldef->raw_default, context)) return true; if (walker(coldef->collClause, context)) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 5054490c58..305311d4a7 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2877,6 +2877,7 @@ _outColumnDef(StringInfo str, const ColumnDef *node) WRITE_STRING_FIELD(colname); WRITE_NODE_FIELD(typeName); + WRITE_STRING_FIELD(compression); WRITE_INT_FIELD(inhcount); WRITE_BOOL_FIELD(is_local); WRITE_BOOL_FIELD(is_not_null); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index fd07e7107d..bc43641ffe 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -606,6 +606,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type hash_partbound %type hash_partbound_elem +%type optColumnCompression + /* * Non-keyword token types. These are hard-wired into the "flex" lexer. 
* They must be listed first so that their numeric codes do not depend on @@ -641,9 +643,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT - COMMITTED CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT - CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE - CROSS CSV CUBE CURRENT_P + COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT + CONNECTION CONSTRAINT CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY + COST CREATE CROSS CSV CUBE CURRENT_P CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE @@ -2316,6 +2318,15 @@ alter_table_cmd: n->missing_ok = true; $$ = (Node *)n; } + /* ALTER TABLE ALTER [COLUMN] SET (COMPRESSION ) */ + | ALTER opt_column ColId SET optColumnCompression + { + AlterTableCmd *n = makeNode(AlterTableCmd); + n->subtype = AT_SetCompression; + n->name = $3; + n->def = (Node *) makeString($5); + $$ = (Node *)n; + } /* ALTER TABLE DROP [COLUMN] IF EXISTS [RESTRICT|CASCADE] */ | DROP opt_column IF_P EXISTS ColId opt_drop_behavior { @@ -3431,11 +3442,12 @@ TypedTableElement: | TableConstraint { $$ = $1; } ; -columnDef: ColId Typename create_generic_options ColQualList +columnDef: ColId Typename optColumnCompression create_generic_options ColQualList { ColumnDef *n = makeNode(ColumnDef); n->colname = $1; n->typeName = $2; + n->compression = $3; n->inhcount = 0; n->is_local = true; n->is_not_null = false; @@ -3444,8 +3456,8 @@ columnDef: ColId Typename create_generic_options ColQualList n->raw_default = NULL; n->cooked_default = NULL; n->collOid = InvalidOid; - n->fdwoptions = $3; - SplitColQualList($4, &n->constraints, &n->collClause, + n->fdwoptions = $4; + SplitColQualList($5, &n->constraints, &n->collClause, yyscanner); n->location = @1; $$ = (Node *)n; @@ -3490,6 +3502,14 @@ columnOptions: ColId ColQualList } ; +optColumnCompression: + COMPRESSION name + { + $$ = $2; + } + | /*EMPTY*/ { $$ = NULL; } + ; + ColQualList: ColQualList ColConstraint { $$ = lappend($1, $2); } | /*EMPTY*/ { $$ = NIL; } @@ -3720,6 +3740,7 @@ TableLikeOption: | INDEXES { $$ = CREATE_TABLE_LIKE_INDEXES; } | STATISTICS { $$ = CREATE_TABLE_LIKE_STATISTICS; } | STORAGE { $$ = CREATE_TABLE_LIKE_STORAGE; } + | COMPRESSION { $$ = CREATE_TABLE_LIKE_COMPRESSION; } | ALL { $$ = CREATE_TABLE_LIKE_ALL; } ; @@ -15321,6 +15342,7 @@ unreserved_keyword: | COMMENTS | COMMIT | COMMITTED + | COMPRESSION | CONFIGURATION | CONFLICT | CONNECTION @@ -15841,6 +15863,7 @@ bare_label_keyword: | COMMENTS | COMMIT | COMMITTED + | COMPRESSION | CONCURRENTLY | CONFIGURATION | CONFLICT diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index d56f81c79f..aa6c19adad 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -31,6 +31,7 @@ #include "access/relation.h" #include "access/reloptions.h" #include "access/table.h" +#include "access/toast_compression.h" #include "catalog/dependency.h" #include "catalog/heap.h" #include "catalog/index.h" @@ -1092,6 +1093,14 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla else def->storage = 0; + /* Likewise, copy compression if requested */ + if ((table_like_clause->options & CREATE_TABLE_LIKE_COMPRESSION) != 0 + && CompressionMethodIsValid(attribute->attcompression)) + 
def->compression = + pstrdup(GetCompressionMethodName(attribute->attcompression)); + else + def->compression = NULL; + /* Likewise, copy comment if requested */ if ((table_like_clause->options & CREATE_TABLE_LIKE_COMMENTS) && (comment = GetComment(attribute->attrelid, diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 91600ac566..c291b05a42 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -4641,7 +4641,7 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn, VARSIZE(chunk) - VARHDRSZ); data_done += VARSIZE(chunk) - VARHDRSZ; } - Assert(data_done == toast_pointer.va_extsize); + Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer)); /* make sure its marked as compressed or not */ if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 479ed9ae54..0bc345aa4d 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -18,6 +18,7 @@ #include #include "access/detoast.h" +#include "access/toast_compression.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "common/hashfn.h" @@ -5299,6 +5300,59 @@ pg_column_size(PG_FUNCTION_ARGS) PG_RETURN_INT32(result); } +/* + * Return the compression method stored in the compressed attribute. Return + * NULL for non varlena type or uncompressed data. + */ +Datum +pg_column_compression(PG_FUNCTION_ARGS) +{ + int typlen; + char *result; + ToastCompressionId cmid; + + /* On first call, get the input type's typlen, and save at *fn_extra */ + if (fcinfo->flinfo->fn_extra == NULL) + { + /* Lookup the datatype of the supplied argument */ + Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0); + + typlen = get_typlen(argtypeid); + if (typlen == 0) /* should not happen */ + elog(ERROR, "cache lookup failed for type %u", argtypeid); + + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(int)); + *((int *) fcinfo->flinfo->fn_extra) = typlen; + } + else + typlen = *((int *) fcinfo->flinfo->fn_extra); + + if (typlen != -1) + PG_RETURN_NULL(); + + /* get the compression method id stored in the compressed varlena */ + cmid = toast_get_compression_id((struct varlena *) + DatumGetPointer(PG_GETARG_DATUM(0))); + if (cmid == TOAST_INVALID_COMPRESSION_ID) + PG_RETURN_NULL(); + + /* convert compression method id to compression method name */ + switch (cmid) + { + case TOAST_PGLZ_COMPRESSION_ID: + result = "pglz"; + break; + case TOAST_LZ4_COMPRESSION_ID: + result = "lz4"; + break; + default: + elog(ERROR, "invalid compression method id %d", cmid); + } + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + /* * string_agg - Concatenates values and returns string. 
* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 997b4b70ee..f720b093fe 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -33,6 +33,7 @@ #include "access/gin.h" #include "access/rmgr.h" #include "access/tableam.h" +#include "access/toast_compression.h" #include "access/transam.h" #include "access/twophase.h" #include "access/xact.h" @@ -3934,6 +3935,17 @@ static struct config_string ConfigureNamesString[] = check_default_table_access_method, NULL, NULL }, + { + {"default_toast_compression", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the default compression for new columns."), + NULL, + GUC_IS_NAME + }, + &default_toast_compression, + DEFAULT_TOAST_COMPRESSION, + check_default_toast_compression, NULL, NULL + }, + { {"default_tablespace", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the default tablespace to create tables and indexes in."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 3ff507d5f6..b0b49b3823 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -660,6 +660,7 @@ #temp_tablespaces = '' # a list of tablespace names, '' uses # only default tablespace #default_table_access_method = 'heap' +#default_toast_compression = 'pglz' # 'pglz' or 'lz4' #check_function_bodies = on #default_transaction_isolation = 'read committed' #default_transaction_read_only = off diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl index 16574cb1f8..36607596b1 100644 --- a/src/bin/pg_amcheck/t/004_verify_heapam.pl +++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl @@ -124,7 +124,7 @@ sub read_tuple c_va_header => shift, c_va_vartag => shift, c_va_rawsize => shift, - c_va_extsize => shift, + c_va_extinfo => shift, c_va_valueid => shift, c_va_toastrelid => shift); # Stitch together the text for column 'b' @@ -169,7 +169,7 @@ sub write_tuple $tup->{c_va_header}, $tup->{c_va_vartag}, $tup->{c_va_rawsize}, - $tup->{c_va_extsize}, + $tup->{c_va_extinfo}, $tup->{c_va_valueid}, $tup->{c_va_toastrelid}); seek($fh, $offset, 0) diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index eea9f30a79..0296b9bb5e 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -160,6 +160,7 @@ typedef struct _dumpOptions int no_subscriptions; int no_synchronized_snapshots; int no_unlogged_table_data; + int no_toast_compression; int serializable_deferrable; int disable_triggers; int outputNoTablespaces; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index eb988d7eb4..f8bec3ffcc 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -387,6 +387,7 @@ main(int argc, char **argv) {"no-synchronized-snapshots", no_argument, &dopt.no_synchronized_snapshots, 1}, {"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1}, {"no-subscriptions", no_argument, &dopt.no_subscriptions, 1}, + {"no-toast-compression", no_argument, &dopt.no_toast_compression, 1}, {"no-sync", no_argument, NULL, 7}, {"on-conflict-do-nothing", no_argument, &dopt.do_nothing, 1}, {"rows-per-insert", required_argument, NULL, 10}, @@ -1047,6 +1048,7 @@ help(const char *progname) printf(_(" --no-publications do not dump publications\n")); printf(_(" --no-security-labels do not dump security label assignments\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); + printf(_(" --no-toast-compression do not dump toast 
compression methods\n")); printf(_(" --no-synchronized-snapshots do not use synchronized snapshots in parallel jobs\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); printf(_(" --no-unlogged-table-data do not dump unlogged table data\n")); @@ -8617,6 +8619,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) { DumpOptions *dopt = fout->dopt; PQExpBuffer q = createPQExpBuffer(); + bool createWithCompression; for (int i = 0; i < numTables; i++) { @@ -8702,6 +8705,15 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) appendPQExpBufferStr(q, "'' AS attidentity,\n"); + createWithCompression = (fout->remoteVersion >= 140000); + + if (createWithCompression) + appendPQExpBuffer(q, + "a.attcompression AS attcompression,\n"); + else + appendPQExpBuffer(q, + "NULL AS attcompression,\n"); + if (fout->remoteVersion >= 110000) appendPQExpBufferStr(q, "CASE WHEN a.atthasmissing AND NOT a.attisdropped " @@ -8747,6 +8759,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) tbinfo->attcollation = (Oid *) pg_malloc(ntups * sizeof(Oid)); tbinfo->attfdwoptions = (char **) pg_malloc(ntups * sizeof(char *)); tbinfo->attmissingval = (char **) pg_malloc(ntups * sizeof(char *)); + tbinfo->attcompression = (char *) pg_malloc(ntups * sizeof(char *)); tbinfo->notnull = (bool *) pg_malloc(ntups * sizeof(bool)); tbinfo->inhNotNull = (bool *) pg_malloc(ntups * sizeof(bool)); tbinfo->attrdefs = (AttrDefInfo **) pg_malloc(ntups * sizeof(AttrDefInfo *)); @@ -8775,6 +8788,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) tbinfo->attcollation[j] = atooid(PQgetvalue(res, j, PQfnumber(res, "attcollation"))); tbinfo->attfdwoptions[j] = pg_strdup(PQgetvalue(res, j, PQfnumber(res, "attfdwoptions"))); tbinfo->attmissingval[j] = pg_strdup(PQgetvalue(res, j, PQfnumber(res, "attmissingval"))); + tbinfo->attcompression[j] = *(PQgetvalue(res, j, PQfnumber(res, "attcompression"))); tbinfo->attrdefs[j] = NULL; /* fix below */ if (PQgetvalue(res, j, PQfnumber(res, "atthasdef"))[0] == 't') hasdefaults = true; @@ -15891,6 +15905,31 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) tbinfo->atttypnames[j]); } + /* + * Attribute compression + */ + if (!dopt->no_toast_compression && + tbinfo->attcompression != NULL) + { + char *cmname; + + switch (tbinfo->attcompression[j]) + { + case 'p': + cmname = "pglz"; + break; + case 'l': + cmname = "lz4"; + break; + default: + cmname = NULL; + break; + } + + if (cmname != NULL) + appendPQExpBuffer(q, " COMPRESSION %s", cmname); + } + if (print_default) { if (tbinfo->attgenerated[j] == ATTRIBUTE_GENERATED_STORED) diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 0a2213fb06..453f9467c6 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -326,6 +326,7 @@ typedef struct _tableInfo char *partbound; /* partition bound definition */ bool needs_override; /* has GENERATED ALWAYS AS IDENTITY */ char *amname; /* relation access method */ + char *attcompression; /* per-attribute current compression method */ /* * Stuff computed only for dumpable tables. 
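The pg_dump changes above make getTableAttrs fetch a.attcompression from servers of version 14 and later, have dumpTableSchema append the column's compression method to its type, and add a --no-toast-compression option that suppresses the clause. As a rough sketch of the resulting DDL (the table and column names here are illustrative only, not taken from the patch), a dump of a table with mixed settings would contain something like:

CREATE TABLE dump_test.cmexample (
    col1 text COMPRESSION pglz,
    col2 text COMPRESSION lz4
);

With --no-toast-compression the COMPRESSION clauses are left out, so restored columns fall back to the target server's default_toast_compression; the regular-expression expectations in 002_pg_dump.pl below are adjusted to accept the new clause.
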
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 737e46464a..bc91bb12ac 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -2284,9 +2284,9 @@ my %tests = ( regexp => qr/^ \QCREATE TABLE dump_test.test_table (\E\n \s+\Qcol1 integer NOT NULL,\E\n - \s+\Qcol2 text,\E\n - \s+\Qcol3 text,\E\n - \s+\Qcol4 text,\E\n + \s+\Qcol2 text COMPRESSION\E\D*,\n + \s+\Qcol3 text COMPRESSION\E\D*,\n + \s+\Qcol4 text COMPRESSION\E\D*,\n \s+\QCONSTRAINT test_table_col1_check CHECK ((col1 <= 1000))\E\n \Q)\E\n \QWITH (autovacuum_enabled='false', fillfactor='80');\E\n/xm, @@ -2326,7 +2326,7 @@ my %tests = ( regexp => qr/^ \QCREATE TABLE dump_test.test_second_table (\E \n\s+\Qcol1 integer,\E - \n\s+\Qcol2 text\E + \n\s+\Qcol2 text COMPRESSION\E\D* \n\); /xm, like => @@ -2441,7 +2441,7 @@ my %tests = ( \n\s+\Qcol1 integer,\E \n\s+\Qcol2 boolean,\E \n\s+\Qcol3 boolean,\E - \n\s+\Qcol4 bit(5),\E + \n\s+\Qcol4 bit(5) COMPRESSION\E\D*, \n\s+\Qcol5 double precision\E \n\); /xm, @@ -2459,7 +2459,7 @@ my %tests = ( regexp => qr/^ \QCREATE TABLE dump_test.test_table_identity (\E\n \s+\Qcol1 integer NOT NULL,\E\n - \s+\Qcol2 text\E\n + \s+\Qcol2 text COMPRESSION\E\D*\n \); .* \QALTER TABLE dump_test.test_table_identity ALTER COLUMN col1 ADD GENERATED ALWAYS AS IDENTITY (\E\n diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 20af5a92b4..eeac0efc4f 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -1459,7 +1459,7 @@ describeOneTableDetails(const char *schemaname, bool printTableInitialized = false; int i; char *view_def = NULL; - char *headers[11]; + char *headers[12]; PQExpBufferData title; PQExpBufferData tmpbuf; int cols; @@ -1475,7 +1475,8 @@ describeOneTableDetails(const char *schemaname, fdwopts_col = -1, attstorage_col = -1, attstattarget_col = -1, - attdescr_col = -1; + attdescr_col = -1, + attcompression_col = -1; int numrows; struct { @@ -1892,6 +1893,17 @@ describeOneTableDetails(const char *schemaname, appendPQExpBufferStr(&buf, ",\n a.attstorage"); attstorage_col = cols++; + /* compression info */ + if (pset.sversion >= 140000 && + !pset.hide_compression && + (tableinfo.relkind == RELKIND_RELATION || + tableinfo.relkind == RELKIND_PARTITIONED_TABLE || + tableinfo.relkind == RELKIND_MATVIEW)) + { + appendPQExpBufferStr(&buf, ",\n a.attcompression AS attcompression"); + attcompression_col = cols++; + } + /* stats target, if relevant to relkind */ if (tableinfo.relkind == RELKIND_RELATION || tableinfo.relkind == RELKIND_INDEX || @@ -2018,6 +2030,8 @@ describeOneTableDetails(const char *schemaname, headers[cols++] = gettext_noop("FDW options"); if (attstorage_col >= 0) headers[cols++] = gettext_noop("Storage"); + if (attcompression_col >= 0) + headers[cols++] = gettext_noop("Compression"); if (attstattarget_col >= 0) headers[cols++] = gettext_noop("Stats target"); if (attdescr_col >= 0) @@ -2097,6 +2111,19 @@ describeOneTableDetails(const char *schemaname, false, false); } + /* Column compression. */ + if (attcompression_col >= 0) + { + char *compression = PQgetvalue(res, i, attcompression_col); + + /* these strings are literal in our syntax, so not translated. */ + printTableAddCell(&cont, (compression[0] == 'p' ? "pglz" : + (compression[0] == 'l' ? "lz4" : + (compression[0] == '\0' ? 
"" : + "???"))), + false, false); + } + /* Statistics target, if the relkind supports this feature */ if (attstattarget_col >= 0) printTableAddCell(&cont, PQgetvalue(res, i, attstattarget_col), diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index daa5081eac..99a59470c5 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -372,6 +372,8 @@ helpVariables(unsigned short int pager) " true if last query failed, else false\n")); fprintf(output, _(" FETCH_COUNT\n" " the number of result rows to fetch and display at a time (0 = unlimited)\n")); + fprintf(output, _(" HIDE_TOAST_COMPRESSION\n" + " if set, compression methods are not displayed\n")); fprintf(output, _(" HIDE_TABLEAM\n" " if set, table access methods are not displayed\n")); fprintf(output, _(" HISTCONTROL\n" diff --git a/src/bin/psql/settings.h b/src/bin/psql/settings.h index d65990059d..83f2e6f254 100644 --- a/src/bin/psql/settings.h +++ b/src/bin/psql/settings.h @@ -134,6 +134,7 @@ typedef struct _psqlSettings bool quiet; bool singleline; bool singlestep; + bool hide_compression; bool hide_tableam; int fetch_count; int histsize; diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c index 780479c8d7..110906a4e9 100644 --- a/src/bin/psql/startup.c +++ b/src/bin/psql/startup.c @@ -1159,6 +1159,13 @@ show_context_hook(const char *newval) return true; } +static bool +hide_compression_hook(const char *newval) +{ + return ParseVariableBool(newval, "HIDE_TOAST_COMPRESSION", + &pset.hide_compression); +} + static bool hide_tableam_hook(const char *newval) { @@ -1227,6 +1234,9 @@ EstablishVariableSpace(void) SetVariableHooks(pset.vars, "SHOW_CONTEXT", show_context_substitute_hook, show_context_hook); + SetVariableHooks(pset.vars, "HIDE_TOAST_COMPRESSION", + bool_substitute_hook, + hide_compression_hook); SetVariableHooks(pset.vars, "HIDE_TABLEAM", bool_substitute_hook, hide_tableam_hook); diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 316bec8b01..b67f4ea609 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2116,7 +2116,7 @@ psql_completion(const char *text, int start, int end) /* ALTER TABLE ALTER [COLUMN] SET */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET")) - COMPLETE_WITH("(", "DEFAULT", "NOT NULL", "STATISTICS", "STORAGE"); + COMPLETE_WITH("(", "COMPRESSION", "DEFAULT", "NOT NULL", "STATISTICS", "STORAGE"); /* ALTER TABLE ALTER [COLUMN] SET ( */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "(") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "(")) diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h index 0adf53c77b..773a02f89b 100644 --- a/src/include/access/detoast.h +++ b/src/include/access/detoast.h @@ -12,16 +12,6 @@ #ifndef DETOAST_H #define DETOAST_H -/* - * Testing whether an externally-stored value is compressed now requires - * comparing extsize (the actual length of the external data) to rawsize - * (the original uncompressed datum's size). The latter includes VARHDRSZ - * overhead, the former doesn't. We never use compression unless it actually - * saves space, so we expect either equality or less-than. - */ -#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ - ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ) - /* * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum * into a local "struct varatt_external" toast pointer. 
This should be diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h new file mode 100644 index 0000000000..514df0bed1 --- /dev/null +++ b/src/include/access/toast_compression.h @@ -0,0 +1,123 @@ +/*------------------------------------------------------------------------- + * + * toast_compression.h + * Functions for toast compression. + * + * Copyright (c) 2021, PostgreSQL Global Development Group + * + * src/include/access/toast_compression.h + * + *------------------------------------------------------------------------- + */ + +#ifndef TOAST_COMPRESSION_H +#define TOAST_COMPRESSION_H + +#include "utils/guc.h" + +/* GUCs */ +extern char *default_toast_compression; + +/* default compression method if not specified. */ +#define DEFAULT_TOAST_COMPRESSION "pglz" + +/* + * Built-in compression method-id. The toast compression header will store + * this in the first 2 bits of the raw length. These built-in compression + * method-id are directly mapped to the built-in compression methods. + */ +typedef enum ToastCompressionId +{ + TOAST_PGLZ_COMPRESSION_ID = 0, + TOAST_LZ4_COMPRESSION_ID = 1, + TOAST_INVALID_COMPRESSION_ID = 2 +} ToastCompressionId; + +/* + * Built-in compression methods. pg_attribute will store this in the + * attcompression column. + */ +#define TOAST_PGLZ_COMPRESSION 'p' +#define TOAST_LZ4_COMPRESSION 'l' + +#define InvalidCompressionMethod '\0' +#define CompressionMethodIsValid(cm) ((bool) ((cm) != InvalidCompressionMethod)) + +#define NO_LZ4_SUPPORT() \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("unsupported LZ4 compression method"), \ + errdetail("This functionality requires the server to be built with lz4 support."), \ + errhint("You need to rebuild PostgreSQL using --with-lz4."))) + +#define IsValidCompression(cm) ((cm) != InvalidCompressionMethod) + +#define IsStorageCompressible(storage) ((storage) != TYPSTORAGE_PLAIN && \ + (storage) != TYPSTORAGE_EXTERNAL) + +/* + * GetCompressionMethodName - Get compression method name + */ +static inline const char * +GetCompressionMethodName(char method) +{ + switch (method) + { + case TOAST_PGLZ_COMPRESSION: + return "pglz"; + case TOAST_LZ4_COMPRESSION: + return "lz4"; + default: + elog(ERROR, "invalid compression method %c", method); + } +} + +/* + * CompressionNameToMethod - Get compression method from compression name + * + * Search in the available built-in methods. If the compression not found + * in the built-in methods then return InvalidCompressionMethod. + */ +static inline char +CompressionNameToMethod(char *compression) +{ + if (strcmp(compression, "pglz") == 0) + return TOAST_PGLZ_COMPRESSION; + else if (strcmp(compression, "lz4") == 0) + { +#ifndef USE_LZ4 + NO_LZ4_SUPPORT(); +#endif + return TOAST_LZ4_COMPRESSION; + } + + return InvalidCompressionMethod; +} + +/* + * GetDefaultToastCompression -- get the default toast compression method + * + * This exists to hide the use of the default_toast_compression GUC variable. 
+ */ +static inline char +GetDefaultToastCompression(void) +{ + return CompressionNameToMethod(default_toast_compression); +} + +/* pglz compression/decompression routines */ +extern struct varlena *pglz_compress_datum(const struct varlena *value); +extern struct varlena *pglz_decompress_datum(const struct varlena *value); +extern struct varlena *pglz_decompress_datum_slice(const struct varlena *value, + int32 slicelength); + +/* lz4 compression/decompression routines */ +extern struct varlena *lz4_compress_datum(const struct varlena *value); +extern struct varlena *lz4_decompress_datum(const struct varlena *value); +extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value, + int32 slicelength); +extern ToastCompressionId toast_get_compression_id(struct varlena *attr); +extern bool check_default_toast_compression(char **newval, void **extra, + GucSource source); + +#endif /* TOAST_COMPRESSION_H */ diff --git a/src/include/access/toast_helper.h b/src/include/access/toast_helper.h index a9a6d644bc..05104ce237 100644 --- a/src/include/access/toast_helper.h +++ b/src/include/access/toast_helper.h @@ -32,6 +32,7 @@ typedef struct struct varlena *tai_oldexternal; int32 tai_size; uint8 tai_colflags; + char tai_compression; } ToastAttrInfo; /* diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index cedfb890d8..b4d068459a 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -12,6 +12,7 @@ #ifndef TOAST_INTERNALS_H #define TOAST_INTERNALS_H +#include "access/toast_compression.h" #include "storage/lockdefs.h" #include "utils/relcache.h" #include "utils/snapshot.h" @@ -22,22 +23,26 @@ typedef struct toast_compress_header { int32 vl_len_; /* varlena header (do not touch directly!) */ - int32 rawsize; + uint32 tcinfo; /* 2 bits for compression method and 30 bits + * rawsize */ } toast_compress_header; /* * Utilities for manipulation of header information for compressed * toast entries. 
*/ -#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header)) -#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize) -#define TOAST_COMPRESS_SIZE(ptr) ((int32) VARSIZE_ANY(ptr) - TOAST_COMPRESS_HDRSZ) -#define TOAST_COMPRESS_RAWDATA(ptr) \ - (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ) -#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \ - (((toast_compress_header *) (ptr))->rawsize = (len)) +#define TOAST_COMPRESS_METHOD(ptr) \ + (((toast_compress_header *) (ptr))->tcinfo >> VARLENA_RAWSIZE_BITS) +#define TOAST_COMPRESS_SET_SIZE_AND_METHOD(ptr, len, cm_method) \ + do { \ + Assert((len) > 0 && (len) <= VARLENA_RAWSIZE_MASK); \ + Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ + ((toast_compress_header *) (ptr))->tcinfo = \ + ((len) | (cm_method) << VARLENA_RAWSIZE_BITS); \ + } while (0) -extern Datum toast_compress_datum(Datum value); +extern Datum toast_compress_datum(Datum value, char cmethod); extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock); extern void toast_delete_datum(Relation rel, Datum value, bool is_speculative); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 609d184e81..c831b55bf9 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202103131 +#define CATALOG_VERSION_NO 202103191 #endif diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h index 3db42abf08..560f8f00bb 100644 --- a/src/include/catalog/pg_attribute.h +++ b/src/include/catalog/pg_attribute.h @@ -160,6 +160,12 @@ CATALOG(pg_attribute,1249,AttributeRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(75, /* attribute's collation, if any */ Oid attcollation BKI_LOOKUP_OPT(pg_collation); + /* + * compression method. Must be InvalidCompressionMethod if and only if + * typstorage is 'plain' or 'external'. + */ + char attcompression BKI_DEFAULT('\0'); + #ifdef CATALOG_VARLEN /* variable-length fields start here */ /* NOTE: The following fields are not present in tuple descriptors. */ @@ -187,7 +193,7 @@ CATALOG(pg_attribute,1249,AttributeRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(75, * can access fields beyond attcollation except in a real tuple! 
*/ #define ATTRIBUTE_FIXED_PART_SIZE \ - (offsetof(FormData_pg_attribute,attcollation) + sizeof(Oid)) + (offsetof(FormData_pg_attribute,attcompression) + sizeof(char)) /* ---------------- * Form_pg_attribute corresponds to a pointer to a tuple with diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 93393fcfd4..e259531f60 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -7103,6 +7103,10 @@ descr => 'bytes required to store the value, perhaps with compression', proname => 'pg_column_size', provolatile => 's', prorettype => 'int4', proargtypes => 'any', prosrc => 'pg_column_size' }, +{ oid => '2121', + descr => 'compression method for the compressed datum', + proname => 'pg_column_compression', provolatile => 's', prorettype => 'text', + proargtypes => 'any', prosrc => 'pg_column_compression' }, { oid => '2322', descr => 'total disk space usage for the specified tablespace', proname => 'pg_tablespace_size', provolatile => 'v', prorettype => 'int8', diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 3a81d4f267..68425eb2c0 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -655,6 +655,7 @@ typedef struct ColumnDef NodeTag type; char *colname; /* name of column */ TypeName *typeName; /* type of column */ + char *compression; /* compression method for column */ int inhcount; /* number of times column is inherited */ bool is_local; /* column has local (non-inherited) def'n */ bool is_not_null; /* NOT NULL constraint specified? */ @@ -694,6 +695,7 @@ typedef enum TableLikeOption CREATE_TABLE_LIKE_INDEXES = 1 << 5, CREATE_TABLE_LIKE_STATISTICS = 1 << 6, CREATE_TABLE_LIKE_STORAGE = 1 << 7, + CREATE_TABLE_LIKE_COMPRESSION = 1 << 8, CREATE_TABLE_LIKE_ALL = PG_INT32_MAX } TableLikeOption; @@ -1855,6 +1857,7 @@ typedef enum AlterTableType AT_SetOptions, /* alter column set ( options ) */ AT_ResetOptions, /* alter column reset ( options ) */ AT_SetStorage, /* alter column set storage */ + AT_SetCompression, /* alter column set compression */ AT_DropColumn, /* drop column */ AT_DropColumnRecurse, /* internal to commands/tablecmds.c */ AT_AddIndex, /* add index */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 28083aaac9..ca1f950cbe 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -88,6 +88,7 @@ PG_KEYWORD("comment", COMMENT, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("comments", COMMENTS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("commit", COMMIT, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("committed", COMMITTED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("compression", COMPRESSION, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("concurrently", CONCURRENTLY, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) PG_KEYWORD("configuration", CONFIGURATION, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("conflict", CONFLICT, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 7a7cc21d8d..0a6422da4f 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -899,6 +899,9 @@ /* Define to 1 to build with LLVM based JIT support. (--with-llvm) */ #undef USE_LLVM +/* Define to 1 to build with LZ4 support (--with-lz4) */ +#undef USE_LZ4 + /* Define to select named POSIX semaphores. 
*/ #undef USE_NAMED_POSIX_SEMAPHORES diff --git a/src/include/postgres.h b/src/include/postgres.h index 2ed572004d..2ccbea8e50 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -55,7 +55,9 @@ /* * struct varatt_external is a traditional "TOAST pointer", that is, the * information needed to fetch a Datum stored out-of-line in a TOAST table. - * The data is compressed if and only if va_extsize < va_rawsize - VARHDRSZ. + * The data is compressed if and only if the size stored in va_extinfo < + * va_rawsize - VARHDRSZ. + * + * This struct must not contain any padding, because we sometimes compare + * these pointers using memcmp. + * @@ -67,7 +69,8 @@ typedef struct varatt_external { int32 va_rawsize; /* Original data size (includes header) */ - int32 va_extsize; /* External saved size (doesn't) */ + uint32 va_extinfo; /* External saved size (without header) and + * compression method */ Oid va_valueid; /* Unique ID of value within TOAST table */ Oid va_toastrelid; /* RelID of TOAST table containing it */ } varatt_external; @@ -145,7 +148,8 @@ typedef union struct /* Compressed-in-line format */ { uint32 va_header; - uint32 va_rawsize; /* Original data size (excludes header) */ + uint32 va_tcinfo; /* Original data size (excludes header) and + * compression method */ char va_data[FLEXIBLE_ARRAY_MEMBER]; /* Compressed data */ } va_compressed; } varattrib_4b; @@ -274,14 +278,23 @@ typedef struct (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) #define VARHDRSZ_EXTERNAL offsetof(varattrib_1b_e, va_data) +#define VARHDRSZ_COMPRESS offsetof(varattrib_4b, va_compressed.va_data) #define VARDATA_4B(PTR) (((varattrib_4b *) (PTR))->va_4byte.va_data) #define VARDATA_4B_C(PTR) (((varattrib_4b *) (PTR))->va_compressed.va_data) #define VARDATA_1B(PTR) (((varattrib_1b *) (PTR))->va_data) #define VARDATA_1B_E(PTR) (((varattrib_1b_e *) (PTR))->va_data) +#define VARLENA_RAWSIZE_BITS 30 +#define VARLENA_RAWSIZE_MASK ((1U << VARLENA_RAWSIZE_BITS) - 1) + +/* + * va_tcinfo in va_compressed contains raw size of datum and compression method. + */ #define VARRAWSIZE_4B_C(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_rawsize) + (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_RAWSIZE_MASK) +#define VARCOMPRESS_4B_C(PTR) \ + (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_RAWSIZE_BITS) /* Externally visible macros */ @@ -323,6 +336,35 @@ typedef struct (VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) #define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \ (VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) + +/* + * va_extinfo in varatt_external contains actual length of the external data + * and compression method if external data is compressed. + */ +#define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \ + ((toast_pointer).va_extinfo & VARLENA_RAWSIZE_MASK) + +#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESSION(toast_pointer, len, cm) \ + do { \ + Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm) == TOAST_LZ4_COMPRESSION_ID); \ + ((toast_pointer).va_extinfo = (len) | (cm) << VARLENA_RAWSIZE_BITS); \ + } while (0) + +#define VARATT_EXTERNAL_GET_COMPRESSION(toast_pointer) \ + ((toast_pointer).va_extinfo >> VARLENA_RAWSIZE_BITS) + +/* + * Testing whether an externally-stored value is compressed now requires + * comparing size stored in va_extinfo (the actual length of the external data) + * to rawsize (the original uncompressed datum's size). The latter includes + * VARHDRSZ overhead, the former doesn't.
We never use compression unless it + * actually saves space, so we expect either equality or less-than. + */ +#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ + (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \ + (toast_pointer).va_rawsize - VARHDRSZ) + #define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR) #define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR)) diff --git a/src/test/regress/expected/compression.out b/src/test/regress/expected/compression.out new file mode 100644 index 0000000000..3de2886de0 --- /dev/null +++ b/src/test/regress/expected/compression.out @@ -0,0 +1,347 @@ +\set HIDE_TOAST_COMPRESSION false +-- test creating table with compression method +CREATE TABLE cmdata(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata(f1); +INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); +\d+ cmdata + Table "public.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | +Indexes: + "idx" btree (f1) + +CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); +INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); +\d+ cmdata1 + Table "public.cmdata1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | + +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz +(1 row) + +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + lz4 +(1 row) + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata; + substr +-------- + 01234 +(1 row) + +SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; + substr +---------------------------------------------------- + 01234567890123456789012345678901234567890123456789 +(1 row) + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata; +\d+ cmmove1 + Table "public.cmmove1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | + +SELECT pg_column_compression(f1) FROM cmmove1; + pg_column_compression +----------------------- + pglz +(1 row) + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata; +INSERT INTO cmmove3 SELECT * FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove3; + pg_column_compression +----------------------- + pglz + lz4 +(2 rows) + +-- test LIKE INCLUDING COMPRESSION +CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | + +DROP TABLE cmdata2; +-- try setting compression for incompressible data type +CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); +ERROR: column data type integer does not support compression +-- update using datum from different table +CREATE TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT 
pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + pglz +(1 row) + +UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + lz4 +(1 row) + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(md5(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); +INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + pglz +(1 row) + +INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + lz4 + lz4 +(2 rows) + +SELECT SUBSTR(f1, 200, 5) FROM cmdata1; + substr +-------- + 01234 + 8f14e +(2 rows) + +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; + substr +-------- + 8f14e +(1 row) + +DROP TABLE cmdata2; +--test column type update varlena/non-varlena +CREATE TABLE cmdata2 (f1 int); +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+---------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | integer | | | | plain | | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | character varying | | | | extended | pglz | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+---------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | integer | | | | plain | | | + +--changing column storage should not impact the compression method +--but the data should not be compressed +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | character varying | | | | extended | pglz | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | character varying | | | | plain | pglz | | + +INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + +(1 row) + +-- test compression with materialized view +CREATE MATERIALIZED VIEW mv(x) AS SELECT * FROM cmdata1; +\d+ mv + Materialized view "public.mv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | pglz | | +View definition: + SELECT cmdata1.f1 
AS x + FROM cmdata1; + +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + lz4 + lz4 +(2 rows) + +SELECT pg_column_compression(x) FROM mv; + pg_column_compression +----------------------- + lz4 + lz4 +(2 rows) + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; + pg_column_compression +----------------------- + lz4 +(1 row) + +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- + pglz +(1 row) + +-- test compression with inheritence, error +CREATE TABLE cminh() INHERITS(cmdata, cmdata1); +NOTICE: merging multiple inherited definitions of column "f1" +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus lz4 +CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); +NOTICE: merging column "f1" with inherited definition +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus lz4 +-- test default_toast_compression GUC +SET default_toast_compression = ''; +ERROR: invalid value for parameter "default_toast_compression": "" +DETAIL: default_toast_compression cannot be empty. +SET default_toast_compression = 'I do not exist compression'; +ERROR: invalid value for parameter "default_toast_compression": "I do not exist compression" +DETAIL: Compression method "I do not exist compression" does not exist. 
+SET default_toast_compression = 'lz4'; +DROP TABLE cmdata2; +CREATE TABLE cmdata2 (f1 text); +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | + +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +INSERT INTO cmdata VALUES (repeat('123456789', 4004)); +\d+ cmdata + Table "public.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | +Indexes: + "idx" btree (f1) + +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + lz4 +(2 rows) + +-- test alter compression method for the materialized view +ALTER MATERIALIZED VIEW mv ALTER COLUMN x SET COMPRESSION lz4; +\d+ mv + Materialized view "public.mv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | lz4 | | +View definition: + SELECT cmdata1.f1 AS x + FROM cmdata1; + +-- test alter compression method for the partitioned table +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; + pg_column_compression +----------------------- + lz4 + pglz +(2 rows) + +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- + pglz + lz4 +(2 rows) + +--vacuum full to recompress the data +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + lz4 +(2 rows) + +VACUUM FULL cmdata; +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + lz4 + lz4 +(2 rows) + +-- check data is ok +SELECT length(f1) FROM cmdata; + length +-------- + 10000 + 36036 +(2 rows) + +SELECT length(f1) FROM cmdata1; + length +-------- + 10040 + 12449 +(2 rows) + +SELECT length(f1) FROM cmmove1; + length +-------- + 10000 +(1 row) + +SELECT length(f1) FROM cmmove2; + length +-------- + 10040 +(1 row) + +SELECT length(f1) FROM cmmove3; + length +-------- + 10000 + 10040 +(2 rows) + +\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/expected/compression_1.out b/src/test/regress/expected/compression_1.out new file mode 100644 index 0000000000..40aad81fa1 --- /dev/null +++ b/src/test/regress/expected/compression_1.out @@ -0,0 +1,340 @@ +\set HIDE_TOAST_COMPRESSION false +-- test creating table with compression method +CREATE TABLE cmdata(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata(f1); +INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); +\d+ cmdata + Table "public.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | +Indexes: + "idx" btree (f1) + +CREATE TABLE 
cmdata1(f1 TEXT COMPRESSION lz4); +ERROR: unsupported LZ4 compression method +DETAIL: This functionality requires the server to be built with lz4 support. +HINT: You need to rebuild PostgreSQL using --with-lz4. +INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); +ERROR: relation "cmdata1" does not exist +LINE 1: INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); + ^ +\d+ cmdata1 +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz +(1 row) + +SELECT pg_column_compression(f1) FROM cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; + ^ +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata; + substr +-------- + 01234 +(1 row) + +SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; + ^ +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata; +\d+ cmmove1 + Table "public.cmmove1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | + +SELECT pg_column_compression(f1) FROM cmmove1; + pg_column_compression +----------------------- + pglz +(1 row) + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata; +INSERT INTO cmmove3 SELECT * FROM cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: INSERT INTO cmmove3 SELECT * FROM cmdata1; + ^ +SELECT pg_column_compression(f1) FROM cmmove3; + pg_column_compression +----------------------- + pglz +(1 row) + +-- test LIKE INCLUDING COMPRESSION +CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); +ERROR: relation "cmdata1" does not exist +LINE 1: CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); + ^ +\d+ cmdata2 +DROP TABLE cmdata2; +ERROR: table "cmdata2" does not exist +-- try setting compression for incompressible data type +CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); +ERROR: column data type integer does not support compression +-- update using datum from different table +CREATE TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + pglz +(1 row) + +UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; + ^ +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + pglz +(1 row) + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(md5(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); +INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + pglz +(1 row) + +INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); +ERROR: relation "cmdata1" does not exist +LINE 1: INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); + ^ +SELECT pg_column_compression(f1) FROM cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; + ^ +SELECT SUBSTR(f1, 200, 5) FROM 
cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: SELECT SUBSTR(f1, 200, 5) FROM cmdata1; + ^ +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; + substr +-------- + 8f14e +(1 row) + +DROP TABLE cmdata2; +--test column type update varlena/non-varlena +CREATE TABLE cmdata2 (f1 int); +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+---------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | integer | | | | plain | | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | character varying | | | | extended | pglz | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+---------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | integer | | | | plain | | | + +--changing column storage should not impact the compression method +--but the data should not be compressed +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | character varying | | | | extended | pglz | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | character varying | | | | plain | pglz | | + +INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + +(1 row) + +-- test compression with materialized view +CREATE MATERIALIZED VIEW mv(x) AS SELECT * FROM cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: CREATE MATERIALIZED VIEW mv(x) AS SELECT * FROM cmdata1; + ^ +\d+ mv +SELECT pg_column_compression(f1) FROM cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; + ^ +SELECT pg_column_compression(x) FROM mv; +ERROR: relation "mv" does not exist +LINE 1: SELECT pg_column_compression(x) FROM mv; + ^ +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); +ERROR: unsupported LZ4 compression method +DETAIL: This functionality requires the server to be built with lz4 support. +HINT: You need to rebuild PostgreSQL using --with-lz4. 
+CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +ERROR: relation "cmpart" does not exist +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +ERROR: relation "cmpart" does not exist +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +ERROR: relation "cmpart" does not exist +LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 1004)); + ^ +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +ERROR: relation "cmpart" does not exist +LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 4004)); + ^ +SELECT pg_column_compression(f1) FROM cmpart1; +ERROR: relation "cmpart1" does not exist +LINE 1: SELECT pg_column_compression(f1) FROM cmpart1; + ^ +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- +(0 rows) + +-- test compression with inheritence, error +CREATE TABLE cminh() INHERITS(cmdata, cmdata1); +ERROR: relation "cmdata1" does not exist +CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); +NOTICE: merging column "f1" with inherited definition +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus lz4 +-- test default_toast_compression GUC +SET default_toast_compression = ''; +ERROR: invalid value for parameter "default_toast_compression": "" +DETAIL: default_toast_compression cannot be empty. +SET default_toast_compression = 'I do not exist compression'; +ERROR: invalid value for parameter "default_toast_compression": "I do not exist compression" +DETAIL: Compression method "I do not exist compression" does not exist. +SET default_toast_compression = 'lz4'; +ERROR: unsupported LZ4 compression method +DETAIL: This functionality requires the server to be built with lz4 support. +HINT: You need to rebuild PostgreSQL using --with-lz4. +DROP TABLE cmdata2; +CREATE TABLE cmdata2 (f1 text); +\d+ cmdata2 + Table "public.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | + +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +ERROR: unsupported LZ4 compression method +DETAIL: This functionality requires the server to be built with lz4 support. +HINT: You need to rebuild PostgreSQL using --with-lz4. +INSERT INTO cmdata VALUES (repeat('123456789', 4004)); +\d+ cmdata + Table "public.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | +Indexes: + "idx" btree (f1) + +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + pglz +(2 rows) + +-- test alter compression method for the materialized view +ALTER MATERIALIZED VIEW mv ALTER COLUMN x SET COMPRESSION lz4; +ERROR: relation "mv" does not exist +\d+ mv +-- test alter compression method for the partitioned table +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ERROR: relation "cmpart1" does not exist +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; +ERROR: unsupported LZ4 compression method +DETAIL: This functionality requires the server to be built with lz4 support. +HINT: You need to rebuild PostgreSQL using --with-lz4. 
+-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +ERROR: relation "cmpart" does not exist +LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 1004)); + ^ +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +ERROR: relation "cmpart" does not exist +LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 4004)); + ^ +SELECT pg_column_compression(f1) FROM cmpart1; +ERROR: relation "cmpart1" does not exist +LINE 1: SELECT pg_column_compression(f1) FROM cmpart1; + ^ +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- +(0 rows) + +--vacuum full to recompress the data +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + pglz +(2 rows) + +VACUUM FULL cmdata; +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + pglz +(2 rows) + +-- check data is ok +SELECT length(f1) FROM cmdata; + length +-------- + 10000 + 36036 +(2 rows) + +SELECT length(f1) FROM cmdata1; +ERROR: relation "cmdata1" does not exist +LINE 1: SELECT length(f1) FROM cmdata1; + ^ +SELECT length(f1) FROM cmmove1; + length +-------- + 10000 +(1 row) + +SELECT length(f1) FROM cmmove2; + length +-------- + 10040 +(1 row) + +SELECT length(f1) FROM cmmove3; + length +-------- + 10000 +(1 row) + +\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index e280198b17..70c38309d7 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -115,7 +115,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # ---------- # Another group of parallel tests # ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression # event triggers cannot run concurrently with any test that runs DDL # oidjoins is read-only, though, and should run late for best coverage diff --git a/src/test/regress/pg_regress_main.c b/src/test/regress/pg_regress_main.c index 8dc4941c24..1524676f3b 100644 --- a/src/test/regress/pg_regress_main.c +++ b/src/test/regress/pg_regress_main.c @@ -78,11 +78,11 @@ psql_start_test(const char *testname, * against different AMs without unnecessary differences. */ offset += snprintf(psql_cmd + offset, sizeof(psql_cmd) - offset, - "\"%s%spsql\" -X -a -q -d \"%s\" -v %s < \"%s\" > \"%s\" 2>&1", + "\"%s%spsql\" -X -a -q -d \"%s\" %s < \"%s\" > \"%s\" 2>&1", bindir ? bindir : "", bindir ? 
"/" : "", dblist->str, - "HIDE_TABLEAM=\"on\"", + "-v HIDE_TABLEAM=on -v HIDE_TOAST_COMPRESSION=on", infile, outfile); if (offset >= sizeof(psql_cmd)) diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 6a57e889a1..d81d04136c 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -201,6 +201,7 @@ test: partition_aggregate test: partition_info test: tuplesort test: explain +test: compression test: event_trigger test: oidjoins test: fast_default diff --git a/src/test/regress/sql/compression.sql b/src/test/regress/sql/compression.sql new file mode 100644 index 0000000000..d97e26b6ee --- /dev/null +++ b/src/test/regress/sql/compression.sql @@ -0,0 +1,136 @@ +\set HIDE_TOAST_COMPRESSION false + +-- test creating table with compression method +CREATE TABLE cmdata(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata(f1); +INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); +\d+ cmdata +CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); +INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); +\d+ cmdata1 + +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata; +SELECT pg_column_compression(f1) FROM cmdata1; + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata; +SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata; +\d+ cmmove1 +SELECT pg_column_compression(f1) FROM cmmove1; + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata; +INSERT INTO cmmove3 SELECT * FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove3; + +-- test LIKE INCLUDING COMPRESSION +CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); +\d+ cmdata2 +DROP TABLE cmdata2; + +-- try setting compression for incompressible data type +CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); + +-- update using datum from different table +CREATE TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; +UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove2; + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(md5(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); +INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; +INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata1; +SELECT SUBSTR(f1, 200, 5) FROM cmdata1; +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; +DROP TABLE cmdata2; + +--test column type update varlena/non-varlena +CREATE TABLE cmdata2 (f1 int); +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; +\d+ cmdata2 + +--changing column storage should not impact the compression method +--but the data should not be compressed +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; +\d+ cmdata2 +INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); +SELECT pg_column_compression(f1) FROM cmdata2; + +-- test compression with materialized view +CREATE MATERIALIZED VIEW mv(x) AS SELECT * FROM cmdata1; +\d+ mv +SELECT pg_column_compression(f1) FROM cmdata1; +SELECT pg_column_compression(x) 
FROM mv; + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); + +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- test compression with inheritence, error +CREATE TABLE cminh() INHERITS(cmdata, cmdata1); +CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); + +-- test default_toast_compression GUC +SET default_toast_compression = ''; +SET default_toast_compression = 'I do not exist compression'; +SET default_toast_compression = 'lz4'; +DROP TABLE cmdata2; +CREATE TABLE cmdata2 (f1 text); +\d+ cmdata2 + +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +INSERT INTO cmdata VALUES (repeat('123456789', 4004)); +\d+ cmdata +SELECT pg_column_compression(f1) FROM cmdata; + +-- test alter compression method for the materialized view +ALTER MATERIALIZED VIEW mv ALTER COLUMN x SET COMPRESSION lz4; +\d+ mv + +-- test alter compression method for the partitioned table +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; + +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +--vacuum full to recompress the data +SELECT pg_column_compression(f1) FROM cmdata; +VACUUM FULL cmdata; +SELECT pg_column_compression(f1) FROM cmdata; + +-- check data is ok +SELECT length(f1) FROM cmdata; +SELECT length(f1) FROM cmdata1; +SELECT length(f1) FROM cmmove1; +SELECT length(f1) FROM cmmove2; +SELECT length(f1) FROM cmmove3; + +\set HIDE_TOAST_COMPRESSION true diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index a4f5cc4bdb..14605371bb 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -485,6 +485,7 @@ sub GenerateFiles USE_ICU => $self->{options}->{icu} ? 1 : undef, USE_LIBXML => undef, USE_LIBXSLT => undef, + USE_LZ4 => undef, USE_LDAP => $self->{options}->{ldap} ? 1 : undef, USE_LLVM => undef, USE_NAMED_POSIX_SEMAPHORES => undef,
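
The regression test added above exercises the user-visible surface of the feature end to end. A condensed sketch of that flow, assuming a server built with --with-lz4 (the table name cmexample is illustrative and not part of the patch):

-- show the new Compression column in \d+ output (the regression harness turns it off)
\set HIDE_TOAST_COMPRESSION false
CREATE TABLE cmexample (f1 text COMPRESSION lz4);
INSERT INTO cmexample VALUES (repeat('1234567890', 1000));
SELECT pg_column_compression(f1) FROM cmexample;  -- lz4
SET default_toast_compression = 'pglz';  -- method used when no COMPRESSION clause is given
ALTER TABLE cmexample ALTER COLUMN f1 SET COMPRESSION pglz;
INSERT INTO cmexample VALUES (repeat('1234567890', 1000));
SELECT pg_column_compression(f1) FROM cmexample;  -- lz4 for the old row, pglz for the new one
VACUUM FULL cmexample;  -- rewriting the table recompresses everything with pglz
\d+ cmexample

Changing a column's compression method affects only newly stored values; existing datums keep the method they were written with until the table is rewritten, which is what the VACUUM FULL steps in compression.sql check.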