From dab298471ff2f91f33bc25bfb73e435d3ab02148 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 11 Feb 2022 08:29:38 -0500 Subject: [PATCH] Add suport for server-side LZ4 base backup compression. LZ4 compression can be a lot faster than gzip compression, so users may prefer it even if the compression ratio is not as good. We will want pg_basebackup to support LZ4 compression and decompression on the client side as well, and there is a pending patch for that, but it's by a different author, so I am committing this part separately for that reason. Jeevan Ladhe, reviewed by Tushar Ahuja and by me. Discussion: http://postgr.es/m/CANm22Cg9cArXEaYgHVZhCnzPLfqXCZLAzjwTq7Fc0quXRPfbxA@mail.gmail.com --- doc/src/sgml/protocol.sgml | 7 +- doc/src/sgml/ref/pg_basebackup.sgml | 24 +- src/backend/replication/Makefile | 1 + src/backend/replication/basebackup.c | 7 +- src/backend/replication/basebackup_lz4.c | 298 ++++++++++++++++++++++ src/bin/pg_basebackup/pg_basebackup.c | 18 +- src/bin/pg_verifybackup/Makefile | 1 + src/bin/pg_verifybackup/t/008_untar.pl | 10 +- src/include/replication/basebackup_sink.h | 1 + 9 files changed, 349 insertions(+), 18 deletions(-) create mode 100644 src/backend/replication/basebackup_lz4.c diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index fd03c860bd..1c5ab00879 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2724,8 +2724,8 @@ The commands accepted in replication mode are: Instructs the server to compress the backup using the specified - method. Currently, the only supported method is - gzip. + method. Currently, the supported methods are gzip + and lz4. @@ -2736,7 +2736,8 @@ The commands accepted in replication mode are: Specifies the compression level to be used. This should only be used in conjunction with the COMPRESSION option. - The value should be an integer between 1 and 9. + For gzip the value should be an integer between 1 + and 9, and for lz4 it should be between 1 and 12. diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index e7ae29ec3d..7a1b432eba 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -417,10 +417,13 @@ PostgreSQL documentation specify -Xfetch. - The compression method can be set to either gzip - for compression with gzip, or - none for no compression. A compression level - can be optionally specified, by appending the level number after a + The compression method can be set to gzip for + compression with gzip, or + lz4 for compression with + lz4, or none for no + compression. However, lz4 can be currently only + used with server. A compression level can be + optionally specified, by appending the level number after a colon (:). If no level is specified, the default compression level will be used. If only a level is specified without mentioning an algorithm, gzip compression will @@ -428,12 +431,13 @@ PostgreSQL documentation used if the level is 0. - When the tar format is used, the suffix .gz will - automatically be added to all tar filenames. When the plain format is - used, client-side compression may not be specified, but it is - still possible to request server-side compression. If this is done, - the server will compress the backup for transmission, and the - client will decompress and extract it. + When the tar format is used with gzip or + lz4, the suffix .gz or + .lz4 will automatically be added to all tar + filenames. When the plain format is used, client-side compression may + not be specified, but it is still possible to request server-side + compression. If this is done, the server will compress the backup for + transmission, and the client will decompress and extract it. diff --git a/src/backend/replication/Makefile b/src/backend/replication/Makefile index 8ec60ded76..74043ff331 100644 --- a/src/backend/replication/Makefile +++ b/src/backend/replication/Makefile @@ -19,6 +19,7 @@ OBJS = \ basebackup.o \ basebackup_copy.o \ basebackup_gzip.o \ + basebackup_lz4.o \ basebackup_progress.o \ basebackup_server.o \ basebackup_sink.o \ diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index fcd9161f74..0bf28b55d7 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -63,7 +63,8 @@ typedef enum typedef enum { BACKUP_COMPRESSION_NONE, - BACKUP_COMPRESSION_GZIP + BACKUP_COMPRESSION_GZIP, + BACKUP_COMPRESSION_LZ4 } basebackup_compression_type; typedef struct @@ -903,6 +904,8 @@ parse_basebackup_options(List *options, basebackup_options *opt) opt->compression = BACKUP_COMPRESSION_NONE; else if (strcmp(optval, "gzip") == 0) opt->compression = BACKUP_COMPRESSION_GZIP; + else if (strcmp(optval, "lz4") == 0) + opt->compression = BACKUP_COMPRESSION_LZ4; else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -1021,6 +1024,8 @@ SendBaseBackup(BaseBackupCmd *cmd) /* Set up server-side compression, if client requested it */ if (opt.compression == BACKUP_COMPRESSION_GZIP) sink = bbsink_gzip_new(sink, opt.compression_level); + else if (opt.compression == BACKUP_COMPRESSION_LZ4) + sink = bbsink_lz4_new(sink, opt.compression_level); /* Set up progress reporting. */ sink = bbsink_progress_new(sink, opt.progress); diff --git a/src/backend/replication/basebackup_lz4.c b/src/backend/replication/basebackup_lz4.c new file mode 100644 index 0000000000..8730ee89dc --- /dev/null +++ b/src/backend/replication/basebackup_lz4.c @@ -0,0 +1,298 @@ +/*------------------------------------------------------------------------- + * + * basebackup_lz4.c + * Basebackup sink implementing lz4 compression. + * + * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/replication/basebackup_lz4.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#ifdef HAVE_LIBLZ4 +#include +#endif + +#include "replication/basebackup_sink.h" + +#ifdef HAVE_LIBLZ4 + +typedef struct bbsink_lz4 +{ + /* Common information for all types of sink. */ + bbsink base; + + /* Compression level. */ + int compresslevel; + + LZ4F_compressionContext_t ctx; + LZ4F_preferences_t prefs; + + /* Number of bytes staged in output buffer. */ + size_t bytes_written; +} bbsink_lz4; + +static void bbsink_lz4_begin_backup(bbsink *sink); +static void bbsink_lz4_begin_archive(bbsink *sink, const char *archive_name); +static void bbsink_lz4_archive_contents(bbsink *sink, size_t avail_in); +static void bbsink_lz4_manifest_contents(bbsink *sink, size_t len); +static void bbsink_lz4_end_archive(bbsink *sink); +static void bbsink_lz4_cleanup(bbsink *sink); + +const bbsink_ops bbsink_lz4_ops = { + .begin_backup = bbsink_lz4_begin_backup, + .begin_archive = bbsink_lz4_begin_archive, + .archive_contents = bbsink_lz4_archive_contents, + .end_archive = bbsink_lz4_end_archive, + .begin_manifest = bbsink_forward_begin_manifest, + .manifest_contents = bbsink_lz4_manifest_contents, + .end_manifest = bbsink_forward_end_manifest, + .end_backup = bbsink_forward_end_backup, + .cleanup = bbsink_lz4_cleanup +}; +#endif + +/* + * Create a new basebackup sink that performs lz4 compression using the + * designated compression level. + */ +bbsink * +bbsink_lz4_new(bbsink *next, int compresslevel) +{ +#ifndef HAVE_LIBLZ4 + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("lz4 compression is not supported by this build"))); +#else + bbsink_lz4 *sink; + + Assert(next != NULL); + + if (compresslevel < 0 || compresslevel > 12) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("lz4 compression level %d is out of range", + compresslevel))); + + sink = palloc0(sizeof(bbsink_lz4)); + *((const bbsink_ops **) &sink->base.bbs_ops) = &bbsink_lz4_ops; + sink->base.bbs_next = next; + sink->compresslevel = compresslevel; + + return &sink->base; +#endif +} + +#ifdef HAVE_LIBLZ4 + +/* + * Begin backup. + */ +static void +bbsink_lz4_begin_backup(bbsink *sink) +{ + bbsink_lz4 *mysink = (bbsink_lz4 *) sink; + size_t output_buffer_bound; + LZ4F_preferences_t *prefs = &mysink->prefs; + + /* Initialize compressor object. */ + memset(prefs, 0, sizeof(LZ4F_preferences_t)); + prefs->frameInfo.blockSizeID = LZ4F_max256KB; + prefs->compressionLevel = mysink->compresslevel; + + /* + * We need our own buffer, because we're going to pass different data to + * the next sink than what gets passed to us. + */ + mysink->base.bbs_buffer = palloc(mysink->base.bbs_buffer_length); + + /* + * Since LZ4F_compressUpdate() requires the output buffer of size equal or + * greater than that of LZ4F_compressBound(), make sure we have the next + * sink's bbs_buffer of length that can accommodate the compressed input + * buffer. + */ + output_buffer_bound = LZ4F_compressBound(mysink->base.bbs_buffer_length, + &mysink->prefs); + + /* + * The buffer length is expected to be a multiple of BLCKSZ, so round up. + */ + output_buffer_bound = output_buffer_bound + BLCKSZ - + (output_buffer_bound % BLCKSZ); + + bbsink_begin_backup(sink->bbs_next, sink->bbs_state, output_buffer_bound); +} + +/* + * Prepare to compress the next archive. + */ +static void +bbsink_lz4_begin_archive(bbsink *sink, const char *archive_name) +{ + bbsink_lz4 *mysink = (bbsink_lz4 *) sink; + char *lz4_archive_name; + LZ4F_errorCode_t ctxError; + size_t headerSize; + + ctxError = LZ4F_createCompressionContext(&mysink->ctx, LZ4F_VERSION); + if (LZ4F_isError(ctxError)) + elog(ERROR, "could not create lz4 compression context: %s", + LZ4F_getErrorName(ctxError)); + + /* First of all write the frame header to destination buffer. */ + headerSize = LZ4F_compressBegin(mysink->ctx, + mysink->base.bbs_next->bbs_buffer, + mysink->base.bbs_next->bbs_buffer_length, + &mysink->prefs); + + if (LZ4F_isError(headerSize)) + elog(ERROR, "could not write lz4 header: %s", + LZ4F_getErrorName(headerSize)); + + /* + * We need to write the compressed data after the header in the output + * buffer. So, make sure to update the notion of bytes written to output + * buffer. + */ + mysink->bytes_written += headerSize; + + /* Add ".lz4" to the archive name. */ + lz4_archive_name = psprintf("%s.lz4", archive_name); + Assert(sink->bbs_next != NULL); + bbsink_begin_archive(sink->bbs_next, lz4_archive_name); + pfree(lz4_archive_name); +} + +/* + * Compress the input data to the output buffer until we run out of input + * data. Each time the output buffer falls below the compression bound for + * the input buffer, invoke the archive_contents() method for then next sink. + * + * Note that since we're compressing the input, it may very commonly happen + * that we consume all the input data without filling the output buffer. In + * that case, the compressed representation of the current input data won't + * actually be sent to the next bbsink until a later call to this function, + * or perhaps even not until bbsink_lz4_end_archive() is invoked. + */ +static void +bbsink_lz4_archive_contents(bbsink *sink, size_t avail_in) +{ + bbsink_lz4 *mysink = (bbsink_lz4 *) sink; + size_t compressedSize; + size_t avail_in_bound; + + avail_in_bound = LZ4F_compressBound(avail_in, &mysink->prefs); + + /* + * If the number of available bytes has fallen below the value computed by + * LZ4F_compressBound(), ask the next sink to process the data so that we + * can empty the buffer. + */ + if ((mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written) <= + avail_in_bound) + { + bbsink_archive_contents(sink->bbs_next, mysink->bytes_written); + mysink->bytes_written = 0; + } + + /* + * Compress the input buffer and write it into the output buffer. + */ + compressedSize = LZ4F_compressUpdate(mysink->ctx, + mysink->base.bbs_next->bbs_buffer + mysink->bytes_written, + mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written, + (uint8 *) mysink->base.bbs_buffer, + avail_in, + NULL); + + if (LZ4F_isError(compressedSize)) + elog(ERROR, "could not compress data: %s", + LZ4F_getErrorName(compressedSize)); + + /* + * Update our notion of how many bytes we've written into output buffer. + */ + mysink->bytes_written += compressedSize; +} + +/* + * There might be some data inside lz4's internal buffers; we need to get + * that flushed out and also finalize the lz4 frame and then get that forwarded + * to the successor sink as archive content. + * + * Then we can end processing for this archive. + */ +static void +bbsink_lz4_end_archive(bbsink *sink) +{ + bbsink_lz4 *mysink = (bbsink_lz4 *) sink; + size_t compressedSize; + size_t lz4_footer_bound; + + lz4_footer_bound = LZ4F_compressBound(0, &mysink->prefs); + + Assert(mysink->base.bbs_next->bbs_buffer_length >= lz4_footer_bound); + + if ((mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written) <= + lz4_footer_bound) + { + bbsink_archive_contents(sink->bbs_next, mysink->bytes_written); + mysink->bytes_written = 0; + } + + compressedSize = LZ4F_compressEnd(mysink->ctx, + mysink->base.bbs_next->bbs_buffer + mysink->bytes_written, + mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written, + NULL); + + if (LZ4F_isError(compressedSize)) + elog(ERROR, "could not end lz4 compression: %s", + LZ4F_getErrorName(compressedSize)); + + /* Update our notion of how many bytes we've written. */ + mysink->bytes_written += compressedSize; + + /* Send whatever accumulated output bytes we have. */ + bbsink_archive_contents(sink->bbs_next, mysink->bytes_written); + mysink->bytes_written = 0; + + /* Release the resources. */ + LZ4F_freeCompressionContext(mysink->ctx); + mysink->ctx = NULL; + + /* Pass on the information that this archive has ended. */ + bbsink_forward_end_archive(sink); +} + +/* + * Manifest contents are not compressed, but we do need to copy them into + * the successor sink's buffer, because we have our own. + */ +static void +bbsink_lz4_manifest_contents(bbsink *sink, size_t len) +{ + memcpy(sink->bbs_next->bbs_buffer, sink->bbs_buffer, len); + bbsink_manifest_contents(sink->bbs_next, len); +} + +/* + * In case the backup fails, make sure we free the compression context by + * calling LZ4F_freeCompressionContext() if needed to avoid memory leak. + */ +static void +bbsink_lz4_cleanup(bbsink *sink) +{ + bbsink_lz4 *mysink = (bbsink_lz4 *) sink; + + if (mysink->ctx) + { + LZ4F_freeCompressionContext(mysink->ctx); + mysink->ctx = NULL; + } +} + +#endif diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index c40925c1f0..923659ddee 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -391,7 +391,7 @@ usage(void) printf(_(" -X, --wal-method=none|fetch|stream\n" " include required WAL files with specified method\n")); printf(_(" -z, --gzip compress tar output\n")); - printf(_(" -Z, --compress={[{client,server}-]gzip,none}[:LEVEL] or [LEVEL]\n" + printf(_(" -Z, --compress={[{client,server}-]gzip,lz4,none}[:LEVEL] or [LEVEL]\n" " compress tar output with given compression method or level\n")); printf(_("\nGeneral options:\n")); printf(_(" -c, --checkpoint=fast|spread\n" @@ -1003,6 +1003,11 @@ parse_compress_options(char *src, WalCompressionMethod *methodres, *methodres = COMPRESSION_GZIP; *locationres = COMPRESS_LOCATION_SERVER; } + else if (pg_strcasecmp(firstpart, "server-lz4") == 0) + { + *methodres = COMPRESSION_LZ4; + *locationres = COMPRESS_LOCATION_SERVER; + } else if (pg_strcasecmp(firstpart, "none") == 0) { *methodres = COMPRESSION_NONE; @@ -1930,6 +1935,9 @@ BaseBackup(void) case COMPRESSION_GZIP: compressmethodstr = "gzip"; break; + case COMPRESSION_LZ4: + compressmethodstr = "lz4"; + break; default: Assert(false); break; @@ -2772,8 +2780,12 @@ main(int argc, char **argv) } break; case COMPRESSION_LZ4: - /* option not supported */ - Assert(false); + if (compresslevel > 12) + { + pg_log_error("compression level %d of method %s higher than maximum of 12", + compresslevel, "lz4"); + exit(1); + } break; } diff --git a/src/bin/pg_verifybackup/Makefile b/src/bin/pg_verifybackup/Makefile index 1ae818f9a1..851233a6e0 100644 --- a/src/bin/pg_verifybackup/Makefile +++ b/src/bin/pg_verifybackup/Makefile @@ -9,6 +9,7 @@ export TAR # used by the command "gzip" to pass down options, so stick with a different # name. export GZIP_PROGRAM=$(GZIP) +export LZ4=$(LZ4) subdir = src/bin/pg_verifybackup top_builddir = ../../.. diff --git a/src/bin/pg_verifybackup/t/008_untar.pl b/src/bin/pg_verifybackup/t/008_untar.pl index d32c86e92e..9d5b0e139a 100644 --- a/src/bin/pg_verifybackup/t/008_untar.pl +++ b/src/bin/pg_verifybackup/t/008_untar.pl @@ -11,7 +11,7 @@ use Config; use File::Path qw(rmtree); use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 6; +use Test::More tests => 9; my $primary = PostgreSQL::Test::Cluster->new('primary'); $primary->init(allows_streaming => 1); @@ -35,6 +35,14 @@ my @test_configuration = ( 'decompress_program' => $ENV{'GZIP_PROGRAM'}, 'decompress_flags' => [ '-d' ], 'enabled' => check_pg_config("#define HAVE_LIBZ 1") + }, + { + 'compression_method' => 'lz4', + 'backup_flags' => ['--compress', 'server-lz4'], + 'backup_archive' => 'base.tar.lz4', + 'decompress_program' => $ENV{'LZ4'}, + 'decompress_flags' => [ '-d', '-m'], + 'enabled' => check_pg_config("#define HAVE_LIBLZ4 1") } ); diff --git a/src/include/replication/basebackup_sink.h b/src/include/replication/basebackup_sink.h index 2cfa816bb8..a3f8d37258 100644 --- a/src/include/replication/basebackup_sink.h +++ b/src/include/replication/basebackup_sink.h @@ -284,6 +284,7 @@ extern void bbsink_forward_cleanup(bbsink *sink); /* Constructors for various types of sinks. */ extern bbsink *bbsink_copystream_new(bool send_to_client); extern bbsink *bbsink_gzip_new(bbsink *next, int compresslevel); +extern bbsink *bbsink_lz4_new(bbsink *next, int compresslevel); extern bbsink *bbsink_progress_new(bbsink *next, bool estimate_backup_size); extern bbsink *bbsink_server_new(bbsink *next, char *pathname); extern bbsink *bbsink_throttle_new(bbsink *next, uint32 maxrate);