305 lines
8.9 KiB
C
305 lines
8.9 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* basebackup_gzip.c
|
|
* Basebackup sink implementing gzip compression.
|
|
*
|
|
* Portions Copyright (c) 2010-2023, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/backup/basebackup_gzip.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#ifdef HAVE_LIBZ
|
|
#include <zlib.h>
|
|
#endif
|
|
|
|
#include "backup/basebackup_sink.h"
|
|
|
|
#ifdef HAVE_LIBZ
|
|
typedef struct bbsink_gzip
|
|
{
|
|
/* Common information for all types of sink. */
|
|
bbsink base;
|
|
|
|
/* Compression level. */
|
|
int compresslevel;
|
|
|
|
/* Compressed data stream. */
|
|
z_stream zstream;
|
|
|
|
/* Number of bytes staged in output buffer. */
|
|
size_t bytes_written;
|
|
} bbsink_gzip;
|
|
|
|
static void bbsink_gzip_begin_backup(bbsink *sink);
|
|
static void bbsink_gzip_begin_archive(bbsink *sink, const char *archive_name);
|
|
static void bbsink_gzip_archive_contents(bbsink *sink, size_t len);
|
|
static void bbsink_gzip_manifest_contents(bbsink *sink, size_t len);
|
|
static void bbsink_gzip_end_archive(bbsink *sink);
|
|
static void *gzip_palloc(void *opaque, unsigned items, unsigned size);
|
|
static void gzip_pfree(void *opaque, void *address);
|
|
|
|
static const bbsink_ops bbsink_gzip_ops = {
|
|
.begin_backup = bbsink_gzip_begin_backup,
|
|
.begin_archive = bbsink_gzip_begin_archive,
|
|
.archive_contents = bbsink_gzip_archive_contents,
|
|
.end_archive = bbsink_gzip_end_archive,
|
|
.begin_manifest = bbsink_forward_begin_manifest,
|
|
.manifest_contents = bbsink_gzip_manifest_contents,
|
|
.end_manifest = bbsink_forward_end_manifest,
|
|
.end_backup = bbsink_forward_end_backup,
|
|
.cleanup = bbsink_forward_cleanup
|
|
};
|
|
#endif
|
|
|
|
/*
|
|
* Create a new basebackup sink that performs gzip compression.
|
|
*/
|
|
bbsink *
|
|
bbsink_gzip_new(bbsink *next, pg_compress_specification *compress)
|
|
{
|
|
#ifndef HAVE_LIBZ
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("gzip compression is not supported by this build")));
|
|
return NULL; /* keep compiler quiet */
|
|
#else
|
|
bbsink_gzip *sink;
|
|
int compresslevel;
|
|
|
|
Assert(next != NULL);
|
|
|
|
compresslevel = compress->level;
|
|
Assert((compresslevel >= 1 && compresslevel <= 9) ||
|
|
compresslevel == Z_DEFAULT_COMPRESSION);
|
|
|
|
sink = palloc0(sizeof(bbsink_gzip));
|
|
*((const bbsink_ops **) &sink->base.bbs_ops) = &bbsink_gzip_ops;
|
|
sink->base.bbs_next = next;
|
|
sink->compresslevel = compresslevel;
|
|
|
|
return &sink->base;
|
|
#endif
|
|
}
|
|
|
|
#ifdef HAVE_LIBZ
|
|
|
|
/*
|
|
* Begin backup.
|
|
*/
|
|
static void
|
|
bbsink_gzip_begin_backup(bbsink *sink)
|
|
{
|
|
/*
|
|
* We need our own buffer, because we're going to pass different data to
|
|
* the next sink than what gets passed to us.
|
|
*/
|
|
sink->bbs_buffer = palloc(sink->bbs_buffer_length);
|
|
|
|
/*
|
|
* Since deflate() doesn't require the output buffer to be of any
|
|
* particular size, we can just make it the same size as the input buffer.
|
|
*/
|
|
bbsink_begin_backup(sink->bbs_next, sink->bbs_state,
|
|
sink->bbs_buffer_length);
|
|
}
|
|
|
|
/*
|
|
* Prepare to compress the next archive.
|
|
*/
|
|
static void
|
|
bbsink_gzip_begin_archive(bbsink *sink, const char *archive_name)
|
|
{
|
|
bbsink_gzip *mysink = (bbsink_gzip *) sink;
|
|
char *gz_archive_name;
|
|
z_stream *zs = &mysink->zstream;
|
|
|
|
/* Initialize compressor object. */
|
|
memset(zs, 0, sizeof(z_stream));
|
|
zs->zalloc = gzip_palloc;
|
|
zs->zfree = gzip_pfree;
|
|
zs->next_out = (uint8 *) sink->bbs_next->bbs_buffer;
|
|
zs->avail_out = sink->bbs_next->bbs_buffer_length;
|
|
|
|
/*
|
|
* We need to use deflateInit2() rather than deflateInit() here so that we
|
|
* can request a gzip header rather than a zlib header. Otherwise, we want
|
|
* to supply the same values that would have been used by default if we
|
|
* had just called deflateInit().
|
|
*
|
|
* Per the documentation for deflateInit2, the third argument must be
|
|
* Z_DEFLATED; the fourth argument is the number of "window bits", by
|
|
* default 15, but adding 16 gets you a gzip header rather than a zlib
|
|
* header; the fifth argument controls memory usage, and 8 is the default;
|
|
* and likewise Z_DEFAULT_STRATEGY is the default for the sixth argument.
|
|
*/
|
|
if (deflateInit2(zs, mysink->compresslevel, Z_DEFLATED, 15 + 16, 8,
|
|
Z_DEFAULT_STRATEGY) != Z_OK)
|
|
ereport(ERROR,
|
|
errcode(ERRCODE_INTERNAL_ERROR),
|
|
errmsg("could not initialize compression library"));
|
|
|
|
/*
|
|
* Add ".gz" to the archive name. Note that the pg_basebackup -z produces
|
|
* archives named ".tar.gz" rather than ".tgz", so we match that here.
|
|
*/
|
|
gz_archive_name = psprintf("%s.gz", archive_name);
|
|
Assert(sink->bbs_next != NULL);
|
|
bbsink_begin_archive(sink->bbs_next, gz_archive_name);
|
|
pfree(gz_archive_name);
|
|
}
|
|
|
|
/*
|
|
* Compress the input data to the output buffer until we run out of input
|
|
* data. Each time the output buffer fills up, invoke the archive_contents()
|
|
* method for then next sink.
|
|
*
|
|
* Note that since we're compressing the input, it may very commonly happen
|
|
* that we consume all the input data without filling the output buffer. In
|
|
* that case, the compressed representation of the current input data won't
|
|
* actually be sent to the next bbsink until a later call to this function,
|
|
* or perhaps even not until bbsink_gzip_end_archive() is invoked.
|
|
*/
|
|
static void
|
|
bbsink_gzip_archive_contents(bbsink *sink, size_t len)
|
|
{
|
|
bbsink_gzip *mysink = (bbsink_gzip *) sink;
|
|
z_stream *zs = &mysink->zstream;
|
|
|
|
/* Compress data from input buffer. */
|
|
zs->next_in = (uint8 *) mysink->base.bbs_buffer;
|
|
zs->avail_in = len;
|
|
|
|
while (zs->avail_in > 0)
|
|
{
|
|
int res;
|
|
|
|
/* Write output data into unused portion of output buffer. */
|
|
Assert(mysink->bytes_written < mysink->base.bbs_next->bbs_buffer_length);
|
|
zs->next_out = (uint8 *)
|
|
mysink->base.bbs_next->bbs_buffer + mysink->bytes_written;
|
|
zs->avail_out =
|
|
mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written;
|
|
|
|
/*
|
|
* Try to compress. Note that this will update zs->next_in and
|
|
* zs->avail_in according to how much input data was consumed, and
|
|
* zs->next_out and zs->avail_out according to how many output bytes
|
|
* were produced.
|
|
*
|
|
* According to the zlib documentation, Z_STREAM_ERROR should only
|
|
* occur if we've made a programming error, or if say there's been a
|
|
* memory clobber; we use elog() rather than Assert() here out of an
|
|
* abundance of caution.
|
|
*/
|
|
res = deflate(zs, Z_NO_FLUSH);
|
|
if (res == Z_STREAM_ERROR)
|
|
elog(ERROR, "could not compress data: %s", zs->msg);
|
|
|
|
/* Update our notion of how many bytes we've written. */
|
|
mysink->bytes_written =
|
|
mysink->base.bbs_next->bbs_buffer_length - zs->avail_out;
|
|
|
|
/*
|
|
* If the output buffer is full, it's time for the next sink to
|
|
* process the contents.
|
|
*/
|
|
if (mysink->bytes_written >= mysink->base.bbs_next->bbs_buffer_length)
|
|
{
|
|
bbsink_archive_contents(sink->bbs_next, mysink->bytes_written);
|
|
mysink->bytes_written = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* There might be some data inside zlib's internal buffers; we need to get
|
|
* that flushed out and forwarded to the successor sink as archive content.
|
|
*
|
|
* Then we can end processing for this archive.
|
|
*/
|
|
static void
|
|
bbsink_gzip_end_archive(bbsink *sink)
|
|
{
|
|
bbsink_gzip *mysink = (bbsink_gzip *) sink;
|
|
z_stream *zs = &mysink->zstream;
|
|
|
|
/* There is no more data available. */
|
|
zs->next_in = (uint8 *) mysink->base.bbs_buffer;
|
|
zs->avail_in = 0;
|
|
|
|
while (1)
|
|
{
|
|
int res;
|
|
|
|
/* Write output data into unused portion of output buffer. */
|
|
Assert(mysink->bytes_written < mysink->base.bbs_next->bbs_buffer_length);
|
|
zs->next_out = (uint8 *)
|
|
mysink->base.bbs_next->bbs_buffer + mysink->bytes_written;
|
|
zs->avail_out =
|
|
mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written;
|
|
|
|
/*
|
|
* As bbsink_gzip_archive_contents, but pass Z_FINISH since there is
|
|
* no more input.
|
|
*/
|
|
res = deflate(zs, Z_FINISH);
|
|
if (res == Z_STREAM_ERROR)
|
|
elog(ERROR, "could not compress data: %s", zs->msg);
|
|
|
|
/* Update our notion of how many bytes we've written. */
|
|
mysink->bytes_written =
|
|
mysink->base.bbs_next->bbs_buffer_length - zs->avail_out;
|
|
|
|
/*
|
|
* Apparently we had no data in the output buffer and deflate() was
|
|
* not able to add any. We must be done.
|
|
*/
|
|
if (mysink->bytes_written == 0)
|
|
break;
|
|
|
|
/* Send whatever accumulated output bytes we have. */
|
|
bbsink_archive_contents(sink->bbs_next, mysink->bytes_written);
|
|
mysink->bytes_written = 0;
|
|
}
|
|
|
|
/* Must also pass on the information that this archive has ended. */
|
|
bbsink_forward_end_archive(sink);
|
|
}
|
|
|
|
/*
|
|
* Manifest contents are not compressed, but we do need to copy them into
|
|
* the successor sink's buffer, because we have our own.
|
|
*/
|
|
static void
|
|
bbsink_gzip_manifest_contents(bbsink *sink, size_t len)
|
|
{
|
|
memcpy(sink->bbs_next->bbs_buffer, sink->bbs_buffer, len);
|
|
bbsink_manifest_contents(sink->bbs_next, len);
|
|
}
|
|
|
|
/*
|
|
* Wrapper function to adjust the signature of palloc to match what libz
|
|
* expects.
|
|
*/
|
|
static void *
|
|
gzip_palloc(void *opaque, unsigned items, unsigned size)
|
|
{
|
|
return palloc(items * size);
|
|
}
|
|
|
|
/*
|
|
* Wrapper function to adjust the signature of pfree to match what libz
|
|
* expects.
|
|
*/
|
|
static void
|
|
gzip_pfree(void *opaque, void *address)
|
|
{
|
|
pfree(address);
|
|
}
|
|
|
|
#endif
|