Introduce a generic pg_dump compression API

Switch pg_dump to use the Compression API, implemented by bf9aa490db.

The CompressFileHandle replaces the cfp* family of functions with a
struct of callbacks for accessing (compressed) files. This allows adding
new compression methods simply by introducing a new struct instance with
appropriate implementation of the callbacks.

Archives compressed using custom compression methods store an identifier
of the compression algorithm in their header instead of the compression
level. The header version is bumped.

Author: Georgios Kokolatos
Reviewed-by: Michael Paquier, Rachel Heaton, Justin Pryzby, Tomas Vondra
Discussion: https://postgr.es/m/faUNEOpts9vunEaLnmxmG-DldLSg_ql137OC3JYDmgrOMHm1RvvWY2IdBkv_CRxm5spCCb_OmKNk2T03TMm0fBEWveFF9wA1WizPuAgB7Ss%3D%40protonmail.com
This commit is contained in:
Tomas Vondra 2023-02-23 18:33:30 +01:00
parent 739f1d6218
commit e9960732a9
16 changed files with 1090 additions and 791 deletions

View File

@ -24,7 +24,9 @@ LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
OBJS = \
$(WIN32RES) \
compress_gzip.o \
compress_io.o \
compress_none.o \
dumputils.o \
parallel.o \
pg_backup_archiver.o \

View File

@ -0,0 +1,401 @@
/*-------------------------------------------------------------------------
*
* compress_gzip.c
* Routines for archivers to read or write a gzip compressed data stream.
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/bin/pg_dump/compress_gzip.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <unistd.h>
#include "compress_gzip.h"
#include "pg_backup_utils.h"
#ifdef HAVE_LIBZ
#include "zlib.h"
/*----------------------
* Compressor API
*----------------------
*/
/*
 * Private state of the gzip compressor, stored in
 * CompressorState->private_data.
 */
typedef struct GzipCompressorState
{
z_streamp zp; /* zlib stream; NULL until the first write allocates it */
void *outbuf; /* buffer deflate() writes compressed output into */
size_t outsize; /* size of outbuf that is offered to zlib */
} GzipCompressorState;
/* Private routines that support gzip compressed data I/O */
/*
 * Run deflate() over the input currently attached to the zlib stream and
 * hand every non-empty chunk of compressed output to cs->writeF.
 *
 * Without 'flush' the loop runs until zlib has consumed all input
 * (avail_in == 0). With 'flush' deflate() is called with Z_FINISH and the
 * loop continues until zlib reports Z_STREAM_END.
 *
 * A Z_STREAM_ERROR result from deflate() is reported through pg_fatal().
 */
static void
DeflateCompressorGzip(ArchiveHandle *AH, CompressorState *cs, bool flush)
{
GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
z_streamp zp = gzipcs->zp;
void *out = gzipcs->outbuf;
int res = Z_OK;
while (gzipcs->zp->avail_in != 0 || flush)
{
res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
if (res == Z_STREAM_ERROR)
pg_fatal("could not compress data: %s", zp->msg);
/*
 * Drain the output buffer when flushing produced something, when the
 * buffer is full, or when zlib still has unconsumed input.
 */
if ((flush && (zp->avail_out < gzipcs->outsize))
|| (zp->avail_out == 0)
|| (zp->avail_in != 0)
)
{
/*
 * Extra paranoia: avoid zero-length chunks, since a zero length
 * chunk is the EOF marker in the custom format. This should never
 * happen but ...
 */
if (zp->avail_out < gzipcs->outsize)
{
/*
 * Pass the filled part of the buffer to the write callback. No
 * result is checked here; error handling is left to the
 * callback itself.
 */
size_t len = gzipcs->outsize - zp->avail_out;
cs->writeF(AH, (char *) out, len);
}
/* Make the whole buffer available to zlib again */
zp->next_out = out;
zp->avail_out = gzipcs->outsize;
}
if (res == Z_STREAM_END)
break;
}
}
/*
 * Finish a gzip compression run.
 *
 * If a zlib stream was ever started, flush what it still buffers, close
 * it, and release the stream and its output buffer. The private state
 * itself is always freed and detached from 'cs'.
 */
static void
EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs)
{
	GzipCompressorState *state = (GzipCompressorState *) cs->private_data;
	z_streamp	stream = state->zp;

	if (stream != NULL)
	{
		/* No further input: let zlib emit everything it is holding */
		stream->next_in = NULL;
		stream->avail_in = 0;
		DeflateCompressorGzip(AH, cs, true);

		if (deflateEnd(stream) != Z_OK)
			pg_fatal("could not close compression stream: %s", stream->msg);

		pg_free(state->outbuf);
		pg_free(state->zp);
	}

	pg_free(state);
	cs->private_data = NULL;
}
/*
 * Compress 'dLen' bytes at 'data' and emit the result through cs->writeF.
 *
 * The zlib stream and its output buffer are created lazily on the first
 * call. A compression level of zero is rejected with a fatal error.
 */
static void
WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
					   const void *data, size_t dLen)
{
	GzipCompressorState *state = (GzipCompressorState *) cs->private_data;

	if (state->zp == NULL)
	{
		z_streamp	stream;

		stream = state->zp = (z_streamp) pg_malloc(sizeof(z_stream));
		stream->zalloc = Z_NULL;
		stream->zfree = Z_NULL;
		stream->opaque = Z_NULL;

		/*
		 * zlib is told it may write outsize bytes; one more byte is
		 * allocated because some routines want to append a trailing zero
		 * byte to the zlib output.
		 */
		state->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1);
		state->outsize = ZLIB_OUT_SIZE;

		/*
		 * Level zero would only copy the input block by block, which is
		 * presumably not what a caller asking for compression wants.
		 */
		if (cs->compression_spec.level == 0)
			pg_fatal("requested to compress the archive yet no level was specified");

		if (deflateInit(stream, cs->compression_spec.level) != Z_OK)
			pg_fatal("could not initialize compression library: %s", stream->msg);

		/* Be paranoid: End might be called after Start with no Write */
		stream->next_out = state->outbuf;
		stream->avail_out = state->outsize;
	}

	state->zp->next_in = (void *) unconstify(void *, data);
	state->zp->avail_in = dLen;
	DeflateCompressorGzip(AH, cs, false);
}
/*
 * Decompress a whole zlib stream obtained through cs->readF and write the
 * decompressed bytes with ahwrite().
 *
 * cs->readF is called until it returns 0. Each returned chunk is fed to
 * inflate() until zlib has consumed it. Afterwards inflate() is called with
 * no input until it reports Z_STREAM_END. Any inflate() result other than
 * Z_OK or Z_STREAM_END is reported through pg_fatal().
 */
static void
ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs)
{
z_streamp zp;
char *out;
int res = Z_OK;
size_t cnt;
char *buf;
size_t buflen;
zp = (z_streamp) pg_malloc(sizeof(z_stream));
zp->zalloc = Z_NULL;
zp->zfree = Z_NULL;
zp->opaque = Z_NULL;
buf = pg_malloc(ZLIB_IN_SIZE);
buflen = ZLIB_IN_SIZE;
/* one extra byte so a terminating '\0' can follow a full buffer */
out = pg_malloc(ZLIB_OUT_SIZE + 1);
if (inflateInit(zp) != Z_OK)
pg_fatal("could not initialize compression library: %s",
zp->msg);
/* no minimal chunk size for zlib */
while ((cnt = cs->readF(AH, &buf, &buflen)))
{
/* buf is re-read each time: readF receives &buf and may replace it */
zp->next_in = (void *) buf;
zp->avail_in = cnt;
while (zp->avail_in > 0)
{
zp->next_out = (void *) out;
zp->avail_out = ZLIB_OUT_SIZE;
res = inflate(zp, 0);
if (res != Z_OK && res != Z_STREAM_END)
pg_fatal("could not uncompress data: %s", zp->msg);
out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
}
}
/* Input is exhausted; keep inflating until zlib signals end of stream */
zp->next_in = NULL;
zp->avail_in = 0;
while (res != Z_STREAM_END)
{
zp->next_out = (void *) out;
zp->avail_out = ZLIB_OUT_SIZE;
res = inflate(zp, 0);
if (res != Z_OK && res != Z_STREAM_END)
pg_fatal("could not uncompress data: %s", zp->msg);
out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
}
if (inflateEnd(zp) != Z_OK)
pg_fatal("could not close compression library: %s", zp->msg);
free(buf);
free(out);
free(zp);
}
/* Public routines that support gzip compressed data I/O */
/*
 * Set up 'cs' for gzip: install the gzip read/write/end callbacks, record
 * the compression specification, and attach a zero-initialized
 * GzipCompressorState as private data.
 */
void
InitCompressorGzip(CompressorState *cs,
				   const pg_compress_specification compression_spec)
{
	GzipCompressorState *state;

	cs->compression_spec = compression_spec;

	cs->end = EndCompressorGzip;
	cs->writeData = WriteDataToArchiveGzip;
	cs->readData = ReadDataFromArchiveGzip;

	state = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState));
	cs->private_data = state;
}
/*----------------------
* Compress File API
*----------------------
*/
/*
 * Read up to 'size' bytes from the gzip file behind CFH into 'ptr'.
 *
 * Returns the number of uncompressed bytes read, which is smaller than
 * 'size' only at end of file. A short read that is not caused by EOF, and
 * any negative gzread() result, is reported through pg_fatal().
 */
static size_t
Gzip_read(void *ptr, size_t size, CompressFileHandle *CFH)
{
	gzFile		gzfp = (gzFile) CFH->private_data;
	int			gzret;

	/*
	 * gzread() returns an int and uses a negative value to signal an error.
	 * Keep it in an int so an error cannot be mistaken for a huge byte count
	 * when the EOF flag happens to be set as well.
	 */
	gzret = gzread(gzfp, ptr, size);
	if (gzret < 0 || ((size_t) gzret != size && !gzeof(gzfp)))
	{
		int			errnum;
		const char *errmsg = gzerror(gzfp, &errnum);

		pg_fatal("could not read from input file: %s",
				 errnum == Z_ERRNO ? strerror(errno) : errmsg);
	}

	return (size_t) gzret;
}
/*
 * Write 'size' bytes from 'ptr' to the gzip file behind CFH and return
 * whatever gzwrite() reports.
 */
static size_t
Gzip_write(const void *ptr, size_t size, CompressFileHandle *CFH)
{
	return gzwrite((gzFile) CFH->private_data, ptr, size);
}
/*
 * Read one byte from the gzip file behind CFH.
 *
 * Hitting EOF or a read error is fatal, so the function only returns
 * with a byte that was actually read.
 */
static int
Gzip_getc(CompressFileHandle *CFH)
{
	gzFile		gzfp = (gzFile) CFH->private_data;
	int			ch;

	/* Reset errno so strerror() below reflects this call only */
	errno = 0;
	ch = gzgetc(gzfp);

	if (ch == EOF)
	{
		if (gzeof(gzfp))
			pg_fatal("could not read from input file: end of file");
		else
			pg_fatal("could not read from input file: %s", strerror(errno));
	}

	return ch;
}
/*
 * Read a line of at most 'size' - 1 characters into 'ptr' using gzgets()
 * and return its result unchanged.
 */
static char *
Gzip_gets(char *ptr, int size, CompressFileHandle *CFH)
{
	return gzgets((gzFile) CFH->private_data, ptr, size);
}
/*
 * Close the gzip file behind CFH and return gzclose()'s result. The
 * handle's private data is cleared before closing.
 */
static int
Gzip_close(CompressFileHandle *CFH)
{
	gzFile		closing = (gzFile) CFH->private_data;

	CFH->private_data = NULL;

	return gzclose(closing);
}
/*
 * Report gzeof() for the gzip file behind CFH.
 */
static int
Gzip_eof(CompressFileHandle *CFH)
{
	return gzeof((gzFile) CFH->private_data);
}
/*
 * Return a message describing the last error on the gzip file behind CFH.
 * When zlib reports Z_ERRNO, the text comes from strerror(errno) instead
 * of from zlib.
 */
static const char *
Gzip_get_error(CompressFileHandle *CFH)
{
	int			zerr;
	const char *zmsg = gzerror((gzFile) CFH->private_data, &zerr);

	return (zerr == Z_ERRNO) ? strerror(errno) : zmsg;
}
/*
 * Open a gzip stream on 'path', or on a duplicate of 'fd' when fd >= 0.
 *
 * 'mode' is an fopen-style mode string. When the handle's compression
 * level is not Z_DEFAULT_COMPRESSION, the level is appended to the mode.
 *
 * Returns 0 on success with the gzFile stored in CFH->private_data, and 1
 * on failure.
 */
static int
Gzip_open(const char *path, int fd, const char *mode, CompressFileHandle *CFH)
{
	gzFile		gzfp;
	char		mode_compression[32];

	if (CFH->compression_spec.level != Z_DEFAULT_COMPRESSION)
	{
		/*
		 * user has specified a compression level, so tell zlib to use it
		 */
		snprintf(mode_compression, sizeof(mode_compression), "%s%d",
				 mode, CFH->compression_spec.level);
	}
	else
	{
		/* bounded copy; an over-long mode cannot overrun the buffer */
		snprintf(mode_compression, sizeof(mode_compression), "%s", mode);
	}

	if (fd >= 0)
	{
		/* zlib takes ownership of the descriptor, so hand it a duplicate */
		int			dup_fd = dup(fd);

		if (dup_fd < 0)
			return 1;

		gzfp = gzdopen(dup_fd, mode_compression);
		if (gzfp == NULL)
		{
			/* gzdopen() does not close the descriptor on failure */
			int			save_errno = errno;

			close(dup_fd);
			errno = save_errno;
		}
	}
	else
		gzfp = gzopen(path, mode_compression);

	if (gzfp == NULL)
		return 1;

	CFH->private_data = gzfp;

	return 0;
}
/*
 * Open "<path>.gz" for writing through the handle's open_func and return
 * its result. errno as left by open_func is preserved across freeing the
 * temporary file name.
 */
static int
Gzip_open_write(const char *path, const char *mode, CompressFileHandle *CFH)
{
	char	   *gzpath = psprintf("%s.gz", path);
	int			rc;
	int			errno_after_open;

	rc = CFH->open_func(gzpath, -1, mode, CFH);

	errno_after_open = errno;
	pg_free(gzpath);
	errno = errno_after_open;

	return rc;
}
/*
 * Populate 'CFH' with the gzip implementations of the file callbacks,
 * record the compression specification, and start with no open file.
 */
void
InitCompressFileHandleGzip(CompressFileHandle *CFH,
						   const pg_compress_specification compression_spec)
{
	CFH->compression_spec = compression_spec;
	CFH->private_data = NULL;

	/* opening and closing */
	CFH->open_func = Gzip_open;
	CFH->open_write_func = Gzip_open_write;
	CFH->close_func = Gzip_close;

	/* data transfer */
	CFH->read_func = Gzip_read;
	CFH->write_func = Gzip_write;
	CFH->getc_func = Gzip_getc;
	CFH->gets_func = Gzip_gets;

	/* status */
	CFH->eof_func = Gzip_eof;
	CFH->get_error_func = Gzip_get_error;
}
#else /* HAVE_LIBZ */
/*
 * Variant used when HAVE_LIBZ is not defined: gzip is unavailable, so any
 * attempt to set up a gzip compressor is reported through pg_fatal().
 */
void
InitCompressorGzip(CompressorState *cs,
const pg_compress_specification compression_spec)
{
pg_fatal("this build does not support compression with %s", "gzip");
}
/*
 * Variant used when HAVE_LIBZ is not defined: gzip is unavailable, so any
 * attempt to set up a gzip file handle is reported through pg_fatal().
 */
void
InitCompressFileHandleGzip(CompressFileHandle *CFH,
const pg_compress_specification compression_spec)
{
pg_fatal("this build does not support compression with %s", "gzip");
}
#endif /* HAVE_LIBZ */

View File

@ -0,0 +1,24 @@
/*-------------------------------------------------------------------------
*
* compress_gzip.h
* GZIP interface to compress_io.c routines
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/bin/pg_dump/compress_gzip.h
*
*-------------------------------------------------------------------------
*/
#ifndef _COMPRESS_GZIP_H_
#define _COMPRESS_GZIP_H_
#include "compress_io.h"
/* Install the gzip callbacks and private state into a CompressorState. */
extern void InitCompressorGzip(CompressorState *cs,
const pg_compress_specification compression_spec);
/* Install the gzip file callbacks into a CompressFileHandle. */
extern void InitCompressFileHandleGzip(CompressFileHandle *CFH,
const pg_compress_specification compression_spec);
#endif /* _COMPRESS_GZIP_H_ */

View File

@ -9,42 +9,51 @@
*
* This file includes two APIs for dealing with compressed data. The first
* provides more flexibility, using callbacks to read/write data from the
* underlying stream. The second API is a wrapper around fopen/gzopen and
* underlying stream. The second API is a wrapper around fopen and
* friends, providing an interface similar to those, but abstracts away
* the possible compression. Both APIs use libz for the compression, but
* the second API uses gzip headers, so the resulting files can be easily
* manipulated with the gzip utility.
* the possible compression. The second API is intended to produce files
* that can be easily manipulated with an external compression utility
* program.
*
* Compressor API
* --------------
*
* The interface for writing to an archive consists of three functions:
* AllocateCompressor, WriteDataToArchive and EndCompressor. First you call
* AllocateCompressor, then write all the data by calling WriteDataToArchive
* as many times as needed, and finally EndCompressor. WriteDataToArchive
* and EndCompressor will call the WriteFunc that was provided to
* AllocateCompressor for each chunk of compressed data.
* AllocateCompressor, writeData, and EndCompressor. First you call
* AllocateCompressor, then write all the data by calling writeData as many
* times as needed, and finally EndCompressor. writeData will call the
* WriteFunc that was provided to AllocateCompressor for each chunk of
* compressed data.
*
* The interface for reading an archive consists of just one function:
* ReadDataFromArchive. ReadDataFromArchive reads the whole compressed input
* stream, by repeatedly calling the given ReadFunc. ReadFunc returns the
* compressed data chunk at a time, and ReadDataFromArchive decompresses it
* and passes the decompressed data to ahwrite(), until ReadFunc returns 0
* to signal EOF.
*
* The interface is the same for compressed and uncompressed streams.
* The interface for reading an archive consists of the same three functions:
* AllocateCompressor, readData, and EndCompressor. First you call
* AllocateCompressor, then read all the data by calling readData to read the
* whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
* returns the compressed data one chunk at a time. Then readData decompresses
* it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
* to signal EOF. The interface is the same for compressed and uncompressed
* streams.
*
* Compressed stream API
* ----------------------
*
* The compressed stream API is a wrapper around the C standard fopen() and
* libz's gzopen() APIs. It allows you to use the same functions for
* compressed and uncompressed streams. cfopen_read() first tries to open
* the file with given name, and if it fails, it tries to open the same
* file with the .gz suffix. cfopen_write() opens a file for writing, an
* extra argument specifies if the file should be compressed, and adds the
* .gz suffix to the filename if so. This allows you to easily handle both
* compressed and uncompressed files.
* The compressed stream API provides a set of function pointers for
* opening, reading, writing, and finally closing files. The implemented
* function pointers are documented in the corresponding header file and are
* common for all streams. It allows the caller to use the same functions for
* both compressed and uncompressed streams.
*
* The interface consists of three functions, InitCompressFileHandle,
* InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
* compression is known, then start by calling InitCompressFileHandle,
* otherwise discover it by using InitDiscoverCompressFileHandle. Then call
* the function pointers as required for the read/write operations. Finally
* call EndCompressFileHandle to end the stream.
*
* InitDiscoverCompressFileHandle tries to infer the compression from the
* filename suffix. If the suffix is not recognized, it first tries to open
* the file as is and, if that fails, tries the same file name with the .gz
* suffix.
*
* IDENTIFICATION
* src/bin/pg_dump/compress_io.c
@ -53,12 +62,13 @@
*/
#include "postgres_fe.h"
#include "compress_io.h"
#include "pg_backup_utils.h"
#include <sys/stat.h>
#include <unistd.h>
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif
#include "compress_gzip.h"
#include "compress_io.h"
#include "compress_none.h"
#include "pg_backup_utils.h"
/*----------------------
* Generic functions
@ -96,663 +106,45 @@ supports_compression(const pg_compress_specification compression_spec)
*----------------------
*/
/* typedef appears in compress_io.h */
/* State of one compressor stream. */
struct CompressorState
{
pg_compress_specification compression_spec; /* algorithm and level in use */
WriteFunc writeF; /* callback that receives each output chunk */
#ifdef HAVE_LIBZ
z_streamp zp; /* zlib deflate stream */
char *zlibOut; /* output buffer handed to zlib */
size_t zlibOutSize; /* size of zlibOut that is offered to zlib */
#endif
};
/* Routines that support zlib compressed data I/O */
#ifdef HAVE_LIBZ
static void InitCompressorZlib(CompressorState *cs, int level);
static void DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs,
bool flush);
static void ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF);
static void WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
const char *data, size_t dLen);
static void EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs);
#endif
/* Routines that support uncompressed data I/O */
static void ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF);
static void WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
const char *data, size_t dLen);
/* Public interface routines */
/* Allocate a new compressor */
/*
* Allocate a new compressor.
*/
CompressorState *
AllocateCompressor(const pg_compress_specification compression_spec,
WriteFunc writeF)
ReadFunc readF, WriteFunc writeF)
{
CompressorState *cs;
#ifndef HAVE_LIBZ
if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
pg_fatal("this build does not support compression with %s", "gzip");
#endif
cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
cs->readF = readF;
cs->writeF = writeF;
cs->compression_spec = compression_spec;
/*
* Perform compression algorithm specific initialization.
*/
#ifdef HAVE_LIBZ
if (cs->compression_spec.algorithm == PG_COMPRESSION_GZIP)
InitCompressorZlib(cs, cs->compression_spec.level);
#endif
if (compression_spec.algorithm == PG_COMPRESSION_NONE)
InitCompressorNone(cs, compression_spec);
else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
InitCompressorGzip(cs, compression_spec);
return cs;
}
/*
* Read all compressed data from the input stream (via readF) and print it
* out with ahwrite().
*/
void
ReadDataFromArchive(ArchiveHandle *AH,
					const pg_compress_specification compression_spec,
					ReadFunc readF)
{
	/* Dispatch on the algorithm; anything but NONE and GZIP is a no-op */
	switch (compression_spec.algorithm)
	{
		case PG_COMPRESSION_NONE:
			ReadDataFromArchiveNone(AH, readF);
			break;

		case PG_COMPRESSION_GZIP:
#ifdef HAVE_LIBZ
			ReadDataFromArchiveZlib(AH, readF);
#else
			pg_fatal("this build does not support compression with %s", "gzip");
#endif
			break;

		default:
			break;
	}
}
/*
* Compress and write data to the output stream (via writeF).
*/
void
WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs,
				   const void *data, size_t dLen)
{
	/* Route the data to the writer matching the configured algorithm */
	if (cs->compression_spec.algorithm == PG_COMPRESSION_NONE)
	{
		WriteDataToArchiveNone(AH, cs, data, dLen);
	}
	else if (cs->compression_spec.algorithm == PG_COMPRESSION_GZIP)
	{
#ifdef HAVE_LIBZ
		WriteDataToArchiveZlib(AH, cs, data, dLen);
#else
		pg_fatal("this build does not support compression with %s", "gzip");
#endif
	}
	else if (cs->compression_spec.algorithm == PG_COMPRESSION_LZ4 ||
			 cs->compression_spec.algorithm == PG_COMPRESSION_ZSTD)
	{
		pg_fatal("invalid compression method");
	}
}
/*
* Terminate compression library context and flush its buffers.
*/
void
EndCompressor(ArchiveHandle *AH, CompressorState *cs)
{
#ifdef HAVE_LIBZ
if (cs->compression_spec.algorithm == PG_COMPRESSION_GZIP)
EndCompressorZlib(AH, cs);
#endif
free(cs);
cs->end(AH, cs);
pg_free(cs);
}
/* Private routines, specific to each compression method. */
#ifdef HAVE_LIBZ
/*
* Functions for zlib compressed output.
*/
/*
 * Allocate and initialize the zlib deflate stream and output buffer of
 * 'cs' for the given compression level.
 */
static void
InitCompressorZlib(CompressorState *cs, int level)
{
	z_streamp	stream;

	stream = cs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
	stream->zalloc = Z_NULL;
	stream->zfree = Z_NULL;
	stream->opaque = Z_NULL;

	/*
	 * zlib is told it may write zlibOutSize bytes; one more byte is
	 * allocated because some routines want to append a trailing zero byte
	 * to the zlib output.
	 */
	cs->zlibOutSize = ZLIB_OUT_SIZE;
	cs->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);

	if (deflateInit(stream, level) != Z_OK)
		pg_fatal("could not initialize compression library: %s",
				 stream->msg);

	/* Be paranoid: End might be called after Start with no Write */
	stream->next_out = (void *) cs->zlibOut;
	stream->avail_out = cs->zlibOutSize;
}
/*
 * Flush the remaining compressed output, close the zlib stream, and free
 * the stream and its output buffer.
 */
static void
EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs)
{
	z_streamp	stream = cs->zp;

	/* No further input: let zlib emit everything it is holding */
	stream->avail_in = 0;
	stream->next_in = NULL;
	DeflateCompressorZlib(AH, cs, true);

	if (deflateEnd(stream) != Z_OK)
		pg_fatal("could not close compression stream: %s", stream->msg);

	free(cs->zlibOut);
	free(cs->zp);
}
/*
 * Run deflate() over the input currently attached to cs->zp and hand every
 * non-empty chunk of compressed output to cs->writeF.
 *
 * Without 'flush' the loop runs until zlib has consumed all input. With
 * 'flush' deflate() is called with Z_FINISH and the loop continues until
 * zlib reports Z_STREAM_END. Z_STREAM_ERROR is reported through pg_fatal().
 */
static void
DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
{
z_streamp zp = cs->zp;
char *out = cs->zlibOut;
int res = Z_OK;
while (cs->zp->avail_in != 0 || flush)
{
res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
if (res == Z_STREAM_ERROR)
pg_fatal("could not compress data: %s", zp->msg);
/*
 * Drain the output buffer when flushing produced something, when the
 * buffer is full, or when zlib still has unconsumed input.
 */
if ((flush && (zp->avail_out < cs->zlibOutSize))
|| (zp->avail_out == 0)
|| (zp->avail_in != 0)
)
{
/*
 * Extra paranoia: avoid zero-length chunks, since a zero length
 * chunk is the EOF marker in the custom format. This should never
 * happen but...
 */
if (zp->avail_out < cs->zlibOutSize)
{
/*
 * Pass the filled part of the buffer to the write callback. No
 * result is checked here; error handling is left to the
 * callback itself.
 */
size_t len = cs->zlibOutSize - zp->avail_out;
cs->writeF(AH, out, len);
}
/* Make the whole buffer available to zlib again */
zp->next_out = (void *) out;
zp->avail_out = cs->zlibOutSize;
}
if (res == Z_STREAM_END)
break;
}
}
/*
 * Attach 'dLen' bytes at 'data' as zlib input and compress them, emitting
 * output through the compressor's write callback.
 */
static void
WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
					   const char *data, size_t dLen)
{
	z_streamp	stream = cs->zp;

	stream->next_in = (void *) unconstify(char *, data);
	stream->avail_in = dLen;

	DeflateCompressorZlib(AH, cs, false);
}
/*
 * Decompress a whole zlib stream obtained through 'readF' and write the
 * decompressed bytes with ahwrite().
 *
 * readF is called until it returns 0. Each returned chunk is fed to
 * inflate() until zlib has consumed it. Afterwards inflate() is called with
 * no input until it reports Z_STREAM_END. Any inflate() result other than
 * Z_OK or Z_STREAM_END is reported through pg_fatal().
 */
static void
ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
{
z_streamp zp;
char *out;
int res = Z_OK;
size_t cnt;
char *buf;
size_t buflen;
zp = (z_streamp) pg_malloc(sizeof(z_stream));
zp->zalloc = Z_NULL;
zp->zfree = Z_NULL;
zp->opaque = Z_NULL;
buf = pg_malloc(ZLIB_IN_SIZE);
buflen = ZLIB_IN_SIZE;
/* one extra byte so a terminating '\0' can follow a full buffer */
out = pg_malloc(ZLIB_OUT_SIZE + 1);
if (inflateInit(zp) != Z_OK)
pg_fatal("could not initialize compression library: %s",
zp->msg);
/* no minimal chunk size for zlib */
while ((cnt = readF(AH, &buf, &buflen)))
{
/* buf is re-read each time: readF receives &buf and may replace it */
zp->next_in = (void *) buf;
zp->avail_in = cnt;
while (zp->avail_in > 0)
{
zp->next_out = (void *) out;
zp->avail_out = ZLIB_OUT_SIZE;
res = inflate(zp, 0);
if (res != Z_OK && res != Z_STREAM_END)
pg_fatal("could not uncompress data: %s", zp->msg);
out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
}
}
/* Input is exhausted; keep inflating until zlib signals end of stream */
zp->next_in = NULL;
zp->avail_in = 0;
while (res != Z_STREAM_END)
{
zp->next_out = (void *) out;
zp->avail_out = ZLIB_OUT_SIZE;
res = inflate(zp, 0);
if (res != Z_OK && res != Z_STREAM_END)
pg_fatal("could not uncompress data: %s", zp->msg);
out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
}
if (inflateEnd(zp) != Z_OK)
pg_fatal("could not close compression library: %s", zp->msg);
free(buf);
free(out);
free(zp);
}
#endif /* HAVE_LIBZ */
/*
* Functions for uncompressed output.
*/
/*
 * Copy an uncompressed stream: call 'readF' until it returns 0 and pass
 * each chunk straight to ahwrite().
 */
static void
ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF)
{
	size_t		capacity = ZLIB_OUT_SIZE;
	char	   *chunk = pg_malloc(ZLIB_OUT_SIZE);

	for (;;)
	{
		size_t		got = readF(AH, &chunk, &capacity);

		if (got == 0)
			break;

		ahwrite(chunk, 1, got, AH);
	}

	free(chunk);
}
/*
 * Write 'dLen' bytes at 'data' without compression by passing them directly
 * to the compressor's write callback.
 */
static void
WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
const char *data, size_t dLen)
{
cs->writeF(AH, data, dLen);
}
/*----------------------
* Compressed stream API
*----------------------
*/
/*
* cfp represents an open stream, wrapping the underlying FILE or gzFile
* pointer. This is opaque to the callers.
* Private routines
*/
/*
 * An open stream. The functions operating on it test compressedfp first
 * and fall back to uncompressedfp.
 */
struct cfp
{
FILE *uncompressedfp; /* stdio stream used for uncompressed files */
#ifdef HAVE_LIBZ
gzFile compressedfp; /* zlib stream used for gzip-compressed files */
#endif
};
#ifdef HAVE_LIBZ
static int hasSuffix(const char *filename, const char *suffix);
#endif
/*
 * Release 'p' with free() while leaving errno exactly as the caller had it,
 * so cleanup does not hide the error that triggered it.
 */
static void
free_keep_errno(void *p)
{
	const int	errno_on_entry = errno;

	free(p);

	errno = errno_on_entry;
}
/*
* Open a file for reading. 'path' is the file to open, and 'mode' should
* be either "r" or "rb".
*
* If the file at 'path' does not exist, we append the ".gz" suffix (if 'path'
* doesn't already have it) and try again. So if you pass "foo" as 'path',
* this will open either "foo" or "foo.gz".
*
* On failure, return NULL with an error code in errno.
*/
/*
 * Open 'path' for reading, falling back to "<path>.gz" when the plain file
 * cannot be opened (the fallback exists only with HAVE_LIBZ). A path that
 * already ends in ".gz" is opened as gzip directly.
 *
 * Returns the open stream, or NULL on failure.
 */
cfp *
cfopen_read(const char *path, const char *mode)
{
cfp *fp;
pg_compress_specification compression_spec = {0};
#ifdef HAVE_LIBZ
if (hasSuffix(path, ".gz"))
{
compression_spec.algorithm = PG_COMPRESSION_GZIP;
fp = cfopen(path, mode, compression_spec);
}
else
#endif
/* without HAVE_LIBZ this block is the whole body of the function */
{
compression_spec.algorithm = PG_COMPRESSION_NONE;
fp = cfopen(path, mode, compression_spec);
#ifdef HAVE_LIBZ
if (fp == NULL)
{
/* plain file could not be opened: retry with the .gz suffix */
char *fname;
fname = psprintf("%s.gz", path);
compression_spec.algorithm = PG_COMPRESSION_GZIP;
fp = cfopen(fname, mode, compression_spec);
free_keep_errno(fname);
}
#endif
}
return fp;
}
/*
* Open a file for writing. 'path' indicates the path name, and 'mode' must
* be a filemode as accepted by fopen() and gzopen() that indicates writing
* ("w", "wb", "a", or "ab").
*
* If 'compression_spec.algorithm' is GZIP, a gzip compressed stream is opened,
* and 'compression_spec.level' used. The ".gz" suffix is automatically added to
* 'path' in that case.
*
* On failure, return NULL with an error code in errno.
*/
cfp *
cfopen_write(const char *path, const char *mode,
			 const pg_compress_specification compression_spec)
{
	/* Uncompressed output goes to 'path' as given */
	if (compression_spec.algorithm == PG_COMPRESSION_NONE)
		return cfopen(path, mode, compression_spec);

#ifdef HAVE_LIBZ
	{
		/* Compressed output goes to "<path>.gz" */
		char	   *gzname = psprintf("%s.gz", path);
		cfp		   *stream = cfopen(gzname, mode, compression_spec);

		free_keep_errno(gzname);
		return stream;
	}
#else
	pg_fatal("this build does not support compression with %s", "gzip");
	return NULL;				/* keep compiler quiet */
#endif
}
/*
* This is the workhorse for cfopen() or cfdopen(). It opens file 'path' or
* associates a stream 'fd', if 'fd' is a valid descriptor, in 'mode'. The
* descriptor is not dup'ed and it is the caller's responsibility to do so.
* The caller must verify that the 'compress_algorithm' is supported by the
* current build.
*
* On failure, return NULL with an error code in errno.
*/
/*
 * Shared implementation of cfopen() and cfdopen().
 *
 * Opens 'path', or wraps descriptor 'fd' when fd >= 0, using a gzip stream
 * if compression_spec.algorithm is PG_COMPRESSION_GZIP and a stdio stream
 * otherwise. Returns the new cfp, or NULL if the underlying open failed; in
 * that case the cfp is freed with errno preserved.
 */
static cfp *
cfopen_internal(const char *path, int fd, const char *mode,
pg_compress_specification compression_spec)
{
cfp *fp = pg_malloc0(sizeof(cfp));
if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
{
#ifdef HAVE_LIBZ
if (compression_spec.level != Z_DEFAULT_COMPRESSION)
{
/* user has specified a compression level, so tell zlib to use it */
char mode_compression[32];
/* zlib takes the level as a digit appended to the mode string */
snprintf(mode_compression, sizeof(mode_compression), "%s%d",
mode, compression_spec.level);
if (fd >= 0)
fp->compressedfp = gzdopen(fd, mode_compression);
else
fp->compressedfp = gzopen(path, mode_compression);
}
else
{
/* don't specify a level, just use the zlib default */
if (fd >= 0)
fp->compressedfp = gzdopen(fd, mode);
else
fp->compressedfp = gzopen(path, mode);
}
if (fp->compressedfp == NULL)
{
free_keep_errno(fp);
fp = NULL;
}
#else
pg_fatal("this build does not support compression with %s", "gzip");
#endif
}
else
{
/* every other algorithm value is opened as a plain stdio stream */
if (fd >= 0)
fp->uncompressedfp = fdopen(fd, mode);
else
fp->uncompressedfp = fopen(path, mode);
if (fp->uncompressedfp == NULL)
{
free_keep_errno(fp);
fp = NULL;
}
}
return fp;
}
/*
* Opens file 'path' in 'mode' and compression as defined in
* compression_spec. The caller must verify that the compression
* is supported by the current build.
*
* On failure, return NULL with an error code in errno.
*/
cfp *
cfopen(const char *path, const char *mode,
const pg_compress_specification compression_spec)
{
/* fd = -1 makes cfopen_internal() open by path */
return cfopen_internal(path, -1, mode, compression_spec);
}
/*
* Associates a stream 'fd', if 'fd' is a valid descriptor, in 'mode'
* and compression as defined in compression_spec. The caller must
* verify that the compression is supported by the current build.
*
* On failure, return NULL with an error code in errno.
*/
cfp *
cfdopen(int fd, const char *mode,
const pg_compress_specification compression_spec)
{
/* no path is passed; cfopen_internal() uses it only when fd < 0 */
return cfopen_internal(NULL, fd, mode, compression_spec);
}
/*
 * Read up to 'size' bytes from 'fp' into 'ptr'.
 *
 * Returns the number of bytes read; 0 is returned immediately when 'size'
 * is 0. A short read that is not caused by end of file is treated as an
 * error (pg_fatal for gzip streams, READ_ERROR_EXIT for stdio streams).
 */
int
cfread(void *ptr, int size, cfp *fp)
{
int ret;
if (size == 0)
return 0;
#ifdef HAVE_LIBZ
if (fp->compressedfp)
{
ret = gzread(fp->compressedfp, ptr, size);
if (ret != size && !gzeof(fp->compressedfp))
{
int errnum;
const char *errmsg = gzerror(fp->compressedfp, &errnum);
/* Z_ERRNO means the real cause is in errno, not in zlib's message */
pg_fatal("could not read from input file: %s",
errnum == Z_ERRNO ? strerror(errno) : errmsg);
}
}
else
#endif
{
ret = fread(ptr, 1, size, fp->uncompressedfp);
if (ret != size && !feof(fp->uncompressedfp))
READ_ERROR_EXIT(fp->uncompressedfp);
}
return ret;
}
/*
 * Write 'size' bytes from 'ptr' to 'fp' and return the result of gzwrite()
 * for gzip streams or fwrite() for stdio streams.
 */
int
cfwrite(const void *ptr, int size, cfp *fp)
{
#ifdef HAVE_LIBZ
if (fp->compressedfp)
return gzwrite(fp->compressedfp, ptr, size);
else
#endif
return fwrite(ptr, 1, size, fp->uncompressedfp);
}
/*
 * Read one byte from 'fp'. Getting EOF back from the underlying read is
 * treated as an error: pg_fatal() for gzip streams, READ_ERROR_EXIT for
 * stdio streams.
 */
int
cfgetc(cfp *fp)
{
int ret;
#ifdef HAVE_LIBZ
if (fp->compressedfp)
{
ret = gzgetc(fp->compressedfp);
if (ret == EOF)
{
/* distinguish a read failure from a genuine end of file */
if (!gzeof(fp->compressedfp))
pg_fatal("could not read from input file: %s", strerror(errno));
else
pg_fatal("could not read from input file: end of file");
}
}
else
#endif
{
ret = fgetc(fp->uncompressedfp);
if (ret == EOF)
READ_ERROR_EXIT(fp->uncompressedfp);
}
return ret;
}
/*
 * Read a line into 'buf' (capacity 'len') from 'fp' and return the result
 * of gzgets() for gzip streams or fgets() for stdio streams.
 */
char *
cfgets(cfp *fp, char *buf, int len)
{
#ifdef HAVE_LIBZ
if (fp->compressedfp)
return gzgets(fp->compressedfp, buf, len);
else
#endif
return fgets(buf, len, fp->uncompressedfp);
}
/*
 * Close 'fp' and free it.
 *
 * Returns the result of gzclose() or fclose(). A NULL 'fp' yields EOF with
 * errno set to EBADF. errno as left by the close call survives freeing the
 * cfp.
 */
int
cfclose(cfp *fp)
{
int result;
if (fp == NULL)
{
errno = EBADF;
return EOF;
}
#ifdef HAVE_LIBZ
if (fp->compressedfp)
{
result = gzclose(fp->compressedfp);
fp->compressedfp = NULL;
}
else
#endif
{
result = fclose(fp->uncompressedfp);
fp->uncompressedfp = NULL;
}
free_keep_errno(fp);
return result;
}
/*
 * Return the end-of-file indicator of 'fp': gzeof() for gzip streams,
 * feof() for stdio streams.
 */
int
cfeof(cfp *fp)
{
#ifdef HAVE_LIBZ
if (fp->compressedfp)
return gzeof(fp->compressedfp);
else
#endif
return feof(fp->uncompressedfp);
}
/*
 * Return a message for the last error on 'fp'. For gzip streams zlib's own
 * message is used unless zlib reports Z_ERRNO; in every other case the
 * text comes from strerror(errno).
 */
const char *
get_cfp_error(cfp *fp)
{
#ifdef HAVE_LIBZ
	if (fp->compressedfp)
	{
		int			zerr;
		const char *zmsg = gzerror(fp->compressedfp, &zerr);

		if (zerr != Z_ERRNO)
			return zmsg;
	}
#endif

	return strerror(errno);
}
#ifdef HAVE_LIBZ
static int
hasSuffix(const char *filename, const char *suffix)
{
@ -767,4 +159,113 @@ hasSuffix(const char *filename, const char *suffix)
suffixlen) == 0;
}
/*
 * Release 'p' with free() while leaving errno exactly as the caller had it,
 * so cleanup does not hide the error that triggered it.
 */
static void
free_keep_errno(void *p)
{
	const int	errno_on_entry = errno;

	free(p);

	errno = errno_on_entry;
}
/*
* Public interface
*/
/*
* Initialize a compress file handle for the specified compression algorithm.
*/
/*
 * Allocate a CompressFileHandle and install the callbacks matching
 * compression_spec.algorithm.
 *
 * Only PG_COMPRESSION_NONE and PG_COMPRESSION_GZIP have an implementation.
 * Any other algorithm is reported through pg_fatal() instead of returning a
 * handle whose callbacks are all NULL.
 */
CompressFileHandle *
InitCompressFileHandle(const pg_compress_specification compression_spec)
{
	CompressFileHandle *CFH;

	CFH = pg_malloc0(sizeof(CompressFileHandle));

	if (compression_spec.algorithm == PG_COMPRESSION_NONE)
		InitCompressFileHandleNone(CFH, compression_spec);
	else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
		InitCompressFileHandleGzip(CFH, compression_spec);
	else
		pg_fatal("invalid compression method");

	return CFH;
}
/*
* Open a file for reading. 'path' is the file to open, and 'mode' should
* be either "r" or "rb".
*
* If the file at 'path' contains the suffix of a supported compression method,
* currently this includes only ".gz", then this compression will be used
* throughout. Otherwise the compression will be inferred by iteratively trying
* to open the file at 'path', first as is, then by appending known compression
* suffixes. So if you pass "foo" as 'path', this will open either "foo" or
* "foo.gz", trying in that order.
*
* On failure, return NULL with an error code in errno.
*/
/*
 * Open 'path' for reading, working out the compression from what is on
 * disk.
 *
 * A path ending in ".gz" is opened as gzip. Otherwise, if 'path' exists it
 * is opened uncompressed; if it does not and the build has libz,
 * "<path>.gz" is tried as gzip.
 *
 * Returns the opened handle, or NULL if the open failed; errno is preserved
 * across the cleanup in that case.
 */
CompressFileHandle *
InitDiscoverCompressFileHandle(const char *path, const char *mode)
{
	CompressFileHandle *CFH = NULL;
	struct stat st;
	char	   *fname;
	pg_compress_specification compression_spec = {0};

	compression_spec.algorithm = PG_COMPRESSION_NONE;

	Assert(strcmp(mode, PG_BINARY_R) == 0);

	/*
	 * Use pg_strdup() rather than strdup(): a failed strdup() would hand a
	 * NULL pointer to hasSuffix() and open_func below.
	 */
	fname = pg_strdup(path);

	if (hasSuffix(fname, ".gz"))
		compression_spec.algorithm = PG_COMPRESSION_GZIP;
	else
	{
		bool		exists;

		exists = (stat(path, &st) == 0);
		/* avoid unused warning if it is not built with compression */
		if (exists)
			compression_spec.algorithm = PG_COMPRESSION_NONE;
#ifdef HAVE_LIBZ
		if (!exists)
		{
			/* the plain name is missing: look for the gzip-suffixed file */
			free_keep_errno(fname);
			fname = psprintf("%s.gz", path);
			exists = (stat(fname, &st) == 0);

			if (exists)
				compression_spec.algorithm = PG_COMPRESSION_GZIP;
		}
#endif
	}

	CFH = InitCompressFileHandle(compression_spec);
	if (CFH->open_func(fname, -1, mode, CFH))
	{
		free_keep_errno(CFH);
		CFH = NULL;
	}
	free_keep_errno(fname);

	return CFH;
}
/*
* Close an open file handle and release its memory.
*
* On failure, returns an error value and sets errno appropriately.
*/
int
EndCompressFileHandle(CompressFileHandle *CFH)
{
	int			rc = 0;

	/* Only a handle that still has an open file needs closing */
	if (CFH->private_data != NULL)
		rc = CFH->close_func(CFH);

	/* Keep errno from close_func intact for the caller */
	free_keep_errno(CFH);

	return rc;
}

View File

@ -23,50 +23,160 @@
extern char *supports_compression(const pg_compress_specification compression_spec);
/* Prototype for callback function to WriteDataToArchive() */
/*
* Prototype for callback function used in writeData()
*/
typedef void (*WriteFunc) (ArchiveHandle *AH, const char *buf, size_t len);
/*
* Prototype for callback function to ReadDataFromArchive()
* Prototype for callback function used in readData()
*
* ReadDataFromArchive will call the read function repeatedly, until it
* returns 0 to signal EOF. ReadDataFromArchive passes a buffer to read the
* data into in *buf, of length *buflen. If that's not big enough for the
* callback function, it can free() it and malloc() a new one, returning the
* new buffer and its size in *buf and *buflen.
* readData will call the read function repeatedly, until it returns 0 to signal
* EOF. readData passes a buffer to read the data into in *buf, of length
* *buflen. If that's not big enough for the callback function, it can free() it
* and malloc() a new one, returning the new buffer and its size in *buf and
* *buflen.
*
* Returns the number of bytes read into *buf, or 0 on EOF.
*/
typedef size_t (*ReadFunc) (ArchiveHandle *AH, char **buf, size_t *buflen);
/* struct definition appears in compress_io.c */
typedef struct CompressorState CompressorState;
/*
 * State of one compressor stream.
 *
 * The readData/writeData/end callbacks are installed by an
 * algorithm-specific initializer (for example InitCompressorNone()), while
 * readF/writeF are the I/O callbacks handed to AllocateCompressor().
 */
struct CompressorState
{
/*
 * Read all compressed data from the input stream (via readF) and print it
 * out with ahwrite().
 */
void (*readData) (ArchiveHandle *AH, CompressorState *cs);
/*
 * Compress and write data to the output stream (via writeF).
 */
void (*writeData) (ArchiveHandle *AH, CompressorState *cs,
const void *data, size_t dLen);
/*
 * End compression and flush internal buffers if any.
 */
void (*end) (ArchiveHandle *AH, CompressorState *cs);
/*
 * Callback function to read from an already processed input stream.
 * Callers that only write pass NULL for it to AllocateCompressor().
 */
ReadFunc readF;
/*
 * Callback function to write an already processed chunk of data.
 * Callers that only read pass NULL for it to AllocateCompressor().
 */
WriteFunc writeF;
/*
 * Compression specification for this state.
 */
pg_compress_specification compression_spec;
/*
 * Private data to be used by the compressor.
 */
void *private_data;
};
extern CompressorState *AllocateCompressor(const pg_compress_specification compression_spec,
ReadFunc readF,
WriteFunc writeF);
extern void ReadDataFromArchive(ArchiveHandle *AH,
const pg_compress_specification compression_spec,
ReadFunc readF);
extern void WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs,
const void *data, size_t dLen);
extern void EndCompressor(ArchiveHandle *AH, CompressorState *cs);
/*
* Compress File Handle
*/
typedef struct CompressFileHandle CompressFileHandle;
typedef struct cfp cfp;
struct CompressFileHandle
{
/*
* Open a file in mode.
*
* Pass either 'path' or 'fd' depending on whether a file path or a file
* descriptor is available. 'mode' can be one of 'r', 'rb', 'w', 'wb',
* 'a', and 'ab'. Requires an already initialized CompressFileHandle.
*/
int (*open_func) (const char *path, int fd, const char *mode,
CompressFileHandle *CFH);
extern cfp *cfopen(const char *path, const char *mode,
const pg_compress_specification compression_spec);
extern cfp *cfdopen(int fd, const char *mode,
const pg_compress_specification compression_spec);
extern cfp *cfopen_read(const char *path, const char *mode);
extern cfp *cfopen_write(const char *path, const char *mode,
const pg_compress_specification compression_spec);
extern int cfread(void *ptr, int size, cfp *fp);
extern int cfwrite(const void *ptr, int size, cfp *fp);
extern int cfgetc(cfp *fp);
extern char *cfgets(cfp *fp, char *buf, int len);
extern int cfclose(cfp *fp);
extern int cfeof(cfp *fp);
extern const char *get_cfp_error(cfp *fp);
/*
* Open a file for writing.
*
* 'mode' can be one of 'w', 'wb', 'a', and 'ab'. Requires an already
* initialized CompressFileHandle.
*/
int (*open_write_func) (const char *path, const char *mode,
CompressFileHandle *CFH);
/*
* Read 'size' bytes of data from the file and store them into 'ptr'.
*/
size_t (*read_func) (void *ptr, size_t size, CompressFileHandle *CFH);
/*
* Write 'size' bytes of data into the file from 'ptr'.
*/
size_t (*write_func) (const void *ptr, size_t size,
struct CompressFileHandle *CFH);
/*
* Read at most size - 1 characters from the compress file handle into
* 's'.
*
* Stop if an EOF or a newline is found first. 's' is always null
* terminated and contains the newline if it was found.
*/
char *(*gets_func) (char *s, int size, CompressFileHandle *CFH);
/*
* Read the next character from the compress file handle as 'unsigned
* char' cast into 'int'.
*/
int (*getc_func) (CompressFileHandle *CFH);
/*
* Test if EOF is reached in the compress file handle.
*/
int (*eof_func) (CompressFileHandle *CFH);
/*
* Close an open file handle.
*/
int (*close_func) (CompressFileHandle *CFH);
/*
* Get a pointer to a string that describes an error that occurred during a
* compress file handle operation.
*/
const char *(*get_error_func) (CompressFileHandle *CFH);
/*
* Compression specification for this file handle.
*/
pg_compress_specification compression_spec;
/*
* Private data to be used by the compressor.
*/
void *private_data;
};
/*
* Initialize a compress file handle with the requested compression.
*/
extern CompressFileHandle *InitCompressFileHandle(const pg_compress_specification compression_spec);
/*
* Initialize a compress file stream. Infer the compression algorithm
* from 'path', either by examining its suffix or by appending the supported
* suffixes to 'path'.
*/
extern CompressFileHandle *InitDiscoverCompressFileHandle(const char *path,
const char *mode);
extern int EndCompressFileHandle(CompressFileHandle *CFH);
#endif

View File

@ -0,0 +1,206 @@
/*-------------------------------------------------------------------------
*
* compress_none.c
* Routines for archivers to read or write an uncompressed stream.
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/bin/pg_dump/compress_none.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <unistd.h>
#include "compress_none.h"
#include "pg_backup_utils.h"
/*----------------------
* Compressor API
*----------------------
*/
/*
* Private routines
*/
/*
 * Pass archive data through unchanged: keep pulling chunks from the readF
 * callback until it reports zero bytes, forwarding each one to ahwrite().
 * The callback may replace the buffer, hence it is given by reference.
 */
static void
ReadDataFromArchiveNone(ArchiveHandle *AH, CompressorState *cs)
{
	size_t		buflen = ZLIB_OUT_SIZE;
	char	   *buf = pg_malloc(ZLIB_OUT_SIZE);

	for (;;)
	{
		size_t		nread = cs->readF(AH, &buf, &buflen);

		if (nread == 0)
			break;

		ahwrite(buf, 1, nread, AH);
	}

	free(buf);
}
/*
 * No compression to apply: forward the caller's bytes directly to the
 * writeF callback.
 */
static void
WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
					   const void *data, size_t dLen)
{
	WriteFunc	emit = cs->writeF;

	emit(AH, data, dLen);
}
/*
 * End-of-stream hook for the uncompressed case.  Data is written straight
 * through by WriteDataToArchiveNone(), so nothing is pending here.
 */
static void
EndCompressorNone(ArchiveHandle *AH, CompressorState *cs)
{
	/* intentionally empty */
}
/*
* Public interface
*/
/*
 * Set up 'cs' for uncompressed operation: record the compression
 * specification and install the pass-through callbacks.  The readF/writeF
 * members are not touched here.
 */
void
InitCompressorNone(CompressorState *cs,
				   const pg_compress_specification compression_spec)
{
	cs->compression_spec = compression_spec;

	cs->end = EndCompressorNone;
	cs->writeData = WriteDataToArchiveNone;
	cs->readData = ReadDataFromArchiveNone;
}
/*----------------------
* Compress File API
*----------------------
*/
/*
* Private routines
*/
/*
 * Read up to 'size' bytes from the handle's stdio stream into 'ptr'.
 *
 * Returns the number of bytes read, which is short only at end of file.
 * A short read that is not due to EOF is reported through pg_fatal().
 */
static size_t
read_none(void *ptr, size_t size, CompressFileHandle *CFH)
{
	FILE	   *stream = (FILE *) CFH->private_data;
	size_t		nread;

	if (size == 0)
		return 0;

	nread = fread(ptr, 1, size, stream);

	/* A full read, or a short read at EOF, is a normal outcome */
	if (nread == size || feof(stream))
		return nread;

	pg_fatal("could not read from input file: %s",
			 strerror(errno));

	return nread;
}
/*
 * Write 'size' bytes from 'ptr' to the handle's stdio stream and return
 * the number of bytes fwrite() reports as written.
 */
static size_t
write_none(const void *ptr, size_t size, CompressFileHandle *CFH)
{
	FILE	   *stream = (FILE *) CFH->private_data;

	return fwrite(ptr, 1, size, stream);
}
/*
 * Describe the most recent error; for plain files this is simply the
 * message for the current errno value.
 */
static const char *
get_error_none(CompressFileHandle *CFH)
{
	const char *msg = strerror(errno);

	return msg;
}
/*
 * Read one line (at most size - 1 characters) from the handle's stdio
 * stream into 'ptr'; returns whatever fgets() returns.
 */
static char *
gets_none(char *ptr, int size, CompressFileHandle *CFH)
{
	FILE	   *stream = (FILE *) CFH->private_data;

	return fgets(ptr, size, stream);
}
/*
 * Read the next character from the handle's stdio stream.
 *
 * Both a read error and end of file are reported through pg_fatal(), with
 * a message that tells the two apart.
 */
static int
getc_none(CompressFileHandle *CFH)
{
	FILE	   *stream = (FILE *) CFH->private_data;
	int			ch = fgetc(stream);

	if (ch != EOF)
		return ch;

	if (feof(stream))
		pg_fatal("could not read from input file: end of file");
	else
		pg_fatal("could not read from input file: %s", strerror(errno));

	return ch;
}
/*
 * Close the handle's stdio stream, if any, and clear the handle's private
 * data.  Returns fclose()'s result, or 0 when no stream was open.
 */
static int
close_none(CompressFileHandle *CFH)
{
	FILE	   *stream = (FILE *) CFH->private_data;

	/* Detach the stream first so the handle no longer refers to it */
	CFH->private_data = NULL;

	return stream ? fclose(stream) : 0;
}
/*
 * Report whether the handle's stdio stream has its end-of-file indicator
 * set, as given by feof().
 */
static int
eof_none(CompressFileHandle *CFH)
{
	FILE	   *stream = (FILE *) CFH->private_data;

	return feof(stream);
}
/*
 * Open an uncompressed file, either from a descriptor or from a path.
 *
 * When 'fd' is non-negative the descriptor is duplicated and the copy is
 * wrapped in a stdio stream, so the caller's descriptor stays open after
 * the handle is closed; otherwise 'path' is opened with fopen().
 *
 * Returns 0 on success and 1 on failure, with errno describing the call
 * that failed.
 */
static int
open_none(const char *path, int fd, const char *mode, CompressFileHandle *CFH)
{
	Assert(CFH->private_data == NULL);

	if (fd >= 0)
	{
		int			dup_fd = dup(fd);
		FILE	   *fp;

		/* Fail here so that dup()'s errno is not masked by fdopen(-1) */
		if (dup_fd < 0)
			return 1;

		fp = fdopen(dup_fd, mode);
		if (fp == NULL)
		{
			int			save_errno = errno;

			/* Do not leak the duplicate when it could not be wrapped */
			close(dup_fd);
			errno = save_errno;
			return 1;
		}

		CFH->private_data = fp;
	}
	else
	{
		CFH->private_data = fopen(path, mode);
		if (CFH->private_data == NULL)
			return 1;
	}

	return 0;
}
/*
 * Open 'path' for writing with fopen() and attach the stream to the
 * handle.  Returns 0 on success, 1 if the file could not be opened.
 */
static int
open_write_none(const char *path, const char *mode, CompressFileHandle *CFH)
{
	FILE	   *stream;

	Assert(CFH->private_data == NULL);

	stream = fopen(path, mode);
	CFH->private_data = stream;

	return (stream == NULL) ? 1 : 0;
}
/*
* Public interface
*/
/*
 * Set up 'CFH' for uncompressed file access: install the stdio-based
 * callbacks, record the compression specification, and mark the handle as
 * not yet opened.
 */
void
InitCompressFileHandleNone(CompressFileHandle *CFH,
						   const pg_compress_specification compression_spec)
{
	CFH->open_func = open_none;
	CFH->open_write_func = open_write_none;
	CFH->read_func = read_none;
	CFH->write_func = write_none;
	CFH->gets_func = gets_none;
	CFH->getc_func = getc_none;
	CFH->close_func = close_none;
	CFH->eof_func = eof_none;
	CFH->get_error_func = get_error_none;

	/*
	 * Keep the specification with the handle, as InitCompressorNone() does
	 * for compressor states, instead of silently dropping the argument.
	 */
	CFH->compression_spec = compression_spec;

	CFH->private_data = NULL;
}

View File

@ -0,0 +1,24 @@
/*-------------------------------------------------------------------------
*
* compress_none.h
* Uncompressed interface to compress_io.c routines
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/bin/pg_dump/compress_none.h
*
*-------------------------------------------------------------------------
*/
#ifndef _COMPRESS_NONE_H_
#define _COMPRESS_NONE_H_
#include "compress_io.h"
/* Install the uncompressed (pass-through) Compressor API callbacks in 'cs'. */
extern void InitCompressorNone(CompressorState *cs,
const pg_compress_specification compression_spec);
/* Install the stdio-based Compress File API callbacks in 'CFH'. */
extern void InitCompressFileHandleNone(CompressFileHandle *CFH,
const pg_compress_specification compression_spec);
#endif /* _COMPRESS_NONE_H_ */

View File

@ -1,7 +1,9 @@
# Copyright (c) 2022-2023, PostgreSQL Global Development Group
pg_dump_common_sources = files(
'compress_gzip.c',
'compress_io.c',
'compress_none.c',
'dumputils.c',
'parallel.c',
'pg_backup_archiver.c',

View File

@ -95,8 +95,8 @@ static void dump_lo_buf(ArchiveHandle *AH);
static void dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim);
static void SetOutput(ArchiveHandle *AH, const char *filename,
const pg_compress_specification compression_spec);
static cfp *SaveOutput(ArchiveHandle *AH);
static void RestoreOutput(ArchiveHandle *AH, cfp *savedOutput);
static CompressFileHandle *SaveOutput(ArchiveHandle *AH);
static void RestoreOutput(ArchiveHandle *AH, CompressFileHandle *savedOutput);
static int restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel);
static void restore_toc_entries_prefork(ArchiveHandle *AH,
@ -272,7 +272,7 @@ CloseArchive(Archive *AHX)
/* Close the output */
errno = 0;
res = cfclose(AH->OF);
res = EndCompressFileHandle(AH->OF);
if (res != 0)
pg_fatal("could not close output file: %m");
@ -354,7 +354,7 @@ RestoreArchive(Archive *AHX)
RestoreOptions *ropt = AH->public.ropt;
bool parallel_mode;
TocEntry *te;
cfp *sav;
CompressFileHandle *sav;
AH->stage = STAGE_INITIALIZING;
@ -1128,7 +1128,7 @@ PrintTOCSummary(Archive *AHX)
TocEntry *te;
pg_compress_specification out_compression_spec = {0};
teSection curSection;
cfp *sav;
CompressFileHandle *sav;
const char *fmtName;
char stamp_str[64];
@ -1144,9 +1144,10 @@ PrintTOCSummary(Archive *AHX)
strcpy(stamp_str, "[unknown]");
ahprintf(AH, ";\n; Archive created at %s\n", stamp_str);
ahprintf(AH, "; dbname: %s\n; TOC Entries: %d\n; Compression: %d\n",
ahprintf(AH, "; dbname: %s\n; TOC Entries: %d\n; Compression: %s\n",
sanitize_line(AH->archdbname, false),
AH->tocCount, AH->compression_spec.level);
AH->tocCount,
get_compress_algorithm_name(AH->compression_spec.algorithm));
switch (AH->format)
{
@ -1503,6 +1504,7 @@ static void
SetOutput(ArchiveHandle *AH, const char *filename,
const pg_compress_specification compression_spec)
{
CompressFileHandle *CFH;
const char *mode;
int fn = -1;
@ -1525,33 +1527,32 @@ SetOutput(ArchiveHandle *AH, const char *filename,
else
mode = PG_BINARY_W;
if (fn >= 0)
AH->OF = cfdopen(dup(fn), mode, compression_spec);
else
AH->OF = cfopen(filename, mode, compression_spec);
CFH = InitCompressFileHandle(compression_spec);
if (!AH->OF)
if (CFH->open_func(filename, fn, mode, CFH))
{
if (filename)
pg_fatal("could not open output file \"%s\": %m", filename);
else
pg_fatal("could not open output file: %m");
}
AH->OF = CFH;
}
static cfp *
static CompressFileHandle *
SaveOutput(ArchiveHandle *AH)
{
return (cfp *) AH->OF;
return (CompressFileHandle *) AH->OF;
}
static void
RestoreOutput(ArchiveHandle *AH, cfp *savedOutput)
RestoreOutput(ArchiveHandle *AH, CompressFileHandle *savedOutput)
{
int res;
errno = 0;
res = cfclose(AH->OF);
res = EndCompressFileHandle(AH->OF);
if (res != 0)
pg_fatal("could not close output file: %m");
@ -1690,7 +1691,11 @@ ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH)
else if (RestoringToDB(AH))
bytes_written = ExecuteSqlCommandBuf(&AH->public, (const char *) ptr, size * nmemb);
else
bytes_written = cfwrite(ptr, size * nmemb, AH->OF);
{
CompressFileHandle *CFH = (CompressFileHandle *) AH->OF;
bytes_written = CFH->write_func(ptr, size * nmemb, CFH);
}
if (bytes_written != size * nmemb)
WRITE_ERROR_EXIT;
@ -2032,6 +2037,18 @@ ReadStr(ArchiveHandle *AH)
return buf;
}
/*
 * Test whether 'filename' exists inside directory 'dir' as a regular file.
 *
 * The combined path must fit in MAXPGPATH bytes; otherwise the problem is
 * reported through pg_fatal().
 */
static bool
_fileExistsInDirectory(const char *dir, const char *filename)
{
	char		fullpath[MAXPGPATH];
	struct stat statbuf;
	int			needed;

	needed = snprintf(fullpath, MAXPGPATH, "%s/%s", dir, filename);
	if (needed >= MAXPGPATH)
		pg_fatal("directory name too long: \"%s\"", dir);

	if (stat(fullpath, &statbuf) != 0)
		return false;

	return S_ISREG(statbuf.st_mode) != 0;
}
static int
_discoverArchiveFormat(ArchiveHandle *AH)
{
@ -2062,26 +2079,12 @@ _discoverArchiveFormat(ArchiveHandle *AH)
*/
if (stat(AH->fSpec, &st) == 0 && S_ISDIR(st.st_mode))
{
char buf[MAXPGPATH];
if (snprintf(buf, MAXPGPATH, "%s/toc.dat", AH->fSpec) >= MAXPGPATH)
pg_fatal("directory name too long: \"%s\"",
AH->fSpec);
if (stat(buf, &st) == 0 && S_ISREG(st.st_mode))
{
AH->format = archDirectory;
AH->format = archDirectory;
if (_fileExistsInDirectory(AH->fSpec, "toc.dat"))
return AH->format;
}
#ifdef HAVE_LIBZ
if (snprintf(buf, MAXPGPATH, "%s/toc.dat.gz", AH->fSpec) >= MAXPGPATH)
pg_fatal("directory name too long: \"%s\"",
AH->fSpec);
if (stat(buf, &st) == 0 && S_ISREG(st.st_mode))
{
AH->format = archDirectory;
if (_fileExistsInDirectory(AH->fSpec, "toc.dat.gz"))
return AH->format;
}
#endif
pg_fatal("directory \"%s\" does not appear to be a valid archive (\"toc.dat\" does not exist)",
AH->fSpec);
@ -2179,6 +2182,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
SetupWorkerPtrType setupWorkerPtr)
{
ArchiveHandle *AH;
CompressFileHandle *CFH;
pg_compress_specification out_compress_spec = {0};
pg_log_debug("allocating AH for %s, format %d",
@ -2234,7 +2238,10 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
/* Open stdout with no compression for AH output handle */
out_compress_spec.algorithm = PG_COMPRESSION_NONE;
AH->OF = cfdopen(dup(fileno(stdout)), PG_BINARY_A, out_compress_spec);
CFH = InitCompressFileHandle(out_compress_spec);
if (CFH->open_func(NULL, fileno(stdout), PG_BINARY_A, CFH))
pg_fatal("could not open stdout for appending: %m");
AH->OF = CFH;
/*
* On Windows, we need to use binary mode to read/write non-text files,
@ -3646,12 +3653,7 @@ WriteHead(ArchiveHandle *AH)
AH->WriteBytePtr(AH, AH->intSize);
AH->WriteBytePtr(AH, AH->offSize);
AH->WriteBytePtr(AH, AH->format);
/*
* For now the compression type is implied by the level. This will need
* to change once support for more compression algorithms is added,
* requiring a format bump.
*/
WriteInt(AH, AH->compression_spec.level);
AH->WriteBytePtr(AH, AH->compression_spec.algorithm);
crtm = *localtime(&AH->createDate);
WriteInt(AH, crtm.tm_sec);
WriteInt(AH, crtm.tm_min);
@ -3723,10 +3725,11 @@ ReadHead(ArchiveHandle *AH)
pg_fatal("expected format (%d) differs from format found in file (%d)",
AH->format, fmt);
/* Guess the compression method based on the level */
AH->compression_spec.algorithm = PG_COMPRESSION_NONE;
if (AH->version >= K_VERS_1_2)
if (AH->version >= K_VERS_1_15)
AH->compression_spec.algorithm = AH->ReadBytePtr(AH);
else if (AH->version >= K_VERS_1_2)
{
/* Guess the compression method based on the level */
if (AH->version < K_VERS_1_4)
AH->compression_spec.level = AH->ReadBytePtr(AH);
else

View File

@ -65,10 +65,13 @@
#define K_VERS_1_13 MAKE_ARCHIVE_VERSION(1, 13, 0) /* change search_path
* behavior */
#define K_VERS_1_14 MAKE_ARCHIVE_VERSION(1, 14, 0) /* add tableam */
#define K_VERS_1_15 MAKE_ARCHIVE_VERSION(1, 15, 0) /* add
* compression_algorithm
* in header */
/* Current archive version number (the format we can output) */
#define K_VERS_MAJOR 1
#define K_VERS_MINOR 14
#define K_VERS_MINOR 15
#define K_VERS_REV 0
#define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV)

View File

@ -298,7 +298,9 @@ _StartData(ArchiveHandle *AH, TocEntry *te)
_WriteByte(AH, BLK_DATA); /* Block type */
WriteInt(AH, te->dumpId); /* For sanity check */
ctx->cs = AllocateCompressor(AH->compression_spec, _CustomWriteFunc);
ctx->cs = AllocateCompressor(AH->compression_spec,
NULL,
_CustomWriteFunc);
}
/*
@ -317,15 +319,15 @@ _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
CompressorState *cs = ctx->cs;
if (dLen > 0)
/* WriteDataToArchive() internally throws write errors */
WriteDataToArchive(AH, cs, data, dLen);
/* writeData() internally throws write errors */
cs->writeData(AH, cs, data, dLen);
}
/*
* Called by the archiver when a dumper's 'DataDumper' routine has
* finished.
*
* Optional.
* Mandatory.
*/
static void
_EndData(ArchiveHandle *AH, TocEntry *te)
@ -333,6 +335,8 @@ _EndData(ArchiveHandle *AH, TocEntry *te)
lclContext *ctx = (lclContext *) AH->formatData;
EndCompressor(AH, ctx->cs);
ctx->cs = NULL;
/* Send the end marker */
WriteInt(AH, 0);
}
@ -377,7 +381,9 @@ _StartLO(ArchiveHandle *AH, TocEntry *te, Oid oid)
WriteInt(AH, oid);
ctx->cs = AllocateCompressor(AH->compression_spec, _CustomWriteFunc);
ctx->cs = AllocateCompressor(AH->compression_spec,
NULL,
_CustomWriteFunc);
}
/*
@ -566,7 +572,12 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
static void
_PrintData(ArchiveHandle *AH)
{
ReadDataFromArchive(AH, AH->compression_spec, _CustomReadFunc);
CompressorState *cs;
cs = AllocateCompressor(AH->compression_spec,
_CustomReadFunc, NULL);
cs->readData(AH, cs);
EndCompressor(AH, cs);
}
static void
@ -977,7 +988,7 @@ _readBlockHeader(ArchiveHandle *AH, int *type, int *id)
}
/*
* Callback function for WriteDataToArchive. Writes one block of (compressed)
* Callback function for writeData. Writes one block of (compressed)
* data to the archive.
*/
static void
@ -992,7 +1003,7 @@ _CustomWriteFunc(ArchiveHandle *AH, const char *buf, size_t len)
}
/*
* Callback function for ReadDataFromArchive. To keep things simple, we
* Callback function for readData. To keep things simple, we
* always read one compressed block at a time.
*/
static size_t

View File

@ -50,9 +50,8 @@ typedef struct
*/
char *directory;
cfp *dataFH; /* currently open data file */
cfp *LOsTocFH; /* file handle for blobs.toc */
CompressFileHandle *dataFH; /* currently open data file */
CompressFileHandle *LOsTocFH; /* file handle for blobs.toc */
ParallelState *pstate; /* for parallel backup / restore */
} lclContext;
@ -198,11 +197,11 @@ InitArchiveFmt_Directory(ArchiveHandle *AH)
else
{ /* Read Mode */
char fname[MAXPGPATH];
cfp *tocFH;
CompressFileHandle *tocFH;
setFilePath(AH, fname, "toc.dat");
tocFH = cfopen_read(fname, PG_BINARY_R);
tocFH = InitDiscoverCompressFileHandle(fname, PG_BINARY_R);
if (tocFH == NULL)
pg_fatal("could not open input file \"%s\": %m", fname);
@ -218,7 +217,7 @@ InitArchiveFmt_Directory(ArchiveHandle *AH)
ReadToc(AH);
/* Nothing else in the file, so close it again... */
if (cfclose(tocFH) != 0)
if (EndCompressFileHandle(tocFH) != 0)
pg_fatal("could not close TOC file: %m");
ctx->dataFH = NULL;
}
@ -327,9 +326,9 @@ _StartData(ArchiveHandle *AH, TocEntry *te)
setFilePath(AH, fname, tctx->filename);
ctx->dataFH = cfopen_write(fname, PG_BINARY_W,
AH->compression_spec);
if (ctx->dataFH == NULL)
ctx->dataFH = InitCompressFileHandle(AH->compression_spec);
if (ctx->dataFH->open_write_func(fname, PG_BINARY_W, ctx->dataFH))
pg_fatal("could not open output file \"%s\": %m", fname);
}
@ -346,15 +345,16 @@ static void
_WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
{
lclContext *ctx = (lclContext *) AH->formatData;
CompressFileHandle *CFH = ctx->dataFH;
errno = 0;
if (dLen > 0 && cfwrite(data, dLen, ctx->dataFH) != dLen)
if (dLen > 0 && CFH->write_func(data, dLen, CFH) != dLen)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
pg_fatal("could not write to output file: %s",
get_cfp_error(ctx->dataFH));
CFH->get_error_func(CFH));
}
}
@ -370,7 +370,7 @@ _EndData(ArchiveHandle *AH, TocEntry *te)
lclContext *ctx = (lclContext *) AH->formatData;
/* Close the file */
if (cfclose(ctx->dataFH) != 0)
if (EndCompressFileHandle(ctx->dataFH) != 0)
pg_fatal("could not close data file: %m");
ctx->dataFH = NULL;
@ -385,26 +385,25 @@ _PrintFileData(ArchiveHandle *AH, char *filename)
size_t cnt;
char *buf;
size_t buflen;
cfp *cfp;
CompressFileHandle *CFH;
if (!filename)
return;
cfp = cfopen_read(filename, PG_BINARY_R);
if (!cfp)
CFH = InitDiscoverCompressFileHandle(filename, PG_BINARY_R);
if (!CFH)
pg_fatal("could not open input file \"%s\": %m", filename);
buf = pg_malloc(ZLIB_OUT_SIZE);
buflen = ZLIB_OUT_SIZE;
while ((cnt = cfread(buf, buflen, cfp)))
while ((cnt = CFH->read_func(buf, buflen, CFH)))
{
ahwrite(buf, 1, cnt, AH);
}
free(buf);
if (cfclose(cfp) != 0)
if (EndCompressFileHandle(CFH) != 0)
pg_fatal("could not close data file \"%s\": %m", filename);
}
@ -435,6 +434,7 @@ _LoadLOs(ArchiveHandle *AH)
{
Oid oid;
lclContext *ctx = (lclContext *) AH->formatData;
CompressFileHandle *CFH;
char tocfname[MAXPGPATH];
char line[MAXPGPATH];
@ -442,14 +442,14 @@ _LoadLOs(ArchiveHandle *AH)
setFilePath(AH, tocfname, "blobs.toc");
ctx->LOsTocFH = cfopen_read(tocfname, PG_BINARY_R);
CFH = ctx->LOsTocFH = InitDiscoverCompressFileHandle(tocfname, PG_BINARY_R);
if (ctx->LOsTocFH == NULL)
pg_fatal("could not open large object TOC file \"%s\" for input: %m",
tocfname);
/* Read the LOs TOC file line-by-line, and process each LO */
while ((cfgets(ctx->LOsTocFH, line, MAXPGPATH)) != NULL)
while ((CFH->gets_func(line, MAXPGPATH, CFH)) != NULL)
{
char lofname[MAXPGPATH + 1];
char path[MAXPGPATH];
@ -464,11 +464,11 @@ _LoadLOs(ArchiveHandle *AH)
_PrintFileData(AH, path);
EndRestoreLO(AH, oid);
}
if (!cfeof(ctx->LOsTocFH))
if (!CFH->eof_func(CFH))
pg_fatal("error reading large object TOC file \"%s\"",
tocfname);
if (cfclose(ctx->LOsTocFH) != 0)
if (EndCompressFileHandle(ctx->LOsTocFH) != 0)
pg_fatal("could not close large object TOC file \"%s\": %m",
tocfname);
@ -488,15 +488,16 @@ _WriteByte(ArchiveHandle *AH, const int i)
{
unsigned char c = (unsigned char) i;
lclContext *ctx = (lclContext *) AH->formatData;
CompressFileHandle *CFH = ctx->dataFH;
errno = 0;
if (cfwrite(&c, 1, ctx->dataFH) != 1)
if (CFH->write_func(&c, 1, CFH) != 1)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
pg_fatal("could not write to output file: %s",
get_cfp_error(ctx->dataFH));
CFH->get_error_func(CFH));
}
return 1;
@ -512,8 +513,9 @@ static int
_ReadByte(ArchiveHandle *AH)
{
lclContext *ctx = (lclContext *) AH->formatData;
CompressFileHandle *CFH = ctx->dataFH;
return cfgetc(ctx->dataFH);
return CFH->getc_func(CFH);
}
/*
@ -524,15 +526,16 @@ static void
_WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
{
lclContext *ctx = (lclContext *) AH->formatData;
CompressFileHandle *CFH = ctx->dataFH;
errno = 0;
if (cfwrite(buf, len, ctx->dataFH) != len)
if (CFH->write_func(buf, len, CFH) != len)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
pg_fatal("could not write to output file: %s",
get_cfp_error(ctx->dataFH));
CFH->get_error_func(CFH));
}
}
@ -545,12 +548,13 @@ static void
_ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
{
lclContext *ctx = (lclContext *) AH->formatData;
CompressFileHandle *CFH = ctx->dataFH;
/*
* If there was an I/O error, we already exited in cfread(), so here we
* If there was an I/O error, we already exited in readF(), so here we
* exit on short reads.
*/
if (cfread(buf, len, ctx->dataFH) != len)
if (CFH->read_func(buf, len, CFH) != len)
pg_fatal("could not read from input file: end of file");
}
@ -573,7 +577,7 @@ _CloseArchive(ArchiveHandle *AH)
if (AH->mode == archModeWrite)
{
cfp *tocFH;
CompressFileHandle *tocFH;
pg_compress_specification compression_spec = {0};
char fname[MAXPGPATH];
@ -584,8 +588,8 @@ _CloseArchive(ArchiveHandle *AH)
/* The TOC is always created uncompressed */
compression_spec.algorithm = PG_COMPRESSION_NONE;
tocFH = cfopen_write(fname, PG_BINARY_W, compression_spec);
if (tocFH == NULL)
tocFH = InitCompressFileHandle(compression_spec);
if (tocFH->open_write_func(fname, PG_BINARY_W, tocFH))
pg_fatal("could not open output file \"%s\": %m", fname);
ctx->dataFH = tocFH;
@ -598,7 +602,7 @@ _CloseArchive(ArchiveHandle *AH)
WriteHead(AH);
AH->format = archDirectory;
WriteToc(AH);
if (cfclose(tocFH) != 0)
if (EndCompressFileHandle(tocFH) != 0)
pg_fatal("could not close TOC file: %m");
WriteDataChunks(AH, ctx->pstate);
@ -649,8 +653,8 @@ _StartLOs(ArchiveHandle *AH, TocEntry *te)
/* The LO TOC file is never compressed */
compression_spec.algorithm = PG_COMPRESSION_NONE;
ctx->LOsTocFH = cfopen_write(fname, "ab", compression_spec);
if (ctx->LOsTocFH == NULL)
ctx->LOsTocFH = InitCompressFileHandle(compression_spec);
if (ctx->LOsTocFH->open_write_func(fname, "ab", ctx->LOsTocFH))
pg_fatal("could not open output file \"%s\": %m", fname);
}
@ -667,9 +671,8 @@ _StartLO(ArchiveHandle *AH, TocEntry *te, Oid oid)
snprintf(fname, MAXPGPATH, "%s/blob_%u.dat", ctx->directory, oid);
ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression_spec);
if (ctx->dataFH == NULL)
ctx->dataFH = InitCompressFileHandle(AH->compression_spec);
if (ctx->dataFH->open_write_func(fname, PG_BINARY_W, ctx->dataFH))
pg_fatal("could not open output file \"%s\": %m", fname);
}
@ -682,18 +685,19 @@ static void
_EndLO(ArchiveHandle *AH, TocEntry *te, Oid oid)
{
lclContext *ctx = (lclContext *) AH->formatData;
CompressFileHandle *CFH = ctx->LOsTocFH;
char buf[50];
int len;
/* Close the LO data file itself */
if (cfclose(ctx->dataFH) != 0)
pg_fatal("could not close LO data file: %m");
/* Close the BLOB data file itself */
if (EndCompressFileHandle(ctx->dataFH) != 0)
pg_fatal("could not close blob data file: %m");
ctx->dataFH = NULL;
/* register the LO in blobs.toc */
len = snprintf(buf, sizeof(buf), "%u blob_%u.dat\n", oid, oid);
if (cfwrite(buf, len, ctx->LOsTocFH) != len)
pg_fatal("could not write to LOs TOC file");
if (CFH->write_func(buf, len, CFH) != len)
pg_fatal("could not write to blobs TOC file");
}
/*
@ -706,8 +710,8 @@ _EndLOs(ArchiveHandle *AH, TocEntry *te)
{
lclContext *ctx = (lclContext *) AH->formatData;
if (cfclose(ctx->LOsTocFH) != 0)
pg_fatal("could not close LOs TOC file: %m");
if (EndCompressFileHandle(ctx->LOsTocFH) != 0)
pg_fatal("could not close blobs TOC file: %m");
ctx->LOsTocFH = NULL;
}

View File

@ -94,7 +94,7 @@ my %pgdump_runs = (
command => [
'pg_restore', '-l', "$tempdir/compression_gzip_custom.dump",
],
expected => qr/Compression: 1/,
expected => qr/Compression: gzip/,
name => 'data content is gzip-compressed'
},
},
@ -239,8 +239,8 @@ my %pgdump_runs = (
command =>
[ 'pg_restore', '-l', "$tempdir/defaults_custom_format.dump", ],
expected => $supports_gzip ?
qr/Compression: -1/ :
qr/Compression: 0/,
qr/Compression: gzip/ :
qr/Compression: none/,
name => 'data content is gzip-compressed by default if available',
},
},
@ -264,8 +264,8 @@ my %pgdump_runs = (
command =>
[ 'pg_restore', '-l', "$tempdir/defaults_dir_format", ],
expected => $supports_gzip ?
qr/Compression: -1/ :
qr/Compression: 0/,
qr/Compression: gzip/ :
qr/Compression: none/,
name => 'data content is gzip-compressed by default',
},
glob_patterns => [

View File

@ -14,6 +14,10 @@
#ifndef PG_COMPRESSION_H
#define PG_COMPRESSION_H
/*
* These values are stored on disk, for example in files generated by pg_dump.
* Create the necessary backwards compatibility layers if their order changes.
*/
typedef enum pg_compress_algorithm
{
PG_COMPRESSION_NONE,

View File

@ -150,7 +150,9 @@ do
# pg_dump is not C++-clean because it uses "public" and "namespace"
# as field names, which is unfortunate but we won't change it now.
test "$f" = src/bin/pg_dump/compress_gzip.h && continue
test "$f" = src/bin/pg_dump/compress_io.h && continue
test "$f" = src/bin/pg_dump/compress_none.h && continue
test "$f" = src/bin/pg_dump/parallel.h && continue
test "$f" = src/bin/pg_dump/pg_backup_archiver.h && continue
test "$f" = src/bin/pg_dump/pg_dump.h && continue

View File

@ -429,6 +429,7 @@ CompiledExprState
CompositeIOData
CompositeTypeStmt
CompoundAffixFlag
CompressFileHandle
CompressionLocation
CompressorState
ComputeXidHorizonsResult
@ -1035,6 +1036,7 @@ GucStack
GucStackState
GucStringAssignHook
GucStringCheckHook
GzipCompressorState
HANDLE
HASHACTION
HASHBUCKET