/*------------------------------------------------------------------------- * * compress_io.c * Routines for archivers to write an uncompressed or compressed data * stream. * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * This file includes two APIs for dealing with compressed data. The first * provides more flexibility, using callbacks to read/write data from the * underlying stream. The second API is a wrapper around fopen and * friends, providing an interface similar to those, but abstracts away * the possible compression. The second API is aimed for the resulting * files to be easily manipulated with an external compression utility * program. * * Compressor API * -------------- * * The interface for writing to an archive consists of three functions: * AllocateCompressor, writeData, and EndCompressor. First you call * AllocateCompressor, then write all the data by calling writeData as many * times as needed, and finally EndCompressor. writeData will call the * WriteFunc that was provided to AllocateCompressor for each chunk of * compressed data. * * The interface for reading an archive consists of the same three functions: * AllocateCompressor, readData, and EndCompressor. First you call * AllocateCompressor, then read all the data by calling readData to read the * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc * returns the compressed data one chunk at a time. Then readData decompresses * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0 * to signal EOF. The interface is the same for compressed and uncompressed * streams. * * Compressed stream API * ---------------------- * * The compressed stream API is providing a set of function pointers for * opening, reading, writing, and finally closing files. The implemented * function pointers are documented in the corresponding header file and are * common for all streams. It allows the caller to use the same functions for * both compressed and uncompressed streams. * * The interface consists of three functions, InitCompressFileHandle, * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the * compression is known, then start by calling InitCompressFileHandle, * otherwise discover it by using InitDiscoverCompressFileHandle. Then call * the function pointers as required for the read/write operations. Finally * call EndCompressFileHandle to end the stream. * * InitDiscoverCompressFileHandle tries to infer the compression by the * filename suffix. If the suffix is not yet known then it tries to simply * open the file and if it fails, it tries to open the same file with * compressed suffixes (.gz, .lz4 and .zst, in this order). * * IDENTIFICATION * src/bin/pg_dump/compress_io.c * *------------------------------------------------------------------------- */ #include "postgres_fe.h" #include #include #include "compress_gzip.h" #include "compress_io.h" #include "compress_lz4.h" #include "compress_none.h" #include "compress_zstd.h" #include "pg_backup_utils.h" /*---------------------- * Generic functions *---------------------- */ /* * Checks whether support for a compression algorithm is implemented in * pg_dump/restore. * * On success returns NULL, otherwise returns a malloc'ed string which can be * used by the caller in an error message. */ char * supports_compression(const pg_compress_specification compression_spec) { const pg_compress_algorithm algorithm = compression_spec.algorithm; bool supported = false; if (algorithm == PG_COMPRESSION_NONE) supported = true; #ifdef HAVE_LIBZ if (algorithm == PG_COMPRESSION_GZIP) supported = true; #endif #ifdef USE_LZ4 if (algorithm == PG_COMPRESSION_LZ4) supported = true; #endif #ifdef USE_ZSTD if (algorithm == PG_COMPRESSION_ZSTD) supported = true; #endif if (!supported) return psprintf(_("this build does not support compression with %s"), get_compress_algorithm_name(algorithm)); return NULL; } /*---------------------- * Compressor API *---------------------- */ /* * Allocate a new compressor. */ CompressorState * AllocateCompressor(const pg_compress_specification compression_spec, ReadFunc readF, WriteFunc writeF) { CompressorState *cs; cs = (CompressorState *) pg_malloc0(sizeof(CompressorState)); cs->readF = readF; cs->writeF = writeF; if (compression_spec.algorithm == PG_COMPRESSION_NONE) InitCompressorNone(cs, compression_spec); else if (compression_spec.algorithm == PG_COMPRESSION_GZIP) InitCompressorGzip(cs, compression_spec); else if (compression_spec.algorithm == PG_COMPRESSION_LZ4) InitCompressorLZ4(cs, compression_spec); else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD) InitCompressorZstd(cs, compression_spec); return cs; } /* * Terminate compression library context and flush its buffers. */ void EndCompressor(ArchiveHandle *AH, CompressorState *cs) { cs->end(AH, cs); pg_free(cs); } /*---------------------- * Compressed stream API *---------------------- */ /* * Private routines */ static int hasSuffix(const char *filename, const char *suffix) { int filenamelen = strlen(filename); int suffixlen = strlen(suffix); if (filenamelen < suffixlen) return 0; return memcmp(&filename[filenamelen - suffixlen], suffix, suffixlen) == 0; } /* free() without changing errno; useful in several places below */ static void free_keep_errno(void *p) { int save_errno = errno; free(p); errno = save_errno; } /* * Public interface */ /* * Initialize a compress file handle for the specified compression algorithm. */ CompressFileHandle * InitCompressFileHandle(const pg_compress_specification compression_spec) { CompressFileHandle *CFH; CFH = pg_malloc0(sizeof(CompressFileHandle)); if (compression_spec.algorithm == PG_COMPRESSION_NONE) InitCompressFileHandleNone(CFH, compression_spec); else if (compression_spec.algorithm == PG_COMPRESSION_GZIP) InitCompressFileHandleGzip(CFH, compression_spec); else if (compression_spec.algorithm == PG_COMPRESSION_LZ4) InitCompressFileHandleLZ4(CFH, compression_spec); else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD) InitCompressFileHandleZstd(CFH, compression_spec); return CFH; } /* * Checks if a compressed file (with the specified extension) exists. * * The filename of the tested file is stored to fname buffer (the existing * buffer is freed, new buffer is allocated and returned through the pointer). */ static bool check_compressed_file(const char *path, char **fname, char *ext) { free_keep_errno(*fname); *fname = psprintf("%s.%s", path, ext); return (access(*fname, F_OK) == 0); } /* * Open a file for reading. 'path' is the file to open, and 'mode' should * be either "r" or "rb". * * If the file at 'path' contains the suffix of a supported compression method, * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used * throughout. Otherwise the compression will be inferred by iteratively trying * to open the file at 'path', first as is, then by appending known compression * suffixes. So if you pass "foo" as 'path', this will open either "foo" or * "foo.{gz,lz4,zst}", trying in that order. * * On failure, return NULL with an error code in errno. */ CompressFileHandle * InitDiscoverCompressFileHandle(const char *path, const char *mode) { CompressFileHandle *CFH = NULL; struct stat st; char *fname; pg_compress_specification compression_spec = {0}; compression_spec.algorithm = PG_COMPRESSION_NONE; Assert(strcmp(mode, PG_BINARY_R) == 0); fname = pg_strdup(path); if (hasSuffix(fname, ".gz")) compression_spec.algorithm = PG_COMPRESSION_GZIP; else if (hasSuffix(fname, ".lz4")) compression_spec.algorithm = PG_COMPRESSION_LZ4; else if (hasSuffix(fname, ".zst")) compression_spec.algorithm = PG_COMPRESSION_ZSTD; else { if (stat(path, &st) == 0) compression_spec.algorithm = PG_COMPRESSION_NONE; else if (check_compressed_file(path, &fname, "gz")) compression_spec.algorithm = PG_COMPRESSION_GZIP; else if (check_compressed_file(path, &fname, "lz4")) compression_spec.algorithm = PG_COMPRESSION_LZ4; else if (check_compressed_file(path, &fname, "zst")) compression_spec.algorithm = PG_COMPRESSION_ZSTD; } CFH = InitCompressFileHandle(compression_spec); if (!CFH->open_func(fname, -1, mode, CFH)) { free_keep_errno(CFH); CFH = NULL; } free_keep_errno(fname); return CFH; } /* * Close an open file handle and release its memory. * * On failure, returns false and sets errno appropriately. */ bool EndCompressFileHandle(CompressFileHandle *CFH) { bool ret = false; if (CFH->private_data) ret = CFH->close_func(CFH); free_keep_errno(CFH); return ret; }