2023-02-23 18:33:30 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* compress_gzip.c
|
|
|
|
* Routines for archivers to read or write a gzip compressed data stream.
|
|
|
|
*
|
2024-01-04 02:49:05 +01:00
|
|
|
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
|
2023-02-23 18:33:30 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/bin/pg_dump/compress_gzip.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres_fe.h"
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include "compress_gzip.h"
|
|
|
|
#include "pg_backup_utils.h"
|
|
|
|
|
|
|
|
#ifdef HAVE_LIBZ
|
|
|
|
#include "zlib.h"
|
|
|
|
|
|
|
|
/*----------------------
|
|
|
|
* Compressor API
|
|
|
|
*----------------------
|
|
|
|
*/
|
|
|
|
typedef struct GzipCompressorState
|
|
|
|
{
|
|
|
|
z_streamp zp;
|
|
|
|
|
|
|
|
void *outbuf;
|
|
|
|
size_t outsize;
|
|
|
|
} GzipCompressorState;
|
|
|
|
|
|
|
|
/* Private routines that support gzip compressed data I/O */
|
pg_dump: Fix gzip compression of empty data
The pg_dump Compressor API has three basic callbacks - Allocate, Write
and End. The gzip implementation (since e9960732a) wrongly assumed the
Write function would always be called, and deferred the initialization
of the internal compression system until the first such call. But when
there's no data to compress (e.g. for empty LO), this would result in
not finalizing the compression state (because it was not actually
initialized), producing an invalid dump.
Fixed by initializing the internal compression system in the Allocate
call, whenever the caller provides the Write. For decompression the
state is not needed, so we leave the private_data member unpopulated.
Introduces a pg_dump TAP test compressing an empty large object.
This also rearranges the functions to their original order, to make
diffs against older code simpler to understand. Finally, replace an
unreachable pg_fatal() with a simple assert check.
Reported-by: Justin Pryzby
Author: Justin Pryzby, Georgios Kokolatos
Reviewed-by: Georgios Kokolatos, Tomas Vondra
https://postgr.es/m/20230228235834.GC30529%40telsasoft.com
2023-03-29 00:50:34 +02:00
|
|
|
static void DeflateCompressorInit(CompressorState *cs);
|
|
|
|
static void DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs);
|
|
|
|
static void DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs,
|
|
|
|
bool flush);
|
|
|
|
static void EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs);
|
|
|
|
static void WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
|
|
|
|
const void *data, size_t dLen);
|
|
|
|
static void ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs);
|
|
|
|
|
|
|
|
static void
|
|
|
|
DeflateCompressorInit(CompressorState *cs)
|
|
|
|
{
|
|
|
|
GzipCompressorState *gzipcs;
|
|
|
|
z_streamp zp;
|
|
|
|
|
|
|
|
gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState));
|
|
|
|
zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
|
|
|
|
zp->zalloc = Z_NULL;
|
|
|
|
zp->zfree = Z_NULL;
|
|
|
|
zp->opaque = Z_NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* outsize is the buffer size we tell zlib it can output to. We actually
|
|
|
|
* allocate one extra byte because some routines want to append a trailing
|
|
|
|
* zero byte to the zlib output.
|
|
|
|
*/
|
|
|
|
gzipcs->outsize = DEFAULT_IO_BUFFER_SIZE;
|
|
|
|
gzipcs->outbuf = pg_malloc(gzipcs->outsize + 1);
|
|
|
|
|
|
|
|
/* -Z 0 uses the "None" compressor -- not zlib with no compression */
|
|
|
|
Assert(cs->compression_spec.level != 0);
|
|
|
|
|
|
|
|
if (deflateInit(zp, cs->compression_spec.level) != Z_OK)
|
|
|
|
pg_fatal("could not initialize compression library: %s", zp->msg);
|
|
|
|
|
|
|
|
/* Just be paranoid - maybe End is called after Start, with no Write */
|
|
|
|
zp->next_out = gzipcs->outbuf;
|
|
|
|
zp->avail_out = gzipcs->outsize;
|
|
|
|
|
|
|
|
/* Keep track of gzipcs */
|
|
|
|
cs->private_data = gzipcs;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs)
|
|
|
|
{
|
|
|
|
GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
|
|
|
|
z_streamp zp;
|
|
|
|
|
|
|
|
zp = gzipcs->zp;
|
|
|
|
zp->next_in = NULL;
|
|
|
|
zp->avail_in = 0;
|
|
|
|
|
|
|
|
/* Flush any remaining data from zlib buffer */
|
|
|
|
DeflateCompressorCommon(AH, cs, true);
|
|
|
|
|
|
|
|
if (deflateEnd(zp) != Z_OK)
|
|
|
|
pg_fatal("could not close compression stream: %s", zp->msg);
|
|
|
|
|
|
|
|
pg_free(gzipcs->outbuf);
|
|
|
|
pg_free(gzipcs->zp);
|
|
|
|
pg_free(gzipcs);
|
|
|
|
cs->private_data = NULL;
|
|
|
|
}
|
|
|
|
|
2023-02-23 18:33:30 +01:00
|
|
|
static void
|
pg_dump: Fix gzip compression of empty data
The pg_dump Compressor API has three basic callbacks - Allocate, Write
and End. The gzip implementation (since e9960732a) wrongly assumed the
Write function would always be called, and deferred the initialization
of the internal compression system until the first such call. But when
there's no data to compress (e.g. for empty LO), this would result in
not finalizing the compression state (because it was not actually
initialized), producing invalid dump.
Fixed by initializing the internal compression system in the Allocate
call, whenever the caller provides the Write. For decompression the
state is not needed, so we leave the private_data member unpopulated.
Introduces a pg_dump TAP test compressing an empty large object.
This also rearranges the functions to their original order, to make
diffs against older code simpler to understand. Finally, replace an
unreachable pg_fatal() with a simple assert check.
Reported-by: Justin Pryzby
Author: Justin Pryzby, Georgios Kokolatos
Reviewed-by: Georgios Kokolatos, Tomas Vondra
https://postgr.es/m/20230228235834.GC30529%40telsasoft.com
2023-03-29 00:50:34 +02:00
|
|
|
DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs, bool flush)
|
2023-02-23 18:33:30 +01:00
|
|
|
{
|
|
|
|
GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
|
|
|
|
z_streamp zp = gzipcs->zp;
|
|
|
|
void *out = gzipcs->outbuf;
|
|
|
|
int res = Z_OK;
|
|
|
|
|
|
|
|
while (gzipcs->zp->avail_in != 0 || flush)
|
|
|
|
{
|
|
|
|
res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
|
|
|
|
if (res == Z_STREAM_ERROR)
|
|
|
|
pg_fatal("could not compress data: %s", zp->msg);
|
|
|
|
if ((flush && (zp->avail_out < gzipcs->outsize))
|
|
|
|
|| (zp->avail_out == 0)
|
|
|
|
|| (zp->avail_in != 0)
|
|
|
|
)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Extra paranoia: avoid zero-length chunks, since a zero length
|
|
|
|
* chunk is the EOF marker in the custom format. This should never
|
|
|
|
* happen but ...
|
|
|
|
*/
|
|
|
|
if (zp->avail_out < gzipcs->outsize)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Any write function should do its own error checking but to
|
|
|
|
* make sure we do a check here as well ...
|
|
|
|
*/
|
|
|
|
size_t len = gzipcs->outsize - zp->avail_out;
|
|
|
|
|
|
|
|
cs->writeF(AH, (char *) out, len);
|
|
|
|
}
|
|
|
|
zp->next_out = out;
|
|
|
|
zp->avail_out = gzipcs->outsize;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (res == Z_STREAM_END)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs)
|
|
|
|
{
|
pg_dump: Fix gzip compression of empty data
The pg_dump Compressor API has three basic callbacks - Allocate, Write
and End. The gzip implementation (since e9960732a) wrongly assumed the
Write function would always be called, and deferred the initialization
of the internal compression system until the first such call. But when
there's no data to compress (e.g. for empty LO), this would result in
not finalizing the compression state (because it was not actually
initialized), producing invalid dump.
Fixed by initializing the internal compression system in the Allocate
call, whenever the caller provides the Write. For decompression the
state is not needed, so we leave the private_data member unpopulated.
Introduces a pg_dump TAP test compressing an empty large object.
This also rearranges the functions to their original order, to make
diffs against older code simpler to understand. Finally, replace an
unreachable pg_fatal() with a simple assert check.
Reported-by: Justin Pryzby
Author: Justin Pryzby, Georgios Kokolatos
Reviewed-by: Georgios Kokolatos, Tomas Vondra
https://postgr.es/m/20230228235834.GC30529%40telsasoft.com
2023-03-29 00:50:34 +02:00
|
|
|
/* If deflation was initialized, finalize it */
|
|
|
|
if (cs->private_data)
|
|
|
|
DeflateCompressorEnd(AH, cs);
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
|
|
|
|
const void *data, size_t dLen)
|
|
|
|
{
|
|
|
|
GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
|
|
|
|
|
|
|
|
gzipcs->zp->next_in = (void *) unconstify(void *, data);
|
|
|
|
gzipcs->zp->avail_in = dLen;
|
pg_dump: Fix gzip compression of empty data
The pg_dump Compressor API has three basic callbacks - Allocate, Write
and End. The gzip implementation (since e9960732a) wrongly assumed the
Write function would always be called, and deferred the initialization
of the internal compression system until the first such call. But when
there's no data to compress (e.g. for empty LO), this would result in
not finalizing the compression state (because it was not actually
initialized), producing invalid dump.
Fixed by initializing the internal compression system in the Allocate
call, whenever the caller provides the Write. For decompression the
state is not needed, so we leave the private_data member unpopulated.
Introduces a pg_dump TAP test compressing an empty large object.
This also rearranges the functions to their original order, to make
diffs against older code simpler to understand. Finally, replace an
unreachable pg_fatal() with a simple assert check.
Reported-by: Justin Pryzby
Author: Justin Pryzby, Georgios Kokolatos
Reviewed-by: Georgios Kokolatos, Tomas Vondra
https://postgr.es/m/20230228235834.GC30529%40telsasoft.com
2023-03-29 00:50:34 +02:00
|
|
|
DeflateCompressorCommon(AH, cs, false);
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs)
|
|
|
|
{
|
|
|
|
z_streamp zp;
|
|
|
|
char *out;
|
|
|
|
int res = Z_OK;
|
|
|
|
size_t cnt;
|
|
|
|
char *buf;
|
|
|
|
size_t buflen;
|
|
|
|
|
|
|
|
zp = (z_streamp) pg_malloc(sizeof(z_stream));
|
|
|
|
zp->zalloc = Z_NULL;
|
|
|
|
zp->zfree = Z_NULL;
|
|
|
|
zp->opaque = Z_NULL;
|
|
|
|
|
2023-03-23 17:52:32 +01:00
|
|
|
buflen = DEFAULT_IO_BUFFER_SIZE;
|
|
|
|
buf = pg_malloc(buflen);
|
2023-02-23 18:33:30 +01:00
|
|
|
|
2023-03-23 17:52:32 +01:00
|
|
|
out = pg_malloc(DEFAULT_IO_BUFFER_SIZE + 1);
|
2023-02-23 18:33:30 +01:00
|
|
|
|
|
|
|
if (inflateInit(zp) != Z_OK)
|
|
|
|
pg_fatal("could not initialize compression library: %s",
|
|
|
|
zp->msg);
|
|
|
|
|
|
|
|
/* no minimal chunk size for zlib */
|
|
|
|
while ((cnt = cs->readF(AH, &buf, &buflen)))
|
|
|
|
{
|
|
|
|
zp->next_in = (void *) buf;
|
|
|
|
zp->avail_in = cnt;
|
|
|
|
|
|
|
|
while (zp->avail_in > 0)
|
|
|
|
{
|
|
|
|
zp->next_out = (void *) out;
|
2023-03-23 17:52:32 +01:00
|
|
|
zp->avail_out = DEFAULT_IO_BUFFER_SIZE;
|
2023-02-23 18:33:30 +01:00
|
|
|
|
|
|
|
res = inflate(zp, 0);
|
|
|
|
if (res != Z_OK && res != Z_STREAM_END)
|
|
|
|
pg_fatal("could not uncompress data: %s", zp->msg);
|
|
|
|
|
2023-03-23 17:52:32 +01:00
|
|
|
out[DEFAULT_IO_BUFFER_SIZE - zp->avail_out] = '\0';
|
|
|
|
ahwrite(out, 1, DEFAULT_IO_BUFFER_SIZE - zp->avail_out, AH);
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
zp->next_in = NULL;
|
|
|
|
zp->avail_in = 0;
|
|
|
|
while (res != Z_STREAM_END)
|
|
|
|
{
|
|
|
|
zp->next_out = (void *) out;
|
2023-03-23 17:52:32 +01:00
|
|
|
zp->avail_out = DEFAULT_IO_BUFFER_SIZE;
|
2023-02-23 18:33:30 +01:00
|
|
|
res = inflate(zp, 0);
|
|
|
|
if (res != Z_OK && res != Z_STREAM_END)
|
|
|
|
pg_fatal("could not uncompress data: %s", zp->msg);
|
|
|
|
|
2023-03-23 17:52:32 +01:00
|
|
|
out[DEFAULT_IO_BUFFER_SIZE - zp->avail_out] = '\0';
|
|
|
|
ahwrite(out, 1, DEFAULT_IO_BUFFER_SIZE - zp->avail_out, AH);
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (inflateEnd(zp) != Z_OK)
|
|
|
|
pg_fatal("could not close compression library: %s", zp->msg);
|
|
|
|
|
|
|
|
free(buf);
|
|
|
|
free(out);
|
|
|
|
free(zp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Public routines that support gzip compressed data I/O */
|
|
|
|
void
|
|
|
|
InitCompressorGzip(CompressorState *cs,
|
|
|
|
const pg_compress_specification compression_spec)
|
|
|
|
{
|
|
|
|
cs->readData = ReadDataFromArchiveGzip;
|
|
|
|
cs->writeData = WriteDataToArchiveGzip;
|
|
|
|
cs->end = EndCompressorGzip;
|
|
|
|
|
|
|
|
cs->compression_spec = compression_spec;
|
|
|
|
|
pg_dump: Fix gzip compression of empty data
The pg_dump Compressor API has three basic callbacks - Allocate, Write
and End. The gzip implementation (since e9960732a) wrongly assumed the
Write function would always be called, and deferred the initialization
of the internal compression system until the first such call. But when
there's no data to compress (e.g. for empty LO), this would result in
not finalizing the compression state (because it was not actually
initialized), producing invalid dump.
Fixed by initializing the internal compression system in the Allocate
call, whenever the caller provides the Write. For decompression the
state is not needed, so we leave the private_data member unpopulated.
Introduces a pg_dump TAP test compressing an empty large object.
This also rearranges the functions to their original order, to make
diffs against older code simpler to understand. Finally, replace an
unreachable pg_fatal() with a simple assert check.
Reported-by: Justin Pryzby
Author: Justin Pryzby, Georgios Kokolatos
Reviewed-by: Georgios Kokolatos, Tomas Vondra
https://postgr.es/m/20230228235834.GC30529%40telsasoft.com
2023-03-29 00:50:34 +02:00
|
|
|
/*
|
|
|
|
* If the caller has defined a write function, prepare the necessary
|
|
|
|
* state. Note that if the data is empty, End may be called immediately
|
|
|
|
* after Init, without ever calling Write.
|
|
|
|
*/
|
|
|
|
if (cs->writeF)
|
|
|
|
DeflateCompressorInit(cs);
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*----------------------
|
|
|
|
* Compress File API
|
|
|
|
*----------------------
|
|
|
|
*/
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in a consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
static bool
|
|
|
|
Gzip_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH)
|
2023-02-23 18:33:30 +01:00
|
|
|
{
|
|
|
|
gzFile gzfp = (gzFile) CFH->private_data;
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
int gzret;
|
2023-02-23 18:33:30 +01:00
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
gzret = gzread(gzfp, ptr, size);
|
|
|
|
if (gzret <= 0 && !gzeof(gzfp))
|
2023-02-23 18:33:30 +01:00
|
|
|
{
|
|
|
|
int errnum;
|
|
|
|
const char *errmsg = gzerror(gzfp, &errnum);
|
|
|
|
|
|
|
|
pg_fatal("could not read from input file: %s",
|
|
|
|
errnum == Z_ERRNO ? strerror(errno) : errmsg);
|
|
|
|
}
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
if (rsize)
|
|
|
|
*rsize = (size_t) gzret;
|
|
|
|
|
|
|
|
return true;
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in a consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
static bool
|
2023-02-23 18:33:30 +01:00
|
|
|
Gzip_write(const void *ptr, size_t size, CompressFileHandle *CFH)
|
|
|
|
{
|
|
|
|
gzFile gzfp = (gzFile) CFH->private_data;
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
return gzwrite(gzfp, ptr, size) > 0;
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
Gzip_getc(CompressFileHandle *CFH)
|
|
|
|
{
|
|
|
|
gzFile gzfp = (gzFile) CFH->private_data;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
ret = gzgetc(gzfp);
|
|
|
|
if (ret == EOF)
|
|
|
|
{
|
|
|
|
if (!gzeof(gzfp))
|
2024-03-12 02:02:54 +01:00
|
|
|
pg_fatal("could not read from input file: %m");
|
2023-02-23 18:33:30 +01:00
|
|
|
else
|
|
|
|
pg_fatal("could not read from input file: end of file");
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *
|
|
|
|
Gzip_gets(char *ptr, int size, CompressFileHandle *CFH)
|
|
|
|
{
|
|
|
|
gzFile gzfp = (gzFile) CFH->private_data;
|
|
|
|
|
|
|
|
return gzgets(gzfp, ptr, size);
|
|
|
|
}
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in a consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
static bool
|
2023-02-23 18:33:30 +01:00
|
|
|
Gzip_close(CompressFileHandle *CFH)
|
|
|
|
{
|
|
|
|
gzFile gzfp = (gzFile) CFH->private_data;
|
|
|
|
|
|
|
|
CFH->private_data = NULL;
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
return gzclose(gzfp) == Z_OK;
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in a consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
static bool
|
2023-02-23 18:33:30 +01:00
|
|
|
Gzip_eof(CompressFileHandle *CFH)
|
|
|
|
{
|
|
|
|
gzFile gzfp = (gzFile) CFH->private_data;
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
return gzeof(gzfp) == 1;
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static const char *
|
|
|
|
Gzip_get_error(CompressFileHandle *CFH)
|
|
|
|
{
|
|
|
|
gzFile gzfp = (gzFile) CFH->private_data;
|
|
|
|
const char *errmsg;
|
|
|
|
int errnum;
|
|
|
|
|
|
|
|
errmsg = gzerror(gzfp, &errnum);
|
|
|
|
if (errnum == Z_ERRNO)
|
|
|
|
errmsg = strerror(errno);
|
|
|
|
|
|
|
|
return errmsg;
|
|
|
|
}
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they agreed, a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in a consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
static bool
|
2023-02-23 18:33:30 +01:00
|
|
|
Gzip_open(const char *path, int fd, const char *mode, CompressFileHandle *CFH)
|
|
|
|
{
|
|
|
|
gzFile gzfp;
|
|
|
|
char mode_compression[32];
|
|
|
|
|
|
|
|
if (CFH->compression_spec.level != Z_DEFAULT_COMPRESSION)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* user has specified a compression level, so tell zlib to use it
|
|
|
|
*/
|
|
|
|
snprintf(mode_compression, sizeof(mode_compression), "%s%d",
|
|
|
|
mode, CFH->compression_spec.level);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
strcpy(mode_compression, mode);
|
|
|
|
|
|
|
|
if (fd >= 0)
|
|
|
|
gzfp = gzdopen(dup(fd), mode_compression);
|
|
|
|
else
|
|
|
|
gzfp = gzopen(path, mode_compression);
|
|
|
|
|
|
|
|
if (gzfp == NULL)
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
return false;
|
2023-02-23 18:33:30 +01:00
|
|
|
|
|
|
|
CFH->private_data = gzfp;
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
return true;
|
2023-02-23 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they agreed, a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in a consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
static bool
|
2023-02-23 18:33:30 +01:00
|
|
|
Gzip_open_write(const char *path, const char *mode, CompressFileHandle *CFH)
|
|
|
|
{
|
|
|
|
char *fname;
|
Improve type handling in pg_dump's compress file API
After 0da243fed0 got committed, we've received a report about a compiler
warning, related to the new LZ4File_gets() function:
compress_lz4.c: In function 'LZ4File_gets':
compress_lz4.c:492:19: warning: comparison of unsigned expression in
'< 0' is always false [-Wtype-limits]
492 | if (dsize < 0)
The reason is very simple - dsize is declared as size_t, which is an
unsigned integer, and thus the check is pointless and we might fail to
notice an error in some cases (or fail in a strange way a bit later).
The warning could have been silenced by simply changing the type, but we
realized the API mostly assumes all the libraries use the same types and
report errors the same way (e.g. by returning 0 and/or negative value).
But we can't make this assumption - the gzip/lz4 libraries already
disagree on some of this, and even if they did a library added in the
future might not.
The right solution is to define what the API does, and translate the
library-specific behavior in consistent way (so that the internal errors
are not exposed to users of our compression API). So this adjusts the
data types in a couple places, so that we don't miss library errors, and
simplifies and unifies the error reporting to simply return true/false
(instead of e.g. size_t).
While at it, make sure LZ4File_open_write() does not clobber errno in
case open_func() fails.
Author: Georgios Kokolatos
Reported-by: Alexander Lakhin
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com
2023-03-23 17:51:55 +01:00
|
|
|
bool ret;
|
2023-02-23 18:33:30 +01:00
|
|
|
int save_errno;
|
|
|
|
|
|
|
|
fname = psprintf("%s.gz", path);
|
|
|
|
ret = CFH->open_func(fname, -1, mode, CFH);
|
|
|
|
|
|
|
|
save_errno = errno;
|
|
|
|
pg_free(fname);
|
|
|
|
errno = save_errno;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
InitCompressFileHandleGzip(CompressFileHandle *CFH,
|
|
|
|
const pg_compress_specification compression_spec)
|
|
|
|
{
|
|
|
|
CFH->open_func = Gzip_open;
|
|
|
|
CFH->open_write_func = Gzip_open_write;
|
|
|
|
CFH->read_func = Gzip_read;
|
|
|
|
CFH->write_func = Gzip_write;
|
|
|
|
CFH->gets_func = Gzip_gets;
|
|
|
|
CFH->getc_func = Gzip_getc;
|
|
|
|
CFH->close_func = Gzip_close;
|
|
|
|
CFH->eof_func = Gzip_eof;
|
|
|
|
CFH->get_error_func = Gzip_get_error;
|
|
|
|
|
|
|
|
CFH->compression_spec = compression_spec;
|
|
|
|
|
|
|
|
CFH->private_data = NULL;
|
|
|
|
}
|
|
|
|
#else /* HAVE_LIBZ */
|
|
|
|
void
|
|
|
|
InitCompressorGzip(CompressorState *cs,
|
|
|
|
const pg_compress_specification compression_spec)
|
|
|
|
{
|
|
|
|
pg_fatal("this build does not support compression with %s", "gzip");
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
InitCompressFileHandleGzip(CompressFileHandle *CFH,
|
|
|
|
const pg_compress_specification compression_spec)
|
|
|
|
{
|
|
|
|
pg_fatal("this build does not support compression with %s", "gzip");
|
|
|
|
}
|
|
|
|
#endif /* HAVE_LIBZ */
|