From 5a1007a5088cd6ddf892f7422ea8dbaef362372f Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 9 Nov 2021 14:21:35 -0500 Subject: [PATCH] Have the server properly terminate tar archives. Earlier versions of PostgreSQL featured a version of pg_basebackup that wanted to edit tar archives but was too dumb to parse them properly. The server made things easier for the client by failing to add the two blocks of zero bytes that ought to end a tar file, leaving it up to the client to do that. But since commit 23a1c6578c87fca0e361c4f5f9a07df5ae1f9858, we don't need this hack any more, because pg_basebackup is now smarter and can parse tar files even if they are properly terminated! So change the server to always properly terminate the tar files. Older versions of pg_basebackup can't talk to new servers anyway, so there's no compatibility break. On the pg_basebackup side, we see still need to add the terminating zero bytes if we're talking to an older server, but not when the server is v15+. Hopefully at some point we'll be able to remove some of this compatibility cruft, but it seems best to hang on to it for now. In passing, add a file header comment to bbstreamer_tar.c, to make it clearer what's going on here. Discussion: http://postgr.es/m/CA+TgmoZbNzsWwM4BE5Jb_qHncY817DYZwGf+2-7hkMQ27ZwsMQ@mail.gmail.com --- src/backend/replication/basebackup.c | 16 ++++++++++++++++ src/bin/pg_basebackup/bbstreamer_tar.c | 10 ++++++++++ src/bin/pg_basebackup/pg_basebackup.c | 25 +++++++++++++++++++------ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 38c82c4619..92430439f5 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -374,7 +374,16 @@ perform_base_backup(basebackup_options *opt, bbsink *sink) Assert(lnext(state.tablespaces, lc) == NULL); } else + { + /* Properly terminate the tarfile. */ + StaticAssertStmt(TAR_BLOCK_SIZE <= 2 * BLCKSZ, + "BLCKSZ too small for 2 tar blocks"); + memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE); + bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE); + + /* OK, that's the end of the archive. */ bbsink_end_archive(sink); + } } basebackup_progress_wait_wal_archive(&state); @@ -611,6 +620,13 @@ perform_base_backup(basebackup_options *opt, bbsink *sink) sendFileWithContent(sink, pathbuf, "", &manifest); } + /* Properly terminate the tar file. */ + StaticAssertStmt(TAR_BLOCK_SIZE <= 2 * BLCKSZ, + "BLCKSZ too small for 2 tar blocks"); + memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE); + bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE); + + /* OK, that's the end of the archive. */ bbsink_end_archive(sink); } diff --git a/src/bin/pg_basebackup/bbstreamer_tar.c b/src/bin/pg_basebackup/bbstreamer_tar.c index 5fded0f4e6..e6bd3ef52e 100644 --- a/src/bin/pg_basebackup/bbstreamer_tar.c +++ b/src/bin/pg_basebackup/bbstreamer_tar.c @@ -2,6 +2,16 @@ * * bbstreamer_tar.c * + * This module implements three types of tar processing. A tar parser + * expects unlabelled chunks of data (e.g. BBSTREAMER_UNKNOWN) and splits + * it into labelled chunks (any other value of bbstreamer_archive_context). + * A tar archiver does the reverse: it takes a bunch of labelled chunks + * and produces a tarfile, optionally replacing member headers and trailers + * so that upstream bbstreamer objects can perform surgery on the tarfile + * contents without knowing the details of the tar format. A tar terminator + * just adds two blocks of NUL bytes to the end of the file, since older + * server versions produce files with this terminator omitted. + * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 30efc03b83..1739ac6382 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -85,6 +85,12 @@ typedef void (*WriteDataCallback) (size_t nbytes, char *buf, */ #define MINIMUM_VERSION_FOR_MANIFESTS 130000 +/* + * Before v15, tar files received from the server will be improperly + * terminated. + */ +#define MINIMUM_VERSION_FOR_TERMINATED_TARFILE 150000 + /* * Different ways to include WAL */ @@ -166,7 +172,8 @@ static void progress_report(int tablespacenum, bool force, bool finished); static bbstreamer *CreateBackupStreamer(char *archive_name, char *spclocation, bbstreamer **manifest_inject_streamer_p, - bool is_recovery_guc_supported); + bool is_recovery_guc_supported, + bool expect_unterminated_tarfile); static void ReceiveTarFile(PGconn *conn, char *archive_name, char *spclocation, bool tablespacenum); static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data); @@ -965,7 +972,8 @@ ReceiveCopyData(PGconn *conn, WriteDataCallback callback, static bbstreamer * CreateBackupStreamer(char *archive_name, char *spclocation, bbstreamer **manifest_inject_streamer_p, - bool is_recovery_guc_supported) + bool is_recovery_guc_supported, + bool expect_unterminated_tarfile) { bbstreamer *streamer; bbstreamer *manifest_inject_streamer = NULL; @@ -1074,12 +1082,13 @@ CreateBackupStreamer(char *archive_name, char *spclocation, /* * If we're doing anything that involves understanding the contents of * the archive, we'll need to parse it. If not, we can skip parsing it, - * but the tar files the server sends are not properly terminated, so - * we'll need to add the terminator here. + * but old versions of the server send improperly terminated tarfiles, + * so if we're talking to such a server we'll need to add the terminator + * here. */ if (must_parse_archive) streamer = bbstreamer_tar_parser_new(streamer); - else + else if (expect_unterminated_tarfile) streamer = bbstreamer_tar_terminator_new(streamer); /* Return the results. */ @@ -1099,14 +1108,18 @@ ReceiveTarFile(PGconn *conn, char *archive_name, char *spclocation, WriteTarState state; bbstreamer *manifest_inject_streamer; bool is_recovery_guc_supported; + bool expect_unterminated_tarfile; /* Pass all COPY data through to the backup streamer. */ memset(&state, 0, sizeof(state)); is_recovery_guc_supported = PQserverVersion(conn) >= MINIMUM_VERSION_FOR_RECOVERY_GUC; + expect_unterminated_tarfile = + PQserverVersion(conn) < MINIMUM_VERSION_FOR_TERMINATED_TARFILE; state.streamer = CreateBackupStreamer(archive_name, spclocation, &manifest_inject_streamer, - is_recovery_guc_supported); + is_recovery_guc_supported, + expect_unterminated_tarfile); state.tablespacenum = tablespacenum; ReceiveCopyData(conn, ReceiveTarCopyChunk, &state); progress_filename = NULL;