postgresql/src/bin/pg_basebackup/bbstreamer_tar.c

515 lines
15 KiB
C

/*-------------------------------------------------------------------------
*
* bbstreamer_tar.c
*
* This module implements three types of tar processing. A tar parser
* expects unlabelled chunks of data (e.g. BBSTREAMER_UNKNOWN) and splits
* it into labelled chunks (any other value of bbstreamer_archive_context).
* A tar archiver does the reverse: it takes a bunch of labelled chunks
* and produces a tarfile, optionally replacing member headers and trailers
* so that upstream bbstreamer objects can perform surgery on the tarfile
* contents without knowing the details of the tar format. A tar terminator
* just adds two blocks of NUL bytes to the end of the file, since older
* server versions produce files with this terminator omitted.
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/bin/pg_basebackup/bbstreamer_tar.c
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <time.h>
#include "bbstreamer.h"
#include "common/logging.h"
#include "pgtar.h"
typedef struct bbstreamer_tar_parser
{
bbstreamer base;
bbstreamer_archive_context next_context;
bbstreamer_member member;
size_t file_bytes_sent;
size_t pad_bytes_expected;
} bbstreamer_tar_parser;
typedef struct bbstreamer_tar_archiver
{
bbstreamer base;
bool rearchive_member;
} bbstreamer_tar_archiver;
static void bbstreamer_tar_parser_content(bbstreamer *streamer,
bbstreamer_member *member,
const char *data, int len,
bbstreamer_archive_context context);
static void bbstreamer_tar_parser_finalize(bbstreamer *streamer);
static void bbstreamer_tar_parser_free(bbstreamer *streamer);
static bool bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer);
const bbstreamer_ops bbstreamer_tar_parser_ops = {
.content = bbstreamer_tar_parser_content,
.finalize = bbstreamer_tar_parser_finalize,
.free = bbstreamer_tar_parser_free
};
static void bbstreamer_tar_archiver_content(bbstreamer *streamer,
bbstreamer_member *member,
const char *data, int len,
bbstreamer_archive_context context);
static void bbstreamer_tar_archiver_finalize(bbstreamer *streamer);
static void bbstreamer_tar_archiver_free(bbstreamer *streamer);
const bbstreamer_ops bbstreamer_tar_archiver_ops = {
.content = bbstreamer_tar_archiver_content,
.finalize = bbstreamer_tar_archiver_finalize,
.free = bbstreamer_tar_archiver_free
};
static void bbstreamer_tar_terminator_content(bbstreamer *streamer,
bbstreamer_member *member,
const char *data, int len,
bbstreamer_archive_context context);
static void bbstreamer_tar_terminator_finalize(bbstreamer *streamer);
static void bbstreamer_tar_terminator_free(bbstreamer *streamer);
const bbstreamer_ops bbstreamer_tar_terminator_ops = {
.content = bbstreamer_tar_terminator_content,
.finalize = bbstreamer_tar_terminator_finalize,
.free = bbstreamer_tar_terminator_free
};
/*
* Create a bbstreamer that can parse a stream of content as tar data.
*
* The input should be a series of BBSTREAMER_UNKNOWN chunks; the bbstreamer
* specified by 'next' will receive a series of typed chunks, as per the
* conventions described in bbstreamer.h.
*/
extern bbstreamer *
bbstreamer_tar_parser_new(bbstreamer *next)
{
bbstreamer_tar_parser *streamer;
streamer = palloc0(sizeof(bbstreamer_tar_parser));
*((const bbstreamer_ops **) &streamer->base.bbs_ops) =
&bbstreamer_tar_parser_ops;
streamer->base.bbs_next = next;
initStringInfo(&streamer->base.bbs_buffer);
streamer->next_context = BBSTREAMER_MEMBER_HEADER;
return &streamer->base;
}
/*
* Parse unknown content as tar data.
*/
static void
bbstreamer_tar_parser_content(bbstreamer *streamer, bbstreamer_member *member,
const char *data, int len,
bbstreamer_archive_context context)
{
bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
size_t nbytes;
/* Expect unparsed input. */
Assert(member == NULL);
Assert(context == BBSTREAMER_UNKNOWN);
while (len > 0)
{
switch (mystreamer->next_context)
{
case BBSTREAMER_MEMBER_HEADER:
/*
* If we're expecting an archive member header, accumulate a
* full block of data before doing anything further.
*/
if (!bbstreamer_buffer_until(streamer, &data, &len,
TAR_BLOCK_SIZE))
return;
/*
* Now we can process the header and get ready to process the
* file contents; however, we might find out that what we
* thought was the next file header is actually the start of
* the archive trailer. Switch modes accordingly.
*/
if (bbstreamer_tar_header(mystreamer))
{
if (mystreamer->member.size == 0)
{
/* No content; trailer is zero-length. */
bbstreamer_content(mystreamer->base.bbs_next,
&mystreamer->member,
NULL, 0,
BBSTREAMER_MEMBER_TRAILER);
/* Expect next header. */
mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
}
else
{
/* Expect contents. */
mystreamer->next_context = BBSTREAMER_MEMBER_CONTENTS;
}
mystreamer->base.bbs_buffer.len = 0;
mystreamer->file_bytes_sent = 0;
}
else
mystreamer->next_context = BBSTREAMER_ARCHIVE_TRAILER;
break;
case BBSTREAMER_MEMBER_CONTENTS:
/*
* Send as much content as we have, but not more than the
* remaining file length.
*/
Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
nbytes = Min(nbytes, len);
Assert(nbytes > 0);
bbstreamer_content(mystreamer->base.bbs_next,
&mystreamer->member,
data, nbytes,
BBSTREAMER_MEMBER_CONTENTS);
mystreamer->file_bytes_sent += nbytes;
data += nbytes;
len -= nbytes;
/*
* If we've not yet sent the whole file, then there's more
* content to come; otherwise, it's time to expect the file
* trailer.
*/
Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
if (mystreamer->file_bytes_sent == mystreamer->member.size)
{
if (mystreamer->pad_bytes_expected == 0)
{
/* Trailer is zero-length. */
bbstreamer_content(mystreamer->base.bbs_next,
&mystreamer->member,
NULL, 0,
BBSTREAMER_MEMBER_TRAILER);
/* Expect next header. */
mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
}
else
{
/* Trailer is not zero-length. */
mystreamer->next_context = BBSTREAMER_MEMBER_TRAILER;
}
mystreamer->base.bbs_buffer.len = 0;
}
break;
case BBSTREAMER_MEMBER_TRAILER:
/*
* If we're expecting an archive member trailer, accumulate
* the expected number of padding bytes before sending
* anything onward.
*/
if (!bbstreamer_buffer_until(streamer, &data, &len,
mystreamer->pad_bytes_expected))
return;
/* OK, now we can send it. */
bbstreamer_content(mystreamer->base.bbs_next,
&mystreamer->member,
data, mystreamer->pad_bytes_expected,
BBSTREAMER_MEMBER_TRAILER);
/* Expect next file header. */
mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
mystreamer->base.bbs_buffer.len = 0;
break;
case BBSTREAMER_ARCHIVE_TRAILER:
/*
* We've seen an end-of-archive indicator, so anything more is
* buffered and sent as part of the archive trailer. But we
* don't expect more than 2 blocks.
*/
bbstreamer_buffer_bytes(streamer, &data, &len, len);
if (len > 2 * TAR_BLOCK_SIZE)
pg_fatal("tar file trailer exceeds 2 blocks");
return;
default:
/* Shouldn't happen. */
pg_fatal("unexpected state while parsing tar archive");
}
}
}
/*
* Parse a file header within a tar stream.
*
* The return value is true if we found a file header and passed it on to the
* next bbstreamer; it is false if we have reached the archive trailer.
*/
static bool
bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer)
{
bool has_nonzero_byte = false;
int i;
bbstreamer_member *member = &mystreamer->member;
char *buffer = mystreamer->base.bbs_buffer.data;
Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
/* Check whether we've got a block of all zero bytes. */
for (i = 0; i < TAR_BLOCK_SIZE; ++i)
{
if (buffer[i] != '\0')
{
has_nonzero_byte = true;
break;
}
}
/*
* If the entire block was zeros, this is the end of the archive, not the
* start of the next file.
*/
if (!has_nonzero_byte)
return false;
/*
* Parse key fields out of the header.
*/
strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
if (member->pathname[0] == '\0')
pg_fatal("tar member has empty name");
member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
member->is_directory =
(buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
member->is_link =
(buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
if (member->is_link)
strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
/* Compute number of padding bytes. */
mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
/* Forward the entire header to the next bbstreamer. */
bbstreamer_content(mystreamer->base.bbs_next, member,
buffer, TAR_BLOCK_SIZE,
BBSTREAMER_MEMBER_HEADER);
return true;
}
/*
* End-of-stream processing for a tar parser.
*/
static void
bbstreamer_tar_parser_finalize(bbstreamer *streamer)
{
bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
if (mystreamer->next_context != BBSTREAMER_ARCHIVE_TRAILER &&
(mystreamer->next_context != BBSTREAMER_MEMBER_HEADER ||
mystreamer->base.bbs_buffer.len > 0))
pg_fatal("COPY stream ended before last file was finished");
/* Send the archive trailer, even if empty. */
bbstreamer_content(streamer->bbs_next, NULL,
streamer->bbs_buffer.data, streamer->bbs_buffer.len,
BBSTREAMER_ARCHIVE_TRAILER);
/* Now finalize successor. */
bbstreamer_finalize(streamer->bbs_next);
}
/*
* Free memory associated with a tar parser.
*/
static void
bbstreamer_tar_parser_free(bbstreamer *streamer)
{
pfree(streamer->bbs_buffer.data);
bbstreamer_free(streamer->bbs_next);
}
/*
* Create a bbstreamer that can generate a tar archive.
*
* This is intended to be usable either for generating a brand-new tar archive
* or for modifying one on the fly. The input should be a series of typed
* chunks (i.e. not BBSTREAMER_UNKNOWN). See also the comments for
* bbstreamer_tar_parser_content.
*/
extern bbstreamer *
bbstreamer_tar_archiver_new(bbstreamer *next)
{
bbstreamer_tar_archiver *streamer;
streamer = palloc0(sizeof(bbstreamer_tar_archiver));
*((const bbstreamer_ops **) &streamer->base.bbs_ops) =
&bbstreamer_tar_archiver_ops;
streamer->base.bbs_next = next;
return &streamer->base;
}
/*
* Fix up the stream of input chunks to create a valid tar file.
*
* If a BBSTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
* newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
* passed through without change. Any other size is a fatal error (and
* indicates a bug).
*
* Whenever a new BBSTREAMER_MEMBER_HEADER chunk is constructed, the
* corresponding BBSTREAMER_MEMBER_TRAILER chunk is also constructed from
* scratch. Specifically, we construct a block of zero bytes sufficient to
* pad out to a block boundary, as required by the tar format. Other
* BBSTREAMER_MEMBER_TRAILER chunks are passed through without change.
*
* Any BBSTREAMER_MEMBER_CONTENTS chunks are passed through without change.
*
* The BBSTREAMER_ARCHIVE_TRAILER chunk is replaced with two
* blocks of zero bytes. Not all tar programs require this, but apparently
* some do. The server does not supply this trailer. If no archive trailer is
* present, one will be added by bbstreamer_tar_parser_finalize.
*/
static void
bbstreamer_tar_archiver_content(bbstreamer *streamer,
bbstreamer_member *member,
const char *data, int len,
bbstreamer_archive_context context)
{
bbstreamer_tar_archiver *mystreamer = (bbstreamer_tar_archiver *) streamer;
char buffer[2 * TAR_BLOCK_SIZE];
Assert(context != BBSTREAMER_UNKNOWN);
if (context == BBSTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
{
Assert(len == 0);
/* Replace zero-length tar header with a newly constructed one. */
tarCreateHeader(buffer, member->pathname, NULL,
member->size, member->mode, member->uid, member->gid,
time(NULL));
data = buffer;
len = TAR_BLOCK_SIZE;
/* Also make a note to replace padding, in case size changed. */
mystreamer->rearchive_member = true;
}
else if (context == BBSTREAMER_MEMBER_TRAILER &&
mystreamer->rearchive_member)
{
int pad_bytes = tarPaddingBytesRequired(member->size);
/* Also replace padding, if we regenerated the header. */
memset(buffer, 0, pad_bytes);
data = buffer;
len = pad_bytes;
/* Don't do this again unless we replace another header. */
mystreamer->rearchive_member = false;
}
else if (context == BBSTREAMER_ARCHIVE_TRAILER)
{
/* Trailer should always be two blocks of zero bytes. */
memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
data = buffer;
len = 2 * TAR_BLOCK_SIZE;
}
bbstreamer_content(streamer->bbs_next, member, data, len, context);
}
/*
* End-of-stream processing for a tar archiver.
*/
static void
bbstreamer_tar_archiver_finalize(bbstreamer *streamer)
{
bbstreamer_finalize(streamer->bbs_next);
}
/*
* Free memory associated with a tar archiver.
*/
static void
bbstreamer_tar_archiver_free(bbstreamer *streamer)
{
bbstreamer_free(streamer->bbs_next);
pfree(streamer);
}
/*
* Create a bbstreamer that blindly adds two blocks of NUL bytes to the
* end of an incomplete tarfile that the server might send us.
*/
bbstreamer *
bbstreamer_tar_terminator_new(bbstreamer *next)
{
bbstreamer *streamer;
streamer = palloc0(sizeof(bbstreamer));
*((const bbstreamer_ops **) &streamer->bbs_ops) =
&bbstreamer_tar_terminator_ops;
streamer->bbs_next = next;
return streamer;
}
/*
* Pass all the content through without change.
*/
static void
bbstreamer_tar_terminator_content(bbstreamer *streamer,
bbstreamer_member *member,
const char *data, int len,
bbstreamer_archive_context context)
{
/* Expect unparsed input. */
Assert(member == NULL);
Assert(context == BBSTREAMER_UNKNOWN);
/* Just forward it. */
bbstreamer_content(streamer->bbs_next, member, data, len, context);
}
/*
* At the end, blindly add the two blocks of NUL bytes which the server fails
* to supply.
*/
static void
bbstreamer_tar_terminator_finalize(bbstreamer *streamer)
{
char buffer[2 * TAR_BLOCK_SIZE];
memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
bbstreamer_content(streamer->bbs_next, NULL, buffer,
2 * TAR_BLOCK_SIZE, BBSTREAMER_UNKNOWN);
bbstreamer_finalize(streamer->bbs_next);
}
/*
* Free memory associated with a tar terminator.
*/
static void
bbstreamer_tar_terminator_free(bbstreamer *streamer)
{
bbstreamer_free(streamer->bbs_next);
pfree(streamer);
}