515 lines
15 KiB
C
515 lines
15 KiB
C
/*-------------------------------------------------------------------------
 *
 * bbstreamer_tar.c
 *
 * This module implements three types of tar processing. A tar parser
 * expects unlabelled chunks of data (i.e. BBSTREAMER_UNKNOWN) and splits
 * it into labelled chunks (any other value of bbstreamer_archive_context).
 * A tar archiver does the reverse: it takes a bunch of labelled chunks
 * and produces a tarfile, optionally replacing member headers and trailers
 * so that upstream bbstreamer objects can perform surgery on the tarfile
 * contents without knowing the details of the tar format. A tar terminator
 * just adds two blocks of NUL bytes to the end of the file, since older
 * server versions produce files with this terminator omitted.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *		  src/bin/pg_basebackup/bbstreamer_tar.c
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres_fe.h"
|
|
|
|
#include <time.h>
|
|
|
|
#include "bbstreamer.h"
|
|
#include "common/logging.h"
|
|
#include "pgtar.h"
|
|
|
|
typedef struct bbstreamer_tar_parser
|
|
{
|
|
bbstreamer base;
|
|
bbstreamer_archive_context next_context;
|
|
bbstreamer_member member;
|
|
size_t file_bytes_sent;
|
|
size_t pad_bytes_expected;
|
|
} bbstreamer_tar_parser;
|
|
|
|
typedef struct bbstreamer_tar_archiver
|
|
{
|
|
bbstreamer base;
|
|
bool rearchive_member;
|
|
} bbstreamer_tar_archiver;
|
|
|
|
static void bbstreamer_tar_parser_content(bbstreamer *streamer,
|
|
bbstreamer_member *member,
|
|
const char *data, int len,
|
|
bbstreamer_archive_context context);
|
|
static void bbstreamer_tar_parser_finalize(bbstreamer *streamer);
|
|
static void bbstreamer_tar_parser_free(bbstreamer *streamer);
|
|
static bool bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer);
|
|
|
|
const bbstreamer_ops bbstreamer_tar_parser_ops = {
|
|
.content = bbstreamer_tar_parser_content,
|
|
.finalize = bbstreamer_tar_parser_finalize,
|
|
.free = bbstreamer_tar_parser_free
|
|
};
|
|
|
|
static void bbstreamer_tar_archiver_content(bbstreamer *streamer,
|
|
bbstreamer_member *member,
|
|
const char *data, int len,
|
|
bbstreamer_archive_context context);
|
|
static void bbstreamer_tar_archiver_finalize(bbstreamer *streamer);
|
|
static void bbstreamer_tar_archiver_free(bbstreamer *streamer);
|
|
|
|
const bbstreamer_ops bbstreamer_tar_archiver_ops = {
|
|
.content = bbstreamer_tar_archiver_content,
|
|
.finalize = bbstreamer_tar_archiver_finalize,
|
|
.free = bbstreamer_tar_archiver_free
|
|
};
|
|
|
|
static void bbstreamer_tar_terminator_content(bbstreamer *streamer,
|
|
bbstreamer_member *member,
|
|
const char *data, int len,
|
|
bbstreamer_archive_context context);
|
|
static void bbstreamer_tar_terminator_finalize(bbstreamer *streamer);
|
|
static void bbstreamer_tar_terminator_free(bbstreamer *streamer);
|
|
|
|
const bbstreamer_ops bbstreamer_tar_terminator_ops = {
|
|
.content = bbstreamer_tar_terminator_content,
|
|
.finalize = bbstreamer_tar_terminator_finalize,
|
|
.free = bbstreamer_tar_terminator_free
|
|
};
|
|
|
|
/*
|
|
* Create a bbstreamer that can parse a stream of content as tar data.
|
|
*
|
|
* The input should be a series of BBSTREAMER_UNKNOWN chunks; the bbstreamer
|
|
* specified by 'next' will receive a series of typed chunks, as per the
|
|
* conventions described in bbstreamer.h.
|
|
*/
|
|
extern bbstreamer *
|
|
bbstreamer_tar_parser_new(bbstreamer *next)
|
|
{
|
|
bbstreamer_tar_parser *streamer;
|
|
|
|
streamer = palloc0(sizeof(bbstreamer_tar_parser));
|
|
*((const bbstreamer_ops **) &streamer->base.bbs_ops) =
|
|
&bbstreamer_tar_parser_ops;
|
|
streamer->base.bbs_next = next;
|
|
initStringInfo(&streamer->base.bbs_buffer);
|
|
streamer->next_context = BBSTREAMER_MEMBER_HEADER;
|
|
|
|
return &streamer->base;
|
|
}
|
|
|
|
/*
|
|
* Parse unknown content as tar data.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_parser_content(bbstreamer *streamer, bbstreamer_member *member,
|
|
const char *data, int len,
|
|
bbstreamer_archive_context context)
|
|
{
|
|
bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
|
|
size_t nbytes;
|
|
|
|
/* Expect unparsed input. */
|
|
Assert(member == NULL);
|
|
Assert(context == BBSTREAMER_UNKNOWN);
|
|
|
|
while (len > 0)
|
|
{
|
|
switch (mystreamer->next_context)
|
|
{
|
|
case BBSTREAMER_MEMBER_HEADER:
|
|
|
|
/*
|
|
* If we're expecting an archive member header, accumulate a
|
|
* full block of data before doing anything further.
|
|
*/
|
|
if (!bbstreamer_buffer_until(streamer, &data, &len,
|
|
TAR_BLOCK_SIZE))
|
|
return;
|
|
|
|
/*
|
|
* Now we can process the header and get ready to process the
|
|
* file contents; however, we might find out that what we
|
|
* thought was the next file header is actually the start of
|
|
* the archive trailer. Switch modes accordingly.
|
|
*/
|
|
if (bbstreamer_tar_header(mystreamer))
|
|
{
|
|
if (mystreamer->member.size == 0)
|
|
{
|
|
/* No content; trailer is zero-length. */
|
|
bbstreamer_content(mystreamer->base.bbs_next,
|
|
&mystreamer->member,
|
|
NULL, 0,
|
|
BBSTREAMER_MEMBER_TRAILER);
|
|
|
|
/* Expect next header. */
|
|
mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
|
|
}
|
|
else
|
|
{
|
|
/* Expect contents. */
|
|
mystreamer->next_context = BBSTREAMER_MEMBER_CONTENTS;
|
|
}
|
|
mystreamer->base.bbs_buffer.len = 0;
|
|
mystreamer->file_bytes_sent = 0;
|
|
}
|
|
else
|
|
mystreamer->next_context = BBSTREAMER_ARCHIVE_TRAILER;
|
|
break;
|
|
|
|
case BBSTREAMER_MEMBER_CONTENTS:
|
|
|
|
/*
|
|
* Send as much content as we have, but not more than the
|
|
* remaining file length.
|
|
*/
|
|
Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
|
|
nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
|
|
nbytes = Min(nbytes, len);
|
|
Assert(nbytes > 0);
|
|
bbstreamer_content(mystreamer->base.bbs_next,
|
|
&mystreamer->member,
|
|
data, nbytes,
|
|
BBSTREAMER_MEMBER_CONTENTS);
|
|
mystreamer->file_bytes_sent += nbytes;
|
|
data += nbytes;
|
|
len -= nbytes;
|
|
|
|
/*
|
|
* If we've not yet sent the whole file, then there's more
|
|
* content to come; otherwise, it's time to expect the file
|
|
* trailer.
|
|
*/
|
|
Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
|
|
if (mystreamer->file_bytes_sent == mystreamer->member.size)
|
|
{
|
|
if (mystreamer->pad_bytes_expected == 0)
|
|
{
|
|
/* Trailer is zero-length. */
|
|
bbstreamer_content(mystreamer->base.bbs_next,
|
|
&mystreamer->member,
|
|
NULL, 0,
|
|
BBSTREAMER_MEMBER_TRAILER);
|
|
|
|
/* Expect next header. */
|
|
mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
|
|
}
|
|
else
|
|
{
|
|
/* Trailer is not zero-length. */
|
|
mystreamer->next_context = BBSTREAMER_MEMBER_TRAILER;
|
|
}
|
|
mystreamer->base.bbs_buffer.len = 0;
|
|
}
|
|
break;
|
|
|
|
case BBSTREAMER_MEMBER_TRAILER:
|
|
|
|
/*
|
|
* If we're expecting an archive member trailer, accumulate
|
|
* the expected number of padding bytes before sending
|
|
* anything onward.
|
|
*/
|
|
if (!bbstreamer_buffer_until(streamer, &data, &len,
|
|
mystreamer->pad_bytes_expected))
|
|
return;
|
|
|
|
/* OK, now we can send it. */
|
|
bbstreamer_content(mystreamer->base.bbs_next,
|
|
&mystreamer->member,
|
|
data, mystreamer->pad_bytes_expected,
|
|
BBSTREAMER_MEMBER_TRAILER);
|
|
|
|
/* Expect next file header. */
|
|
mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
|
|
mystreamer->base.bbs_buffer.len = 0;
|
|
break;
|
|
|
|
case BBSTREAMER_ARCHIVE_TRAILER:
|
|
|
|
/*
|
|
* We've seen an end-of-archive indicator, so anything more is
|
|
* buffered and sent as part of the archive trailer. But we
|
|
* don't expect more than 2 blocks.
|
|
*/
|
|
bbstreamer_buffer_bytes(streamer, &data, &len, len);
|
|
if (len > 2 * TAR_BLOCK_SIZE)
|
|
pg_fatal("tar file trailer exceeds 2 blocks");
|
|
return;
|
|
|
|
default:
|
|
/* Shouldn't happen. */
|
|
pg_fatal("unexpected state while parsing tar archive");
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Parse a file header within a tar stream.
|
|
*
|
|
* The return value is true if we found a file header and passed it on to the
|
|
* next bbstreamer; it is false if we have reached the archive trailer.
|
|
*/
|
|
static bool
|
|
bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer)
|
|
{
|
|
bool has_nonzero_byte = false;
|
|
int i;
|
|
bbstreamer_member *member = &mystreamer->member;
|
|
char *buffer = mystreamer->base.bbs_buffer.data;
|
|
|
|
Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
|
|
|
|
/* Check whether we've got a block of all zero bytes. */
|
|
for (i = 0; i < TAR_BLOCK_SIZE; ++i)
|
|
{
|
|
if (buffer[i] != '\0')
|
|
{
|
|
has_nonzero_byte = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If the entire block was zeros, this is the end of the archive, not the
|
|
* start of the next file.
|
|
*/
|
|
if (!has_nonzero_byte)
|
|
return false;
|
|
|
|
/*
|
|
* Parse key fields out of the header.
|
|
*/
|
|
strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
|
|
if (member->pathname[0] == '\0')
|
|
pg_fatal("tar member has empty name");
|
|
member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
|
|
member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
|
|
member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
|
|
member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
|
|
member->is_directory =
|
|
(buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
|
|
member->is_link =
|
|
(buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
|
|
if (member->is_link)
|
|
strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
|
|
|
|
/* Compute number of padding bytes. */
|
|
mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
|
|
|
|
/* Forward the entire header to the next bbstreamer. */
|
|
bbstreamer_content(mystreamer->base.bbs_next, member,
|
|
buffer, TAR_BLOCK_SIZE,
|
|
BBSTREAMER_MEMBER_HEADER);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* End-of-stream processing for a tar parser.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_parser_finalize(bbstreamer *streamer)
|
|
{
|
|
bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
|
|
|
|
if (mystreamer->next_context != BBSTREAMER_ARCHIVE_TRAILER &&
|
|
(mystreamer->next_context != BBSTREAMER_MEMBER_HEADER ||
|
|
mystreamer->base.bbs_buffer.len > 0))
|
|
pg_fatal("COPY stream ended before last file was finished");
|
|
|
|
/* Send the archive trailer, even if empty. */
|
|
bbstreamer_content(streamer->bbs_next, NULL,
|
|
streamer->bbs_buffer.data, streamer->bbs_buffer.len,
|
|
BBSTREAMER_ARCHIVE_TRAILER);
|
|
|
|
/* Now finalize successor. */
|
|
bbstreamer_finalize(streamer->bbs_next);
|
|
}
|
|
|
|
/*
|
|
* Free memory associated with a tar parser.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_parser_free(bbstreamer *streamer)
|
|
{
|
|
pfree(streamer->bbs_buffer.data);
|
|
bbstreamer_free(streamer->bbs_next);
|
|
}
|
|
|
|
/*
|
|
* Create a bbstreamer that can generate a tar archive.
|
|
*
|
|
* This is intended to be usable either for generating a brand-new tar archive
|
|
* or for modifying one on the fly. The input should be a series of typed
|
|
* chunks (i.e. not BBSTREAMER_UNKNOWN). See also the comments for
|
|
* bbstreamer_tar_parser_content.
|
|
*/
|
|
extern bbstreamer *
|
|
bbstreamer_tar_archiver_new(bbstreamer *next)
|
|
{
|
|
bbstreamer_tar_archiver *streamer;
|
|
|
|
streamer = palloc0(sizeof(bbstreamer_tar_archiver));
|
|
*((const bbstreamer_ops **) &streamer->base.bbs_ops) =
|
|
&bbstreamer_tar_archiver_ops;
|
|
streamer->base.bbs_next = next;
|
|
|
|
return &streamer->base;
|
|
}
|
|
|
|
/*
|
|
* Fix up the stream of input chunks to create a valid tar file.
|
|
*
|
|
* If a BBSTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
|
|
* newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
|
|
* passed through without change. Any other size is a fatal error (and
|
|
* indicates a bug).
|
|
*
|
|
* Whenever a new BBSTREAMER_MEMBER_HEADER chunk is constructed, the
|
|
* corresponding BBSTREAMER_MEMBER_TRAILER chunk is also constructed from
|
|
* scratch. Specifically, we construct a block of zero bytes sufficient to
|
|
* pad out to a block boundary, as required by the tar format. Other
|
|
* BBSTREAMER_MEMBER_TRAILER chunks are passed through without change.
|
|
*
|
|
* Any BBSTREAMER_MEMBER_CONTENTS chunks are passed through without change.
|
|
*
|
|
* The BBSTREAMER_ARCHIVE_TRAILER chunk is replaced with two
|
|
* blocks of zero bytes. Not all tar programs require this, but apparently
|
|
* some do. The server does not supply this trailer. If no archive trailer is
|
|
* present, one will be added by bbstreamer_tar_parser_finalize.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_archiver_content(bbstreamer *streamer,
|
|
bbstreamer_member *member,
|
|
const char *data, int len,
|
|
bbstreamer_archive_context context)
|
|
{
|
|
bbstreamer_tar_archiver *mystreamer = (bbstreamer_tar_archiver *) streamer;
|
|
char buffer[2 * TAR_BLOCK_SIZE];
|
|
|
|
Assert(context != BBSTREAMER_UNKNOWN);
|
|
|
|
if (context == BBSTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
|
|
{
|
|
Assert(len == 0);
|
|
|
|
/* Replace zero-length tar header with a newly constructed one. */
|
|
tarCreateHeader(buffer, member->pathname, NULL,
|
|
member->size, member->mode, member->uid, member->gid,
|
|
time(NULL));
|
|
data = buffer;
|
|
len = TAR_BLOCK_SIZE;
|
|
|
|
/* Also make a note to replace padding, in case size changed. */
|
|
mystreamer->rearchive_member = true;
|
|
}
|
|
else if (context == BBSTREAMER_MEMBER_TRAILER &&
|
|
mystreamer->rearchive_member)
|
|
{
|
|
int pad_bytes = tarPaddingBytesRequired(member->size);
|
|
|
|
/* Also replace padding, if we regenerated the header. */
|
|
memset(buffer, 0, pad_bytes);
|
|
data = buffer;
|
|
len = pad_bytes;
|
|
|
|
/* Don't do this again unless we replace another header. */
|
|
mystreamer->rearchive_member = false;
|
|
}
|
|
else if (context == BBSTREAMER_ARCHIVE_TRAILER)
|
|
{
|
|
/* Trailer should always be two blocks of zero bytes. */
|
|
memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
|
|
data = buffer;
|
|
len = 2 * TAR_BLOCK_SIZE;
|
|
}
|
|
|
|
bbstreamer_content(streamer->bbs_next, member, data, len, context);
|
|
}
|
|
|
|
/*
|
|
* End-of-stream processing for a tar archiver.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_archiver_finalize(bbstreamer *streamer)
|
|
{
|
|
bbstreamer_finalize(streamer->bbs_next);
|
|
}
|
|
|
|
/*
|
|
* Free memory associated with a tar archiver.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_archiver_free(bbstreamer *streamer)
|
|
{
|
|
bbstreamer_free(streamer->bbs_next);
|
|
pfree(streamer);
|
|
}
|
|
|
|
/*
|
|
* Create a bbstreamer that blindly adds two blocks of NUL bytes to the
|
|
* end of an incomplete tarfile that the server might send us.
|
|
*/
|
|
bbstreamer *
|
|
bbstreamer_tar_terminator_new(bbstreamer *next)
|
|
{
|
|
bbstreamer *streamer;
|
|
|
|
streamer = palloc0(sizeof(bbstreamer));
|
|
*((const bbstreamer_ops **) &streamer->bbs_ops) =
|
|
&bbstreamer_tar_terminator_ops;
|
|
streamer->bbs_next = next;
|
|
|
|
return streamer;
|
|
}
|
|
|
|
/*
|
|
* Pass all the content through without change.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_terminator_content(bbstreamer *streamer,
|
|
bbstreamer_member *member,
|
|
const char *data, int len,
|
|
bbstreamer_archive_context context)
|
|
{
|
|
/* Expect unparsed input. */
|
|
Assert(member == NULL);
|
|
Assert(context == BBSTREAMER_UNKNOWN);
|
|
|
|
/* Just forward it. */
|
|
bbstreamer_content(streamer->bbs_next, member, data, len, context);
|
|
}
|
|
|
|
/*
|
|
* At the end, blindly add the two blocks of NUL bytes which the server fails
|
|
* to supply.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_terminator_finalize(bbstreamer *streamer)
|
|
{
|
|
char buffer[2 * TAR_BLOCK_SIZE];
|
|
|
|
memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
|
|
bbstreamer_content(streamer->bbs_next, NULL, buffer,
|
|
2 * TAR_BLOCK_SIZE, BBSTREAMER_UNKNOWN);
|
|
bbstreamer_finalize(streamer->bbs_next);
|
|
}
|
|
|
|
/*
|
|
* Free memory associated with a tar terminator.
|
|
*/
|
|
static void
|
|
bbstreamer_tar_terminator_free(bbstreamer *streamer)
|
|
{
|
|
bbstreamer_free(streamer->bbs_next);
|
|
pfree(streamer);
|
|
}
|