postgresql/src/backend/backup/basebackup.c

1851 lines
52 KiB
C

/*-------------------------------------------------------------------------
*
* basebackup.c
* code for taking a base backup and streaming it to a standby
*
* Portions Copyright (c) 2010-2023, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/backup/basebackup.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/stat.h>
#include <unistd.h>
#include <time.h>
#include "access/xlog_internal.h"
#include "access/xlogbackup.h"
#include "backup/backup_manifest.h"
#include "backup/basebackup.h"
#include "backup/basebackup_sink.h"
#include "backup/basebackup_target.h"
#include "commands/defrem.h"
#include "common/compression.h"
#include "common/file_perm.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "nodes/pg_list.h"
#include "pgstat.h"
#include "pgtar.h"
#include "port.h"
#include "postmaster/syslogger.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
#include "storage/bufpage.h"
#include "storage/checksum.h"
#include "storage/dsm_impl.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/reinit.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/ps_status.h"
#include "utils/relcache.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
/*
* How much data do we want to send in one CopyData message? Note that
* this may also result in reading the underlying files in chunks of this
* size.
*
* NB: The buffer size is required to be a multiple of the system block
* size, so use that value instead if it's bigger than our preference.
*/
#define SINK_BUFFER_LENGTH Max(32768, BLCKSZ)
typedef struct
{
const char *label;
bool progress;
bool fastcheckpoint;
bool nowait;
bool includewal;
uint32 maxrate;
bool sendtblspcmapfile;
bool send_to_client;
bool use_copytblspc;
BaseBackupTargetHandle *target_handle;
backup_manifest_option manifest;
pg_compress_algorithm compression;
pg_compress_specification compression_specification;
pg_checksum_type manifest_checksum_type;
} basebackup_options;
static int64 sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
struct backup_manifest_info *manifest);
static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
List *tablespaces, bool sendtblspclinks,
backup_manifest_info *manifest, const char *spcoid);
static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
struct stat *statbuf, bool missing_ok, Oid dboid,
backup_manifest_info *manifest, const char *spcoid);
static void sendFileWithContent(bbsink *sink, const char *filename,
const char *content,
backup_manifest_info *manifest);
static int64 _tarWriteHeader(bbsink *sink, const char *filename,
const char *linktarget, struct stat *statbuf,
bool sizeonly);
static void _tarWritePadding(bbsink *sink, int len);
static void convert_link_to_directory(const char *pathbuf, struct stat *statbuf);
static void perform_base_backup(basebackup_options *opt, bbsink *sink);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static int compareWalFileNames(const ListCell *a, const ListCell *b);
static bool is_checksummed_file(const char *fullpath, const char *filename);
static int basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
const char *filename, bool partial_read_ok);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
/* Total number of checksum failures during base backup. */
static long long int total_checksum_failures;
/* Do not verify checksums. */
static bool noverify_checksums = false;
/*
* Definition of one element part of an exclusion list, used for paths part
* of checksum validation or base backups. "name" is the name of the file
* or path to check for exclusion. If "match_prefix" is true, any items
* matching the name as prefix are excluded.
*/
struct exclude_list_item
{
const char *name;
bool match_prefix;
};
/*
* The contents of these directories are removed or recreated during server
* start so they are not included in backups. The directories themselves are
* kept and included as empty to preserve access permissions.
*
* Note: this list should be kept in sync with the filter lists in pg_rewind's
* filemap.c.
*/
static const char *const excludeDirContents[] =
{
/*
* Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
* because extensions like pg_stat_statements store data there.
*/
PG_STAT_TMP_DIR,
/*
* It is generally not useful to backup the contents of this directory
* even if the intention is to restore to another primary. See backup.sgml
* for a more detailed description.
*/
"pg_replslot",
/* Contents removed on startup, see dsm_cleanup_for_mmap(). */
PG_DYNSHMEM_DIR,
/* Contents removed on startup, see AsyncShmemInit(). */
"pg_notify",
/*
* Old contents are loaded for possible debugging but are not required for
* normal operation, see SerialInit().
*/
"pg_serial",
/* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
"pg_snapshots",
/* Contents zeroed on startup, see StartupSUBTRANS(). */
"pg_subtrans",
/* end of list */
NULL
};
/*
* List of files excluded from backups.
*/
static const struct exclude_list_item excludeFiles[] =
{
/* Skip auto conf temporary file. */
{PG_AUTOCONF_FILENAME ".tmp", false},
/* Skip current log file temporary file */
{LOG_METAINFO_DATAFILE_TMP, false},
/*
* Skip relation cache because it is rebuilt on startup. This includes
* temporary files.
*/
{RELCACHE_INIT_FILENAME, true},
/*
* backup_label and tablespace_map should not exist in a running cluster
* capable of doing an online backup, but exclude them just in case.
*/
{BACKUP_LABEL_FILE, false},
{TABLESPACE_MAP, false},
/*
* If there's a backup_manifest, it belongs to a backup that was used to
* start this server. It is *not* correct for this backup. Our
* backup_manifest is injected into the backup separately if users want
* it.
*/
{"backup_manifest", false},
{"postmaster.pid", false},
{"postmaster.opts", false},
/* end of list */
{NULL, false}
};
/*
* List of files excluded from checksum validation.
*
* Note: this list should be kept in sync with what pg_checksums.c
* includes.
*/
static const struct exclude_list_item noChecksumFiles[] = {
{"pg_control", false},
{"pg_filenode.map", false},
{"pg_internal.init", true},
{"PG_VERSION", false},
#ifdef EXEC_BACKEND
{"config_exec_params", true},
#endif
{NULL, false}
};
/*
* Actually do a base backup for the specified tablespaces.
*
* This is split out mainly to avoid complaints about "variable might be
* clobbered by longjmp" from stupider versions of gcc.
*/
static void
perform_base_backup(basebackup_options *opt, bbsink *sink)
{
bbsink_state state;
XLogRecPtr endptr;
TimeLineID endtli;
backup_manifest_info manifest;
BackupState *backup_state;
StringInfo tablespace_map;
/* Initial backup state, insofar as we know it now. */
state.tablespaces = NIL;
state.tablespace_num = 0;
state.bytes_done = 0;
state.bytes_total = 0;
state.bytes_total_is_valid = false;
/* we're going to use a BufFile, so we need a ResourceOwner */
Assert(CurrentResourceOwner == NULL);
CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup");
backup_started_in_recovery = RecoveryInProgress();
InitializeBackupManifest(&manifest, opt->manifest,
opt->manifest_checksum_type);
total_checksum_failures = 0;
/* Allocate backup related variables. */
backup_state = (BackupState *) palloc0(sizeof(BackupState));
tablespace_map = makeStringInfo();
basebackup_progress_wait_checkpoint();
do_pg_backup_start(opt->label, opt->fastcheckpoint, &state.tablespaces,
backup_state, tablespace_map);
state.startptr = backup_state->startpoint;
state.starttli = backup_state->starttli;
/*
* Once do_pg_backup_start has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this
* reason, *all* functionality between do_pg_backup_start() and the end of
* do_pg_backup_stop() should be inside the error cleanup block!
*/
PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
{
ListCell *lc;
tablespaceinfo *newti;
/* Add a node for the base directory at the end */
newti = palloc0(sizeof(tablespaceinfo));
newti->size = -1;
state.tablespaces = lappend(state.tablespaces, newti);
/*
* Calculate the total backup size by summing up the size of each
* tablespace
*/
if (opt->progress)
{
basebackup_progress_estimate_backup_size();
foreach(lc, state.tablespaces)
{
tablespaceinfo *tmp = (tablespaceinfo *) lfirst(lc);
if (tmp->path == NULL)
tmp->size = sendDir(sink, ".", 1, true, state.tablespaces,
true, NULL, NULL);
else
tmp->size = sendTablespace(sink, tmp->path, tmp->oid, true,
NULL);
state.bytes_total += tmp->size;
}
state.bytes_total_is_valid = true;
}
/* notify basebackup sink about start of backup */
bbsink_begin_backup(sink, &state, SINK_BUFFER_LENGTH);
/* Send off our tablespaces one by one */
foreach(lc, state.tablespaces)
{
tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
if (ti->path == NULL)
{
struct stat statbuf;
bool sendtblspclinks = true;
char *backup_label;
bbsink_begin_archive(sink, "base.tar");
/* In the main tar, include the backup_label first... */
backup_label = build_backup_content(backup_state, false);
sendFileWithContent(sink, BACKUP_LABEL_FILE,
backup_label, &manifest);
pfree(backup_label);
/* Then the tablespace_map file, if required... */
if (opt->sendtblspcmapfile)
{
sendFileWithContent(sink, TABLESPACE_MAP,
tablespace_map->data, &manifest);
sendtblspclinks = false;
}
/* Then the bulk of the files... */
sendDir(sink, ".", 1, false, state.tablespaces,
sendtblspclinks, &manifest, NULL);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
XLOG_CONTROL_FILE)));
sendFile(sink, XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
false, InvalidOid, &manifest, NULL);
}
else
{
char *archive_name = psprintf("%s.tar", ti->oid);
bbsink_begin_archive(sink, archive_name);
sendTablespace(sink, ti->path, ti->oid, false, &manifest);
}
/*
* If we're including WAL, and this is the main data directory we
* don't treat this as the end of the tablespace. Instead, we will
* include the xlog files below and stop afterwards. This is safe
* since the main data directory is always sent *last*.
*/
if (opt->includewal && ti->path == NULL)
{
Assert(lnext(state.tablespaces, lc) == NULL);
}
else
{
/* Properly terminate the tarfile. */
StaticAssertDecl(2 * TAR_BLOCK_SIZE <= BLCKSZ,
"BLCKSZ too small for 2 tar blocks");
memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
/* OK, that's the end of the archive. */
bbsink_end_archive(sink);
}
}
basebackup_progress_wait_wal_archive(&state);
do_pg_backup_stop(backup_state, !opt->nowait);
endptr = backup_state->stoppoint;
endtli = backup_state->stoptli;
/* Deallocate backup-related variables. */
pfree(tablespace_map->data);
pfree(tablespace_map);
pfree(backup_state);
}
PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
if (opt->includewal)
{
/*
* We've left the last tar file "open", so we can now append the
* required WAL files to it.
*/
char pathbuf[MAXPGPATH];
XLogSegNo segno;
XLogSegNo startsegno;
XLogSegNo endsegno;
struct stat statbuf;
List *historyFileList = NIL;
List *walFileList = NIL;
char firstoff[MAXFNAMELEN];
char lastoff[MAXFNAMELEN];
DIR *dir;
struct dirent *de;
ListCell *lc;
TimeLineID tli;
basebackup_progress_transfer_wal();
/*
* I'd rather not worry about timelines here, so scan pg_wal and
* include all WAL files in the range between 'startptr' and 'endptr',
* regardless of the timeline the file is stamped with. If there are
* some spurious WAL files belonging to timelines that don't belong in
* this server's history, they will be included too. Normally there
* shouldn't be such files, but if there are, there's little harm in
* including them.
*/
XLByteToSeg(state.startptr, startsegno, wal_segment_size);
XLogFileName(firstoff, state.starttli, startsegno, wal_segment_size);
XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
XLogFileName(lastoff, endtli, endsegno, wal_segment_size);
dir = AllocateDir("pg_wal");
while ((de = ReadDir(dir, "pg_wal")) != NULL)
{
/* Does it look like a WAL segment, and is it in the range? */
if (IsXLogFileName(de->d_name) &&
strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
strcmp(de->d_name + 8, lastoff + 8) <= 0)
{
walFileList = lappend(walFileList, pstrdup(de->d_name));
}
/* Does it look like a timeline history file? */
else if (IsTLHistoryFileName(de->d_name))
{
historyFileList = lappend(historyFileList, pstrdup(de->d_name));
}
}
FreeDir(dir);
/*
* Before we go any further, check that none of the WAL segments we
* need were removed.
*/
CheckXLogRemoved(startsegno, state.starttli);
/*
* Sort the WAL filenames. We want to send the files in order from
* oldest to newest, to reduce the chance that a file is recycled
* before we get a chance to send it over.
*/
list_sort(walFileList, compareWalFileNames);
/*
* There must be at least one xlog file in the pg_wal directory, since
* we are doing backup-including-xlog.
*/
if (walFileList == NIL)
ereport(ERROR,
(errmsg("could not find any WAL files")));
/*
* Sanity check: the first and last segment should cover startptr and
* endptr, with no gaps in between.
*/
XLogFromFileName((char *) linitial(walFileList),
&tli, &segno, wal_segment_size);
if (segno != startsegno)
{
char startfname[MAXFNAMELEN];
XLogFileName(startfname, state.starttli, startsegno,
wal_segment_size);
ereport(ERROR,
(errmsg("could not find WAL file \"%s\"", startfname)));
}
foreach(lc, walFileList)
{
char *walFileName = (char *) lfirst(lc);
XLogSegNo currsegno = segno;
XLogSegNo nextsegno = segno + 1;
XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
if (!(nextsegno == segno || currsegno == segno))
{
char nextfname[MAXFNAMELEN];
XLogFileName(nextfname, tli, nextsegno, wal_segment_size);
ereport(ERROR,
(errmsg("could not find WAL file \"%s\"", nextfname)));
}
}
if (segno != endsegno)
{
char endfname[MAXFNAMELEN];
XLogFileName(endfname, endtli, endsegno, wal_segment_size);
ereport(ERROR,
(errmsg("could not find WAL file \"%s\"", endfname)));
}
/* Ok, we have everything we need. Send the WAL files. */
foreach(lc, walFileList)
{
char *walFileName = (char *) lfirst(lc);
int fd;
size_t cnt;
pgoff_t len = 0;
snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY);
if (fd < 0)
{
int save_errno = errno;
/*
* Most likely reason for this is that the file was already
* removed by a checkpoint, so check for that to get a better
* error message.
*/
CheckXLogRemoved(segno, tli);
errno = save_errno;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", pathbuf)));
}
if (fstat(fd, &statbuf) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
pathbuf)));
if (statbuf.st_size != wal_segment_size)
{
CheckXLogRemoved(segno, tli);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("unexpected WAL file size \"%s\"", walFileName)));
}
/* send the WAL file itself */
_tarWriteHeader(sink, pathbuf, NULL, &statbuf, false);
while ((cnt = basebackup_read_file(fd, sink->bbs_buffer,
Min(sink->bbs_buffer_length,
wal_segment_size - len),
len, pathbuf, true)) > 0)
{
CheckXLogRemoved(segno, tli);
bbsink_archive_contents(sink, cnt);
len += cnt;
if (len == wal_segment_size)
break;
}
if (len != wal_segment_size)
{
CheckXLogRemoved(segno, tli);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("unexpected WAL file size \"%s\"", walFileName)));
}
/*
* wal_segment_size is a multiple of TAR_BLOCK_SIZE, so no need
* for padding.
*/
Assert(wal_segment_size % TAR_BLOCK_SIZE == 0);
CloseTransientFile(fd);
/*
* Mark file as archived, otherwise files can get archived again
* after promotion of a new node. This is in line with
* walreceiver.c always doing an XLogArchiveForceDone() after a
* complete segment.
*/
StatusFilePath(pathbuf, walFileName, ".done");
sendFileWithContent(sink, pathbuf, "", &manifest);
}
/*
* Send timeline history files too. Only the latest timeline history
* file is required for recovery, and even that only if there happens
* to be a timeline switch in the first WAL segment that contains the
* checkpoint record, or if we're taking a base backup from a standby
* server and the target timeline changes while the backup is taken.
* But they are small and highly useful for debugging purposes, so
* better include them all, always.
*/
foreach(lc, historyFileList)
{
char *fname = lfirst(lc);
snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
if (lstat(pathbuf, &statbuf) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", pathbuf)));
sendFile(sink, pathbuf, pathbuf, &statbuf, false, InvalidOid,
&manifest, NULL);
/* unconditionally mark file as archived */
StatusFilePath(pathbuf, fname, ".done");
sendFileWithContent(sink, pathbuf, "", &manifest);
}
/* Properly terminate the tar file. */
StaticAssertStmt(2 * TAR_BLOCK_SIZE <= BLCKSZ,
"BLCKSZ too small for 2 tar blocks");
memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
/* OK, that's the end of the archive. */
bbsink_end_archive(sink);
}
AddWALInfoToBackupManifest(&manifest, state.startptr, state.starttli,
endptr, endtli);
SendBackupManifest(&manifest, sink);
bbsink_end_backup(sink, endptr, endtli);
if (total_checksum_failures)
{
if (total_checksum_failures > 1)
ereport(WARNING,
(errmsg_plural("%lld total checksum verification failure",
"%lld total checksum verification failures",
total_checksum_failures,
total_checksum_failures)));
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("checksum verification failure during base backup")));
}
/*
* Make sure to free the manifest before the resource owners as manifests
* use cryptohash contexts that may depend on resource owners (like
* OpenSSL).
*/
FreeBackupManifest(&manifest);
/* clean up the resource owner we created */
WalSndResourceCleanup(true);
basebackup_progress_done();
}
/*
* list_sort comparison function, to compare log/seg portion of WAL segment
* filenames, ignoring the timeline portion.
*/
static int
compareWalFileNames(const ListCell *a, const ListCell *b)
{
char *fna = (char *) lfirst(a);
char *fnb = (char *) lfirst(b);
return strcmp(fna + 8, fnb + 8);
}
/*
* Parse the base backup options passed down by the parser
*/
static void
parse_basebackup_options(List *options, basebackup_options *opt)
{
ListCell *lopt;
bool o_label = false;
bool o_progress = false;
bool o_checkpoint = false;
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
bool o_tablespace_map = false;
bool o_noverify_checksums = false;
bool o_manifest = false;
bool o_manifest_checksums = false;
bool o_target = false;
bool o_target_detail = false;
char *target_str = NULL;
char *target_detail_str = NULL;
bool o_compression = false;
bool o_compression_detail = false;
char *compression_detail_str = NULL;
MemSet(opt, 0, sizeof(*opt));
opt->manifest = MANIFEST_OPTION_NO;
opt->manifest_checksum_type = CHECKSUM_TYPE_CRC32C;
opt->compression = PG_COMPRESSION_NONE;
opt->compression_specification.algorithm = PG_COMPRESSION_NONE;
foreach(lopt, options)
{
DefElem *defel = (DefElem *) lfirst(lopt);
if (strcmp(defel->defname, "label") == 0)
{
if (o_label)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->label = defGetString(defel);
o_label = true;
}
else if (strcmp(defel->defname, "progress") == 0)
{
if (o_progress)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->progress = defGetBoolean(defel);
o_progress = true;
}
else if (strcmp(defel->defname, "checkpoint") == 0)
{
char *optval = defGetString(defel);
if (o_checkpoint)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
if (pg_strcasecmp(optval, "fast") == 0)
opt->fastcheckpoint = true;
else if (pg_strcasecmp(optval, "spread") == 0)
opt->fastcheckpoint = false;
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized checkpoint type: \"%s\"",
optval)));
o_checkpoint = true;
}
else if (strcmp(defel->defname, "wait") == 0)
{
if (o_nowait)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->nowait = !defGetBoolean(defel);
o_nowait = true;
}
else if (strcmp(defel->defname, "wal") == 0)
{
if (o_wal)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->includewal = defGetBoolean(defel);
o_wal = true;
}
else if (strcmp(defel->defname, "max_rate") == 0)
{
int64 maxrate;
if (o_maxrate)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
maxrate = defGetInt64(defel);
if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
(int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
else if (strcmp(defel->defname, "tablespace_map") == 0)
{
if (o_tablespace_map)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->sendtblspcmapfile = defGetBoolean(defel);
o_tablespace_map = true;
}
else if (strcmp(defel->defname, "verify_checksums") == 0)
{
if (o_noverify_checksums)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
noverify_checksums = !defGetBoolean(defel);
o_noverify_checksums = true;
}
else if (strcmp(defel->defname, "manifest") == 0)
{
char *optval = defGetString(defel);
bool manifest_bool;
if (o_manifest)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
if (parse_bool(optval, &manifest_bool))
{
if (manifest_bool)
opt->manifest = MANIFEST_OPTION_YES;
else
opt->manifest = MANIFEST_OPTION_NO;
}
else if (pg_strcasecmp(optval, "force-encode") == 0)
opt->manifest = MANIFEST_OPTION_FORCE_ENCODE;
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized manifest option: \"%s\"",
optval)));
o_manifest = true;
}
else if (strcmp(defel->defname, "manifest_checksums") == 0)
{
char *optval = defGetString(defel);
if (o_manifest_checksums)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
if (!pg_checksum_parse_type(optval,
&opt->manifest_checksum_type))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized checksum algorithm: \"%s\"",
optval)));
o_manifest_checksums = true;
}
else if (strcmp(defel->defname, "target") == 0)
{
if (o_target)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
target_str = defGetString(defel);
o_target = true;
}
else if (strcmp(defel->defname, "target_detail") == 0)
{
char *optval = defGetString(defel);
if (o_target_detail)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
target_detail_str = optval;
o_target_detail = true;
}
else if (strcmp(defel->defname, "compression") == 0)
{
char *optval = defGetString(defel);
if (o_compression)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
if (!parse_compress_algorithm(optval, &opt->compression))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized compression algorithm: \"%s\"",
optval)));
o_compression = true;
}
else if (strcmp(defel->defname, "compression_detail") == 0)
{
if (o_compression_detail)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
compression_detail_str = defGetString(defel);
o_compression_detail = true;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized base backup option: \"%s\"",
defel->defname)));
}
if (opt->label == NULL)
opt->label = "base backup";
if (opt->manifest == MANIFEST_OPTION_NO)
{
if (o_manifest_checksums)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("manifest checksums require a backup manifest")));
opt->manifest_checksum_type = CHECKSUM_TYPE_NONE;
}
if (target_str == NULL)
{
if (target_detail_str != NULL)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("target detail cannot be used without target")));
opt->use_copytblspc = true;
opt->send_to_client = true;
}
else if (strcmp(target_str, "client") == 0)
{
if (target_detail_str != NULL)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("target \"%s\" does not accept a target detail",
target_str)));
opt->send_to_client = true;
}
else
opt->target_handle =
BaseBackupGetTargetHandle(target_str, target_detail_str);
if (o_compression_detail && !o_compression)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("compression detail cannot be specified unless compression is enabled")));
if (o_compression)
{
char *error_detail;
parse_compress_specification(opt->compression, compression_detail_str,
&opt->compression_specification);
error_detail =
validate_compress_specification(&opt->compression_specification);
if (error_detail != NULL)
ereport(ERROR,
errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid compression specification: %s",
error_detail));
}
}
/*
* SendBaseBackup() - send a complete base backup.
*
* The function will put the system into backup mode like pg_backup_start()
* does, so that the backup is consistent even though we read directly from
* the filesystem, bypassing the buffer cache.
*/
void
SendBaseBackup(BaseBackupCmd *cmd)
{
basebackup_options opt;
bbsink *sink;
SessionBackupState status = get_backup_status();
if (status == SESSION_BACKUP_RUNNING)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("a backup is already in progress in this session")));
parse_basebackup_options(cmd->options, &opt);
WalSndSetState(WALSNDSTATE_BACKUP);
if (update_process_title)
{
char activitymsg[50];
snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
opt.label);
set_ps_display(activitymsg);
}
/*
* If the target is specifically 'client' then set up to stream the backup
* to the client; otherwise, it's being sent someplace else and should not
* be sent to the client. BaseBackupGetSink has the job of setting up a
* sink to send the backup data wherever it needs to go.
*/
sink = bbsink_copystream_new(opt.send_to_client);
if (opt.target_handle != NULL)
sink = BaseBackupGetSink(opt.target_handle, sink);
/* Set up network throttling, if client requested it */
if (opt.maxrate > 0)
sink = bbsink_throttle_new(sink, opt.maxrate);
/* Set up server-side compression, if client requested it */
if (opt.compression == PG_COMPRESSION_GZIP)
sink = bbsink_gzip_new(sink, &opt.compression_specification);
else if (opt.compression == PG_COMPRESSION_LZ4)
sink = bbsink_lz4_new(sink, &opt.compression_specification);
else if (opt.compression == PG_COMPRESSION_ZSTD)
sink = bbsink_zstd_new(sink, &opt.compression_specification);
/* Set up progress reporting. */
sink = bbsink_progress_new(sink, opt.progress);
/*
* Perform the base backup, but make sure we clean up the bbsink even if
* an error occurs.
*/
PG_TRY();
{
perform_base_backup(&opt, sink);
}
PG_FINALLY();
{
bbsink_cleanup(sink);
}
PG_END_TRY();
}
/*
* Inject a file with given name and content in the output tar stream.
*/
static void
sendFileWithContent(bbsink *sink, const char *filename, const char *content,
backup_manifest_info *manifest)
{
struct stat statbuf;
int bytes_done = 0,
len;
pg_checksum_context checksum_ctx;
if (pg_checksum_init(&checksum_ctx, manifest->checksum_type) < 0)
elog(ERROR, "could not initialize checksum of file \"%s\"",
filename);
len = strlen(content);
/*
* Construct a stat struct for the backup_label file we're injecting in
* the tar.
*/
/* Windows doesn't have the concept of uid and gid */
#ifdef WIN32
statbuf.st_uid = 0;
statbuf.st_gid = 0;
#else
statbuf.st_uid = geteuid();
statbuf.st_gid = getegid();
#endif
statbuf.st_mtime = time(NULL);
statbuf.st_mode = pg_file_create_mode;
statbuf.st_size = len;
_tarWriteHeader(sink, filename, NULL, &statbuf, false);
if (pg_checksum_update(&checksum_ctx, (uint8 *) content, len) < 0)
elog(ERROR, "could not update checksum of file \"%s\"",
filename);
while (bytes_done < len)
{
size_t remaining = len - bytes_done;
size_t nbytes = Min(sink->bbs_buffer_length, remaining);
memcpy(sink->bbs_buffer, content, nbytes);
bbsink_archive_contents(sink, nbytes);
bytes_done += nbytes;
content += nbytes;
}
_tarWritePadding(sink, len);
AddFileToBackupManifest(manifest, NULL, filename, len,
(pg_time_t) statbuf.st_mtime, &checksum_ctx);
}
/*
* Include the tablespace directory pointed to by 'path' in the output tar
* stream. If 'sizeonly' is true, we just calculate a total length and return
* it, without actually sending anything.
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
static int64
sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
backup_manifest_info *manifest)
{
int64 size;
char pathbuf[MAXPGPATH];
struct stat statbuf;
/*
* 'path' points to the tablespace location, but we only want to include
* the version directory in it that belongs to us.
*/
snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
TABLESPACE_VERSION_DIRECTORY);
/*
* Store a directory entry in the tar file so we get the permissions
* right.
*/
if (lstat(pathbuf, &statbuf) != 0)
{
if (errno != ENOENT)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file or directory \"%s\": %m",
pathbuf)));
/* If the tablespace went away while scanning, it's no error. */
return 0;
}
size = _tarWriteHeader(sink, TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
sizeonly);
/* Send all the files in the tablespace version directory */
size += sendDir(sink, pathbuf, strlen(path), sizeonly, NIL, true, manifest,
spcoid);
return size;
}
/*
* Include all files from the given directory in the output tar stream. If
* 'sizeonly' is true, we just calculate a total length and return it, without
* actually sending anything.
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
*
* If sendtblspclinks is true, we need to include symlink
* information in the tar file. If not, we can skip that
* as it will be sent separately in the tablespace_map file.
*/
static int64
sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
List *tablespaces, bool sendtblspclinks, backup_manifest_info *manifest,
const char *spcoid)
{
DIR *dir;
struct dirent *de;
char pathbuf[MAXPGPATH * 2];
struct stat statbuf;
int64 size = 0;
const char *lastDir; /* Split last dir from parent path. */
bool isDbDir = false; /* Does this directory contain relations? */
/*
* Determine if the current path is a database directory that can contain
* relations.
*
* Start by finding the location of the delimiter between the parent path
* and the current path.
*/
lastDir = last_dir_separator(path);
/* Does this path look like a database path (i.e. all digits)? */
if (lastDir != NULL &&
strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
{
/* Part of path that contains the parent directory. */
int parentPathLen = lastDir - path;
/*
* Mark path as a database directory if the parent path is either
* $PGDATA/base or a tablespace version path.
*/
if (strncmp(path, "./base", parentPathLen) == 0 ||
(parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
TABLESPACE_VERSION_DIRECTORY,
sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
isDbDir = true;
}
dir = AllocateDir(path);
while ((de = ReadDir(dir, path)) != NULL)
{
int excludeIdx;
bool excludeFound;
ForkNumber relForkNum; /* Type of fork if file is a relation */
int relnumchars; /* Chars in filename that are the
* relnumber */
/* Skip special stuff */
if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
continue;
/* Skip temporary files */
if (strncmp(de->d_name,
PG_TEMP_FILE_PREFIX,
strlen(PG_TEMP_FILE_PREFIX)) == 0)
continue;
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
* do_pg_abort_backup() for us. Also check that if the backup was
* started while still in recovery, the server wasn't promoted.
* do_pg_backup_stop() will check that too, but it's better to stop
* the backup early than continue to the end and fail there.
*/
CHECK_FOR_INTERRUPTS();
if (RecoveryInProgress() != backup_started_in_recovery)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("the standby was promoted during online backup"),
errhint("This means that the backup being taken is corrupt "
"and should not be used. "
"Try taking another online backup.")));
/* Scan for files that should be excluded */
excludeFound = false;
for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
{
int cmplen = strlen(excludeFiles[excludeIdx].name);
if (!excludeFiles[excludeIdx].match_prefix)
cmplen++;
if (strncmp(de->d_name, excludeFiles[excludeIdx].name, cmplen) == 0)
{
elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
excludeFound = true;
break;
}
}
if (excludeFound)
continue;
/* Exclude all forks for unlogged tables except the init fork */
if (isDbDir &&
parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
&relForkNum))
{
/* Never exclude init forks */
if (relForkNum != INIT_FORKNUM)
{
char initForkFile[MAXPGPATH];
char relNumber[OIDCHARS + 1];
/*
* If any other type of fork, check if there is an init fork
* with the same RelFileNumber. If so, the file can be
* excluded.
*/
memcpy(relNumber, de->d_name, relnumchars);
relNumber[relnumchars] = '\0';
snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
path, relNumber);
if (lstat(initForkFile, &statbuf) == 0)
{
elog(DEBUG2,
"unlogged relation file \"%s\" excluded from backup",
de->d_name);
continue;
}
}
}
/* Exclude temporary relations */
if (isDbDir && looks_like_temp_rel_name(de->d_name))
{
elog(DEBUG2,
"temporary relation file \"%s\" excluded from backup",
de->d_name);
continue;
}
snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
/* Skip pg_control here to back up it last */
if (strcmp(pathbuf, "./global/pg_control") == 0)
continue;
if (lstat(pathbuf, &statbuf) != 0)
{
if (errno != ENOENT)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file or directory \"%s\": %m",
pathbuf)));
/* If the file went away while scanning, it's not an error. */
continue;
}
/* Scan for directories whose contents should be excluded */
excludeFound = false;
for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
{
if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
{
elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
convert_link_to_directory(pathbuf, &statbuf);
size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL,
&statbuf, sizeonly);
excludeFound = true;
break;
}
}
if (excludeFound)
continue;
/*
* We can skip pg_wal, the WAL segments need to be fetched from the
* WAL archive anyway. But include it as an empty directory anyway, so
* we get permissions right.
*/
if (strcmp(pathbuf, "./pg_wal") == 0)
{
/* If pg_wal is a symlink, write it as a directory anyway */
convert_link_to_directory(pathbuf, &statbuf);
size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL,
&statbuf, sizeonly);
/*
* Also send archive_status directory (by hackishly reusing
* statbuf from above ...).
*/
size += _tarWriteHeader(sink, "./pg_wal/archive_status", NULL,
&statbuf, sizeonly);
continue; /* don't recurse into pg_wal */
}
/* Allow symbolic links in pg_tblspc only */
if (strcmp(path, "./pg_tblspc") == 0 && S_ISLNK(statbuf.st_mode))
{
char linkpath[MAXPGPATH];
int rllen;
rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
if (rllen < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read symbolic link \"%s\": %m",
pathbuf)));
if (rllen >= sizeof(linkpath))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("symbolic link \"%s\" target is too long",
pathbuf)));
linkpath[rllen] = '\0';
size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, linkpath,
&statbuf, sizeonly);
}
else if (S_ISDIR(statbuf.st_mode))
{
bool skip_this_dir = false;
ListCell *lc;
/*
* Store a directory entry in the tar file so we can get the
* permissions right.
*/
size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL, &statbuf,
sizeonly);
/*
* Call ourselves recursively for a directory, unless it happens
* to be a separate tablespace located within PGDATA.
*/
foreach(lc, tablespaces)
{
tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
/*
* ti->rpath is the tablespace relative path within PGDATA, or
* NULL if the tablespace has been properly located somewhere
* else.
*
* Skip past the leading "./" in pathbuf when comparing.
*/
if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
{
skip_this_dir = true;
break;
}
}
/*
* skip sending directories inside pg_tblspc, if not required.
*/
if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
skip_this_dir = true;
if (!skip_this_dir)
size += sendDir(sink, pathbuf, basepathlen, sizeonly, tablespaces,
sendtblspclinks, manifest, spcoid);
}
else if (S_ISREG(statbuf.st_mode))
{
bool sent = false;
if (!sizeonly)
sent = sendFile(sink, pathbuf, pathbuf + basepathlen + 1, &statbuf,
true, isDbDir ? atooid(lastDir + 1) : InvalidOid,
manifest, spcoid);
if (sent || sizeonly)
{
/* Add size. */
size += statbuf.st_size;
/* Pad to a multiple of the tar block size. */
size += tarPaddingBytesRequired(statbuf.st_size);
/* Size of the header for the file. */
size += TAR_BLOCK_SIZE;
}
}
else
ereport(WARNING,
(errmsg("skipping special file \"%s\"", pathbuf)));
}
FreeDir(dir);
return size;
}
/*
* Check if a file should have its checksum validated.
* We validate checksums on files in regular tablespaces
* (including global and default) only, and in those there
* are some files that are explicitly excluded.
*/
static bool
is_checksummed_file(const char *fullpath, const char *filename)
{
/* Check that the file is in a tablespace */
if (strncmp(fullpath, "./global/", 9) == 0 ||
strncmp(fullpath, "./base/", 7) == 0 ||
strncmp(fullpath, "/", 1) == 0)
{
int excludeIdx;
/* Compare file against noChecksumFiles skip list */
for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
{
int cmplen = strlen(noChecksumFiles[excludeIdx].name);
if (!noChecksumFiles[excludeIdx].match_prefix)
cmplen++;
if (strncmp(filename, noChecksumFiles[excludeIdx].name,
cmplen) == 0)
return false;
}
return true;
}
else
return false;
}
/*
* Given the member, write the TAR header & send the file.
*
* If 'missing_ok' is true, will not throw an error if the file is not found.
*
* If dboid is anything other than InvalidOid then any checksum failures
* detected will get reported to the cumulative stats system.
*
* Returns true if the file was successfully sent, false if 'missing_ok',
* and the file did not exist.
*/
static bool
sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
struct stat *statbuf, bool missing_ok, Oid dboid,
backup_manifest_info *manifest, const char *spcoid)
{
int fd;
BlockNumber blkno = 0;
bool block_retry = false;
uint16 checksum;
int checksum_failures = 0;
off_t cnt;
int i;
pgoff_t len = 0;
char *page;
PageHeader phdr;
int segmentno = 0;
char *segmentpath;
bool verify_checksum = false;
pg_checksum_context checksum_ctx;
if (pg_checksum_init(&checksum_ctx, manifest->checksum_type) < 0)
elog(ERROR, "could not initialize checksum of file \"%s\"",
readfilename);
fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
if (fd < 0)
{
if (errno == ENOENT && missing_ok)
return false;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", readfilename)));
}
_tarWriteHeader(sink, tarfilename, NULL, statbuf, false);
if (!noverify_checksums && DataChecksumsEnabled())
{
char *filename;
/*
* Get the filename (excluding path). As last_dir_separator()
* includes the last directory separator, we chop that off by
* incrementing the pointer.
*/
filename = last_dir_separator(readfilename) + 1;
if (is_checksummed_file(readfilename, filename))
{
verify_checksum = true;
/*
* Cut off at the segment boundary (".") to get the segment number
* in order to mix it into the checksum.
*/
segmentpath = strstr(filename, ".");
if (segmentpath != NULL)
{
segmentno = atoi(segmentpath + 1);
if (segmentno == 0)
ereport(ERROR,
(errmsg("invalid segment number %d in file \"%s\"",
segmentno, filename)));
}
}
}
/*
* Loop until we read the amount of data the caller told us to expect. The
* file could be longer, if it was extended while we were sending it, but
* for a base backup we can ignore such extended data. It will be restored
* from WAL.
*/
while (len < statbuf->st_size)
{
size_t remaining = statbuf->st_size - len;
/* Try to read some more data. */
cnt = basebackup_read_file(fd, sink->bbs_buffer,
Min(sink->bbs_buffer_length, remaining),
len, readfilename, true);
/*
* The checksums are verified at block level, so we iterate over the
* buffer in chunks of BLCKSZ, after making sure that
* TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
* BLCKSZ bytes.
*/
Assert((sink->bbs_buffer_length % BLCKSZ) == 0);
if (verify_checksum && (cnt % BLCKSZ != 0))
{
ereport(WARNING,
(errmsg("could not verify checksum in file \"%s\", block "
"%u: read buffer size %d and page size %d "
"differ",
readfilename, blkno, (int) cnt, BLCKSZ)));
verify_checksum = false;
}
if (verify_checksum)
{
for (i = 0; i < cnt / BLCKSZ; i++)
{
page = sink->bbs_buffer + BLCKSZ * i;
/*
* Only check pages which have not been modified since the
* start of the base backup. Otherwise, they might have been
* written only halfway and the checksum would not be valid.
* However, replaying WAL would reinstate the correct page in
* this case. We also skip completely new pages, since they
* don't have a checksum yet.
*/
if (!PageIsNew(page) && PageGetLSN(page) < sink->bbs_state->startptr)
{
checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
phdr = (PageHeader) page;
if (phdr->pd_checksum != checksum)
{
/*
* Retry the block on the first failure. It's
* possible that we read the first 4K page of the
* block just before postgres updated the entire block
* so it ends up looking torn to us. If, before we
* retry the read, the concurrent write of the block
* finishes, the page LSN will be updated and we'll
* realize that we should ignore this block.
*
* There's no guarantee that this will actually
* happen, though: the torn write could take an
* arbitrarily long time to complete. Retrying
* multiple times wouldn't fix this problem, either,
* though it would reduce the chances of it happening
* in practice. The only real fix here seems to be to
* have some kind of interlock that allows us to wait
* until we can be certain that no write to the block
* is in progress. Since we don't have any such thing
* right now, we just do this and hope for the best.
*/
if (block_retry == false)
{
int reread_cnt;
/* Reread the failed block */
reread_cnt =
basebackup_read_file(fd,
sink->bbs_buffer + BLCKSZ * i,
BLCKSZ, len + BLCKSZ * i,
readfilename,
false);
if (reread_cnt == 0)
{
/*
* If we hit end-of-file, a concurrent
* truncation must have occurred, so break out
* of this loop just as if the initial fread()
* returned 0. We'll drop through to the same
* code that handles that case. (We must fix
* up cnt first, though.)
*/
cnt = BLCKSZ * i;
break;
}
/* Set flag so we know a retry was attempted */
block_retry = true;
/* Reset loop to validate the block again */
i--;
continue;
}
checksum_failures++;
if (checksum_failures <= 5)
ereport(WARNING,
(errmsg("checksum verification failed in "
"file \"%s\", block %u: calculated "
"%X but expected %X",
readfilename, blkno, checksum,
phdr->pd_checksum)));
if (checksum_failures == 5)
ereport(WARNING,
(errmsg("further checksum verification "
"failures in file \"%s\" will not "
"be reported", readfilename)));
}
}
block_retry = false;
blkno++;
}
}
/*
* If we hit end-of-file, a concurrent truncation must have occurred.
* That's not an error condition, because WAL replay will fix things
* up.
*/
if (cnt == 0)
break;
/* Archive the data we just read. */
bbsink_archive_contents(sink, cnt);
/* Also feed it to the checksum machinery. */
if (pg_checksum_update(&checksum_ctx,
(uint8 *) sink->bbs_buffer, cnt) < 0)
elog(ERROR, "could not update checksum of base backup");
len += cnt;
}
/* If the file was truncated while we were sending it, pad it with zeros */
while (len < statbuf->st_size)
{
size_t remaining = statbuf->st_size - len;
size_t nbytes = Min(sink->bbs_buffer_length, remaining);
MemSet(sink->bbs_buffer, 0, nbytes);
if (pg_checksum_update(&checksum_ctx,
(uint8 *) sink->bbs_buffer,
nbytes) < 0)
elog(ERROR, "could not update checksum of base backup");
bbsink_archive_contents(sink, nbytes);
len += nbytes;
}
/*
* Pad to a block boundary, per tar format requirements. (This small piece
* of data is probably not worth throttling, and is not checksummed
* because it's not actually part of the file.)
*/
_tarWritePadding(sink, len);
CloseTransientFile(fd);
if (checksum_failures > 1)
{
ereport(WARNING,
(errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
"file \"%s\" has a total of %d checksum verification failures",
checksum_failures,
readfilename, checksum_failures)));
pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
}
total_checksum_failures += checksum_failures;
AddFileToBackupManifest(manifest, spcoid, tarfilename, statbuf->st_size,
(pg_time_t) statbuf->st_mtime, &checksum_ctx);
return true;
}
static int64
_tarWriteHeader(bbsink *sink, const char *filename, const char *linktarget,
struct stat *statbuf, bool sizeonly)
{
enum tarError rc;
if (!sizeonly)
{
/*
* As of this writing, the smallest supported block size is 1kB, which
* is twice TAR_BLOCK_SIZE. Since the buffer size is required to be a
* multiple of BLCKSZ, it should be safe to assume that the buffer is
* large enough to fit an entire tar block. We double-check by means
* of these assertions.
*/
StaticAssertDecl(TAR_BLOCK_SIZE <= BLCKSZ,
"BLCKSZ too small for tar block");
Assert(sink->bbs_buffer_length >= TAR_BLOCK_SIZE);
rc = tarCreateHeader(sink->bbs_buffer, filename, linktarget,
statbuf->st_size, statbuf->st_mode,
statbuf->st_uid, statbuf->st_gid,
statbuf->st_mtime);
switch (rc)
{
case TAR_OK:
break;
case TAR_NAME_TOO_LONG:
ereport(ERROR,
(errmsg("file name too long for tar format: \"%s\"",
filename)));
break;
case TAR_SYMLINK_TOO_LONG:
ereport(ERROR,
(errmsg("symbolic link target too long for tar format: "
"file name \"%s\", target \"%s\"",
filename, linktarget)));
break;
default:
elog(ERROR, "unrecognized tar error: %d", rc);
}
bbsink_archive_contents(sink, TAR_BLOCK_SIZE);
}
return TAR_BLOCK_SIZE;
}
/*
* Pad with zero bytes out to a multiple of TAR_BLOCK_SIZE.
*/
static void
_tarWritePadding(bbsink *sink, int len)
{
int pad = tarPaddingBytesRequired(len);
/*
* As in _tarWriteHeader, it should be safe to assume that the buffer is
* large enough that we don't need to do this in multiple chunks.
*/
Assert(sink->bbs_buffer_length >= TAR_BLOCK_SIZE);
Assert(pad <= TAR_BLOCK_SIZE);
if (pad > 0)
{
MemSet(sink->bbs_buffer, 0, pad);
bbsink_archive_contents(sink, pad);
}
}
/*
* If the entry in statbuf is a link, then adjust statbuf to make it look like a
* directory, so that it will be written that way.
*/
static void
convert_link_to_directory(const char *pathbuf, struct stat *statbuf)
{
/* If symlink, write it as a directory anyway */
if (S_ISLNK(statbuf->st_mode))
statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
}
/*
* Read some data from a file, setting a wait event and reporting any error
* encountered.
*
* If partial_read_ok is false, also report an error if the number of bytes
* read is not equal to the number of bytes requested.
*
* Returns the number of bytes read.
*/
static int
basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
const char *filename, bool partial_read_ok)
{
int rc;
pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
rc = pg_pread(fd, buf, nbytes, offset);
pgstat_report_wait_end();
if (rc < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", filename)));
if (!partial_read_ok && rc > 0 && rc != nbytes)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": read %d of %zu",
filename, rc, nbytes)));
return rc;
}