postgresql/src/bin/pg_combinebackup/pg_combinebackup.c

1288 lines
38 KiB
C

/*-------------------------------------------------------------------------
*
* pg_combinebackup.c
* Combine incremental backups with prior backups.
*
* Copyright (c) 2017-2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/bin/pg_combinebackup/pg_combinebackup.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <dirent.h>
#include <fcntl.h>
#include <limits.h>
#include "backup_label.h"
#include "common/blkreftable.h"
#include "common/checksum_helper.h"
#include "common/controldata_utils.h"
#include "common/file_perm.h"
#include "common/file_utils.h"
#include "common/logging.h"
#include "copy_file.h"
#include "fe_utils/option_utils.h"
#include "getopt_long.h"
#include "lib/stringinfo.h"
#include "load_manifest.h"
#include "reconstruct.h"
#include "write_manifest.h"
/*
 * Incremental file naming convention.
 *
 * Directory entries whose names start with INCREMENTAL_PREFIX are handed to
 * the reconstruction code instead of being copied verbatim; the prefix is
 * stripped when forming the output path and the manifest path.
 */
#define INCREMENTAL_PREFIX "INCREMENTAL."
/* Length of the prefix in bytes, not counting the terminating NUL. */
#define INCREMENTAL_PREFIX_LENGTH (sizeof(INCREMENTAL_PREFIX) - 1)
/*
 * Tracking for directories that need to be removed, or have their contents
 * removed, if the operation fails.
 *
 * Entries form a singly linked list headed by cleanup_dir_list.
 */
typedef struct cb_cleanup_dir
{
/* Path of the directory; the pointer is stored as given, not copied. */
char *target_path;
/*
 * Passed through to rmtree(): true when we created the directory
 * ourselves, false when it already existed and was merely empty.
 */
bool rmtopdir;
/* Next list entry, or NULL at the end of the list. */
struct cb_cleanup_dir *next;
} cb_cleanup_dir;
/*
 * Stores a tablespace mapping provided using -T, --tablespace-mapping.
 *
 * Both paths are required to be absolute and are canonicalized before the
 * mapping is linked into cb_options.tsmappings.
 */
typedef struct cb_tablespace_mapping
{
/* Directory the tablespace link points at in the input backup. */
char old_dir[MAXPGPATH];
/* Directory the tablespace is relocated to in the output. */
char new_dir[MAXPGPATH];
/* Next mapping, or NULL at the end of the list. */
struct cb_tablespace_mapping *next;
} cb_tablespace_mapping;
/*
 * Stores data parsed from all command-line options.
 */
typedef struct cb_options
{
/* -d, --debug: also raises the logging verbosity when set. */
bool debug;
/* -o, --output: output directory; required. */
char *output;
/* -n, --dry-run: log what would be done without creating any output. */
bool dry_run;
/* -N, --no-sync: skip the final sync of the output directory. */
bool no_sync;
/* Head of the list built from -T, --tablespace-mapping options. */
cb_tablespace_mapping *tsmappings;
/*
 * --manifest-checksums: defaults to CHECKSUM_TYPE_CRC32C and is forced to
 * CHECKSUM_TYPE_NONE when no_manifest is set.
 */
pg_checksum_type manifest_checksums;
/* --no-manifest: do not write a backup_manifest to the output. */
bool no_manifest;
/* --sync-method: defaults to DATA_DIR_SYNC_METHOD_FSYNC. */
DataDirSyncMethod sync_method;
} cb_options;
/*
 * Data about a tablespace.
 *
 * Every normal tablespace needs a tablespace mapping, but in-place tablespaces
 * don't, so the list of tablespaces can contain more entries than the list of
 * tablespace mappings.
 */
typedef struct cb_tablespace
{
/* Tablespace OID, parsed from the entry name under pg_tblspc. */
Oid oid;
/* True if the pg_tblspc entry is a directory rather than a symbolic link. */
bool in_place;
/* Where the tablespace contents are read from. */
char old_dir[MAXPGPATH];
/* Where the tablespace contents are written to. */
char new_dir[MAXPGPATH];
/* Next tablespace, or NULL at the end of the list. */
struct cb_tablespace *next;
} cb_tablespace;
/*
 * Directories to be removed if we exit uncleanly.
 *
 * remember_to_cleanup_directory() pushes entries onto the front of this
 * list, cleanup_directories_atexit() consumes it at process exit, and
 * reset_directory_cleanup_list() empties it once the run has succeeded.
 */
cb_cleanup_dir *cleanup_dir_list = NULL;
/*
 * Forward declarations for the file-local helpers defined below, listed in
 * alphabetical order.
 */
static void add_tablespace_mapping(cb_options *opt, char *arg);
static StringInfo check_backup_label_files(int n_backups, char **backup_dirs);
static void check_control_files(int n_backups, char **backup_dirs);
static void check_input_dir_permissions(char *dir);
static void cleanup_directories_atexit(void);
static void create_output_directory(char *dirname, cb_options *opt);
static void help(const char *progname);
static bool parse_oid(char *s, Oid *result);
static void process_directory_recursively(Oid tsoid,
char *input_directory,
char *output_directory,
char *relative_path,
int n_prior_backups,
char **prior_backup_dirs,
manifest_data **manifests,
manifest_writer *mwriter,
cb_options *opt);
static int read_pg_version_file(char *directory);
static void remember_to_cleanup_directory(char *target_path, bool rmtopdir);
static void reset_directory_cleanup_list(void);
static cb_tablespace *scan_for_existing_tablespaces(char *pathname,
cb_options *opt);
static void slurp_file(int fd, char *filename, StringInfo buf, int maxlen);
/*
 * Main program.
 *
 * Parses the command line, sanity-checks the chain of input backups
 * (PG_VERSION, control files, backup_label files), loads the backup
 * manifests, creates the output directories, and then writes the combined
 * backup: the backup_label, the main data directory, each tablespace, and
 * finally the backup_manifest.  Output directories registered for cleanup
 * are removed by an atexit handler unless we get all the way to the end.
 *
 * The non-option arguments are the input backup directories, oldest first;
 * the last one is the backup being reconstructed.
 */
int
main(int argc, char *argv[])
{
    static struct option long_options[] = {
        {"debug", no_argument, NULL, 'd'},
        {"dry-run", no_argument, NULL, 'n'},
        {"no-sync", no_argument, NULL, 'N'},
        {"output", required_argument, NULL, 'o'},
        /* Takes OLDDIR=NEWDIR, exactly like the short form -T. */
        {"tablespace-mapping", required_argument, NULL, 'T'},
        {"manifest-checksums", required_argument, NULL, 1},
        {"no-manifest", no_argument, NULL, 2},
        {"sync-method", required_argument, NULL, 3},
        {NULL, 0, NULL, 0}
    };

    const char *progname;
    char       *last_input_dir;
    int         optindex;
    int         c;
    int         n_backups;
    int         n_prior_backups;
    int         version;
    char      **prior_backup_dirs;
    cb_options  opt;
    cb_tablespace *tablespaces;
    cb_tablespace *ts;
    StringInfo  last_backup_label;
    manifest_data **manifests;
    manifest_writer *mwriter;

    pg_logging_init(argv[0]);
    progname = get_progname(argv[0]);
    handle_help_version_opts(argc, argv, progname, help);

    memset(&opt, 0, sizeof(opt));
    opt.manifest_checksums = CHECKSUM_TYPE_CRC32C;
    opt.sync_method = DATA_DIR_SYNC_METHOD_FSYNC;

    /*
     * Process command-line options.  The option string lists only switches
     * that have a case below, so anything else is reported by getopt_long
     * itself before we reach the default branch.
     */
    while ((c = getopt_long(argc, argv, "dnNo:T:",
                            long_options, &optindex)) != -1)
    {
        switch (c)
        {
            case 'd':
                opt.debug = true;
                pg_logging_increase_verbosity();
                break;
            case 'n':
                opt.dry_run = true;
                break;
            case 'N':
                opt.no_sync = true;
                break;
            case 'o':
                opt.output = optarg;
                break;
            case 'T':
                add_tablespace_mapping(&opt, optarg);
                break;
            case 1:
                if (!pg_checksum_parse_type(optarg,
                                            &opt.manifest_checksums))
                    pg_fatal("unrecognized checksum algorithm: \"%s\"",
                             optarg);
                break;
            case 2:
                opt.no_manifest = true;
                break;
            case 3:
                if (!parse_sync_method(optarg, &opt.sync_method))
                    exit(1);
                break;
            default:
                /* getopt_long already emitted a complaint */
                pg_log_error_hint("Try \"%s --help\" for more information.", progname);
                exit(1);
        }
    }

    if (optind >= argc)
    {
        pg_log_error("no input directories specified");
        pg_log_error_hint("Try \"%s --help\" for more information.", progname);
        exit(1);
    }

    if (opt.output == NULL)
        pg_fatal("no output directory specified");

    /* If no manifest is needed, no checksums are needed, either. */
    if (opt.no_manifest)
        opt.manifest_checksums = CHECKSUM_TYPE_NONE;

    /*
     * Read the server version from the final backup.  The value returned is
     * already in the usual server version number format (e.g. 140000).
     */
    version = read_pg_version_file(argv[argc - 1]);

    /* Sanity-check control files. */
    n_backups = argc - optind;
    check_control_files(n_backups, argv + optind);

    /* Sanity-check backup_label files, and get the contents of the last one. */
    last_backup_label = check_backup_label_files(n_backups, argv + optind);

    /*
     * We'll need the pathnames to the prior backups. By "prior" we mean all
     * but the last one listed on the command line.
     */
    n_prior_backups = argc - optind - 1;
    prior_backup_dirs = argv + optind;

    /* Load backup manifests. */
    manifests = load_backup_manifests(n_backups, prior_backup_dirs);

    /* Figure out which tablespaces are going to be included in the output. */
    last_input_dir = argv[argc - 1];
    check_input_dir_permissions(last_input_dir);
    tablespaces = scan_for_existing_tablespaces(last_input_dir, &opt);

    /*
     * Create output directories.
     *
     * We create one output directory for the main data directory plus one for
     * each non-in-place tablespace. create_output_directory() will arrange
     * for those directories to be cleaned up on failure. In-place tablespaces
     * aren't handled at this stage because they're located beneath the main
     * output directory, and thus the cleanup of that directory will get rid
     * of them. Plus, the pg_tblspc directory that needs to contain them
     * doesn't exist yet.
     */
    atexit(cleanup_directories_atexit);
    create_output_directory(opt.output, &opt);
    for (ts = tablespaces; ts != NULL; ts = ts->next)
        if (!ts->in_place)
            create_output_directory(ts->new_dir, &opt);

    /* If we need to write a backup_manifest, prepare to do so. */
    if (!opt.dry_run && !opt.no_manifest)
    {
        mwriter = create_manifest_writer(opt.output);

        /*
         * Verify that we have a backup manifest for the final backup; else we
         * won't have the WAL ranges for the resulting manifest.
         */
        if (manifests[n_prior_backups] == NULL)
            pg_fatal("can't generate a manifest because no manifest is available for the final input backup");
    }
    else
        mwriter = NULL;

    /* Write backup label into output directory. */
    if (opt.dry_run)
        pg_log_debug("would generate \"%s/backup_label\"", opt.output);
    else
    {
        pg_log_debug("generating \"%s/backup_label\"", opt.output);
        last_backup_label->cursor = 0;
        write_backup_label(opt.output, last_backup_label,
                           opt.manifest_checksums, mwriter);
    }

    /* Process everything that's not part of a user-defined tablespace. */
    pg_log_debug("processing backup directory \"%s\"", last_input_dir);
    process_directory_recursively(InvalidOid, last_input_dir, opt.output,
                                  NULL, n_prior_backups, prior_backup_dirs,
                                  manifests, mwriter, &opt);

    /* Process user-defined tablespaces. */
    for (ts = tablespaces; ts != NULL; ts = ts->next)
    {
        pg_log_debug("processing tablespace directory \"%s\"", ts->old_dir);

        /*
         * If it's a normal tablespace, we need to set up a symbolic link from
         * pg_tblspc/${OID} to the target directory; if it's an in-place
         * tablespace, we need to create a directory at pg_tblspc/${OID}.
         */
        if (!ts->in_place)
        {
            char        linkpath[MAXPGPATH];

            snprintf(linkpath, MAXPGPATH, "%s/pg_tblspc/%u", opt.output,
                     ts->oid);

            if (opt.dry_run)
                pg_log_debug("would create symbolic link from \"%s\" to \"%s\"",
                             linkpath, ts->new_dir);
            else
            {
                pg_log_debug("creating symbolic link from \"%s\" to \"%s\"",
                             linkpath, ts->new_dir);
                if (symlink(ts->new_dir, linkpath) != 0)
                    pg_fatal("could not create symbolic link from \"%s\" to \"%s\": %m",
                             linkpath, ts->new_dir);
            }
        }
        else
        {
            if (opt.dry_run)
                pg_log_debug("would create directory \"%s\"", ts->new_dir);
            else
            {
                pg_log_debug("creating directory \"%s\"", ts->new_dir);
                if (pg_mkdir_p(ts->new_dir, pg_dir_create_mode) == -1)
                    pg_fatal("could not create directory \"%s\": %m",
                             ts->new_dir);
            }
        }

        /* OK, now handle the directory contents. */
        process_directory_recursively(ts->oid, ts->old_dir, ts->new_dir,
                                      NULL, n_prior_backups, prior_backup_dirs,
                                      manifests, mwriter, &opt);
    }

    /* Finalize the backup_manifest, if we're generating one. */
    if (mwriter != NULL)
        finalize_manifest(mwriter,
                          manifests[n_prior_backups]->first_wal_range);

    /* fsync that output directory unless we've been told not to do so */
    if (!opt.no_sync)
    {
        if (opt.dry_run)
            pg_log_debug("would recursively fsync \"%s\"", opt.output);
        else
        {
            pg_log_debug("recursively fsyncing \"%s\"", opt.output);

            /*
             * 'version' is already scaled by read_pg_version_file(); scaling
             * it a second time would hand sync_pgdata() a bogus version
             * number and could overflow an int for large major versions.
             */
            sync_pgdata(opt.output, version, opt.sync_method);
        }
    }

    /* It's a success, so don't remove the output directories. */
    reset_directory_cleanup_list();
    exit(0);
}
/*
 * Process the option argument for the -T, --tablespace-mapping switch.
 *
 * 'arg' must have the form OLDDIR=NEWDIR, where a literal equals sign inside
 * either directory name is written as "\=".  Both directories must be
 * absolute paths.  On success a new, canonicalized mapping is pushed onto the
 * front of opt->tsmappings; any problem with the argument is a fatal error.
 */
static void
add_tablespace_mapping(cb_options *opt, char *arg)
{
    cb_tablespace_mapping *tsmap = pg_malloc0(sizeof(cb_tablespace_mapping));
    char       *dst;
    char       *dst_ptr;
    char       *arg_ptr;

    /*
     * Basically, we just want to copy everything before the equals sign to
     * tsmap->old_dir and everything afterwards to tsmap->new_dir, but if
     * there's more or less than one equals sign, that's an error, and if
     * there's an equals sign preceded by a backslash, don't treat it as a
     * field separator but instead copy a literal equals sign.
     */
    dst_ptr = dst = tsmap->old_dir;
    for (arg_ptr = arg; *arg_ptr != '\0'; arg_ptr++)
    {
        if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=')
            continue;           /* skip backslash escaping = */

        if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\'))
        {
            /*
             * Check which buffer we're filling rather than whether new_dir
             * has content yet, so that adjacent separators ("a==b") are
             * caught as well.
             */
            if (dst == tsmap->new_dir)
                pg_fatal("multiple \"=\" signs in tablespace mapping");
            dst = dst_ptr = tsmap->new_dir;
            continue;
        }

        /*
         * Always leave room for the terminating NUL.  The struct was
         * zero-filled, so the last byte stays '\0' as long as we never
         * write to it.
         */
        if (dst_ptr - dst >= MAXPGPATH - 1)
            pg_fatal("directory name too long");
        *dst_ptr++ = *arg_ptr;
    }

    if (!tsmap->old_dir[0] || !tsmap->new_dir[0])
        pg_fatal("invalid tablespace mapping format \"%s\", must be \"OLDDIR=NEWDIR\"", arg);

    /*
     * All tablespaces are created with absolute directories, so specifying a
     * non-absolute path here would never match, possibly confusing users.
     *
     * In contrast to pg_basebackup, both the old and new directories are on
     * the local machine, so the local machine's definition of an absolute
     * path is the only relevant one.
     */
    if (!is_absolute_path(tsmap->old_dir))
        pg_fatal("old directory is not an absolute path in tablespace mapping: %s",
                 tsmap->old_dir);

    if (!is_absolute_path(tsmap->new_dir))
        pg_fatal("new directory is not an absolute path in tablespace mapping: %s",
                 tsmap->new_dir);

    /* Canonicalize paths to avoid spurious failures when comparing. */
    canonicalize_path(tsmap->old_dir);
    canonicalize_path(tsmap->new_dir);

    /* Add it to the list. */
    tsmap->next = opt->tsmappings;
    opt->tsmappings = tsmap;
}
/*
 * Verify that the backup_label files of the given backups describe a
 * coherent chain (one full backup followed by incrementals, each starting
 * where its successor says the previous backup started), and hand back the
 * contents of the backup_label belonging to the newest backup.
 *
 * backup_dirs lists the backups oldest first; any inconsistency is fatal.
 */
static StringInfo
check_backup_label_files(int n_backups, char **backup_dirs)
{
    StringInfo  scratch = makeStringInfo();
    StringInfo  newest_label = scratch;
    TimeLineID  expected_tli = 0;
    XLogRecPtr  expected_lsn = InvalidXLogRecPtr;
    int         idx = n_backups;

    /* Walk the backups from newest to oldest. */
    while (--idx >= 0)
    {
        bool        is_newest = (idx == n_backups - 1);
        char       *backup_dir = backup_dirs[idx];
        char        labelpath[MAXPGPATH];
        int         labelfd;
        TimeLineID  start_tli;
        TimeLineID  previous_tli;
        XLogRecPtr  start_lsn;
        XLogRecPtr  previous_lsn;

        /* Locate and open this backup's backup_label. */
        snprintf(labelpath, MAXPGPATH, "%s/backup_label", backup_dir);
        pg_log_debug("reading \"%s\"", labelpath);
        labelfd = open(labelpath, O_RDONLY, 0);
        if (labelfd < 0)
            pg_fatal("could not open file \"%s\": %m", labelpath);

        /*
         * Pull the whole file into memory.
         *
         * The precise limit is unimportant, since nearly everything in the
         * file is short and fixed-size.  The label itself, though, can be
         * (according to at least some of the code) as long as MAXPGPATH, so
         * allow for that on top of a generous base.
         */
        slurp_file(labelfd, labelpath, scratch, 10000 + MAXPGPATH);

        if (close(labelfd) != 0)
            pg_fatal("could not close \"%s\": %m", labelpath);

        /* Extract the fields we care about. */
        parse_backup_label(labelpath, scratch, &start_tli, &start_lsn,
                           &previous_tli, &previous_lsn);

        /*
         * Sanity checks.
         *
         * XXX. Strictly speaking start_lsn need not equal the expected LSN:
         * a larger start_lsn would be acceptable as long as it does not
         * exceed the relevant switchpoint.  We don't have that information
         * at present, so insist on equality.
         */
        if (idx > 0 && previous_tli == 0)
            pg_fatal("backup at \"%s\" is a full backup, but only the first backup should be a full backup",
                     backup_dir);
        if (idx == 0 && previous_tli != 0)
            pg_fatal("backup at \"%s\" is an incremental backup, but the first backup should be a full backup",
                     backup_dir);
        if (!is_newest && start_tli != expected_tli)
            pg_fatal("backup at \"%s\" starts on timeline %u, but expected %u",
                     backup_dir, start_tli, expected_tli);
        if (!is_newest && start_lsn != expected_lsn)
            pg_fatal("backup at \"%s\" starts at LSN %X/%X, but expected %X/%X",
                     backup_dir,
                     LSN_FORMAT_ARGS(start_lsn),
                     LSN_FORMAT_ARGS(expected_lsn));

        /* The next-older backup must start where this one's parent did. */
        expected_tli = previous_tli;
        expected_lsn = previous_lsn;

        /*
         * The newest label has to survive this function, so give later
         * iterations a buffer of their own; that buffer is simply recycled
         * from then on.
         */
        if (newest_label == scratch)
            scratch = makeStringInfo();
        else
            resetStringInfo(scratch);
    }

    /* Discard the recycled buffer, if one was ever allocated. */
    if (newest_label != scratch)
    {
        pfree(scratch->data);
        pfree(scratch);
    }

    /*
     * What we return is the first label read, i.e. the one from the last
     * directory named on the command line.
     */
    return newest_label;
}
/*
 * Sanity check control files.
 *
 * Every backup's control file must have a valid CRC, the current control
 * file version, and the same system identifier as the newest backup.  Any
 * violation is a fatal error.
 */
static void
check_control_files(int n_backups, char **backup_dirs)
{
    const int   newest = n_backups - 1;
    uint64      expected_sysid = 0; /* placate compiler */
    int         idx = n_backups;

    /* Visit the backups from newest to oldest. */
    while (idx-- > 0)
    {
        char       *backup_dir = backup_dirs[idx];
        char       *ctlpath;
        ControlFileData *ctl;
        bool        crc_valid;

        ctlpath = psprintf("%s/%s", backup_dir, "global/pg_control");
        pg_log_debug("reading \"%s\"", ctlpath);
        ctl = get_controlfile(backup_dir, &crc_valid);

        /* With a bad CRC, nothing else in the file can be trusted. */
        if (!crc_valid)
            pg_fatal("%s: CRC is incorrect", ctlpath);

        /* We only know how to interpret the current layout. */
        if (ctl->pg_control_version != PG_CONTROL_VERSION)
            pg_fatal("%s: unexpected control file version",
                     ctlpath);

        /* The newest backup sets the identifier; the rest must agree. */
        if (idx == newest)
            expected_sysid = ctl->system_identifier;
        else if (ctl->system_identifier != expected_sysid)
            pg_fatal("%s: expected system identifier %llu, but found %llu",
                     ctlpath, (unsigned long long) expected_sysid,
                     (unsigned long long) ctl->system_identifier);

        pfree(ctl);
        pfree(ctlpath);
    }

    /*
     * For the benefit of debug output, report the identifier shared by all
     * of the control files.
     */
    pg_log_debug("system identifier is %llu",
                 (unsigned long long) expected_sysid);
}
/*
 * Derive the default permissions for newly created files and directories
 * from the mode of 'dir', so that the output directory follows the same
 * permissions scheme as the final input directory.
 *
 * Failure to stat 'dir' is a fatal error.
 */
static void
check_input_dir_permissions(char *dir)
{
    struct stat dirstat;
    int         rc;

    rc = stat(dir, &dirstat);
    if (rc != 0)
        pg_fatal("could not stat \"%s\": %m", dir);

    SetDataDirectoryCreatePerm(dirstat.st_mode);
}
/*
* Clean up output directories before exiting.
*/
static void
cleanup_directories_atexit(void)
{
while (cleanup_dir_list != NULL)
{
cb_cleanup_dir *dir = cleanup_dir_list;
if (dir->rmtopdir)
{
pg_log_info("removing output directory \"%s\"", dir->target_path);
if (!rmtree(dir->target_path, dir->rmtopdir))
pg_log_error("failed to remove output directory");
}
else
{
pg_log_info("removing contents of output directory \"%s\"",
dir->target_path);
if (!rmtree(dir->target_path, dir->rmtopdir))
pg_log_error("failed to remove contents of output directory");
}
cleanup_dir_list = cleanup_dir_list->next;
pfree(dir);
}
}
/*
 * Make sure the named output directory exists and is empty.
 *
 * A missing directory is created, except in dry-run mode where we only log
 * what would happen.  An existing empty directory is used as is.  An
 * existing non-empty directory, or one we cannot access, is a fatal error.
 *
 * Any directory we create or adopt is registered for cleanup at process
 * exit; a directory we created is removed outright, an adopted one only has
 * its contents removed.
 */
static void
create_output_directory(char *dirname, cb_options *opt)
{
    int         status = pg_check_dir(dirname);

    if (status == -1)
    {
        pg_fatal("could not access directory \"%s\": %m", dirname);
    }
    else if (status == 2 || status == 3 || status == 4)
    {
        pg_fatal("directory \"%s\" exists but is not empty", dirname);
    }
    else if (status == 1)
    {
        /* Exists and is empty: reuse it, but never remove it itself. */
        pg_log_debug("using existing directory \"%s\"", dirname);
        remember_to_cleanup_directory(dirname, false);
    }
    else if (status == 0)
    {
        /* Does not exist yet. */
        if (opt->dry_run)
        {
            pg_log_debug("would create directory \"%s\"", dirname);
            return;
        }

        pg_log_debug("creating directory \"%s\"", dirname);
        if (pg_mkdir_p(dirname, pg_dir_create_mode) == -1)
            pg_fatal("could not create directory \"%s\": %m", dirname);
        remember_to_cleanup_directory(dirname, true);
    }
}
/*
 * help
 *
 * Prints help page for the program
 *
 * progname: the name of the executed program, such as "pg_combinebackup"
 *
 * Every line is written to standard output with printf(), and each message
 * is wrapped in _() so that it can be translated.  This function is also
 * handed to handle_help_version_opts() from main().
 */
static void
help(const char *progname)
{
/* One-line summary and usage synopsis. */
printf(_("%s reconstructs full backups from incrementals.\n\n"), progname);
printf(_("Usage:\n"));
printf(_(" %s [OPTION]... DIRECTORY...\n"), progname);
/* One entry per option handled by the getopt_long() loop in main(). */
printf(_("\nOptions:\n"));
printf(_(" -d, --debug generate lots of debugging output\n"));
printf(_(" -n, --dry-run don't actually do anything\n"));
printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
printf(_(" -o, --output output directory\n"));
printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n"
" relocate tablespace in OLDDIR to NEWDIR\n"));
printf(_(" --manifest-checksums=SHA{224,256,384,512}|CRC32C|NONE\n"
" use algorithm for manifest checksums\n"));
printf(_(" --no-manifest suppress generation of backup manifest\n"));
printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
printf(_(" -?, --help show this help, then exit\n"));
/* Trailer: where to report bugs and the project home page. */
printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
/*
 * Try to parse a string as a non-zero OID without leading zeroes.
 *
 * The string must consist solely of decimal digits, must not start with '0',
 * and must denote a value no larger than PG_UINT32_MAX.
 *
 * If it works, return true and set *result to the answer, else return false
 * and leave *result untouched.
 */
static bool
parse_oid(char *s, Oid *result)
{
    unsigned long val;
    char       *ep;

    /*
     * strtoul() happily skips leading whitespace and accepts a sign or
     * leading zeroes, none of which can appear in a tablespace OID name.
     * Requiring the first character to be 1..9 rules all of that out, as
     * well as the empty string and a value of zero.
     */
    if (s[0] < '1' || s[0] > '9')
        return false;

    /*
     * Keep the full-width result for the range check; assigning straight to
     * an Oid would silently discard the high bits on platforms where
     * unsigned long is wider than 32 bits.
     */
    errno = 0;
    val = strtoul(s, &ep, 10);
    if (errno != 0 || *ep != '\0' || val > PG_UINT32_MAX)
        return false;

    *result = (Oid) val;
    return true;
}
/*
 * Copy files from the input directory to the output directory, reconstructing
 * full files from incremental files as required.
 *
 * If processing is a user-defined tablespace, the tsoid should be the OID
 * of that tablespace and input_directory and output_directory should be the
 * toplevel input and output directories for that tablespace. Otherwise,
 * tsoid should be InvalidOid and input_directory and output_directory should
 * be the main input and output directories.
 *
 * relative_path is the path beneath the given input and output directories
 * that we are currently processing. If NULL, it indicates that we're
 * processing the input and output directories themselves.
 *
 * n_prior_backups is the number of prior backups that we have available.
 * This doesn't count the very last backup, which is referenced by
 * input_directory, just the older ones. prior_backup_dirs is an array of
 * the locations of those previous backups.
 *
 * manifests holds one entry per input backup (NULL where no manifest is
 * available); the entry at index n_prior_backups belongs to the final
 * backup.  If mwriter is not NULL, a manifest entry is added for every file
 * written.  opt supplies the dry-run, debug and checksum settings.
 *
 * Any I/O failure is a fatal error.
 */
static void
process_directory_recursively(Oid tsoid,
                              char *input_directory,
                              char *output_directory,
                              char *relative_path,
                              int n_prior_backups,
                              char **prior_backup_dirs,
                              manifest_data **manifests,
                              manifest_writer *mwriter,
                              cb_options *opt)
{
    char        ifulldir[MAXPGPATH];
    char        ofulldir[MAXPGPATH];
    char        manifest_prefix[MAXPGPATH];
    DIR        *dir;
    struct dirent *de;
    bool        is_pg_tblspc;
    bool        is_pg_wal;
    manifest_data *latest_manifest = manifests[n_prior_backups];
    pg_checksum_type checksum_type;

    /*
     * pg_tblspc and pg_wal are special cases, so detect those here.
     *
     * pg_tblspc is only special at the top level, but subdirectories of
     * pg_wal are just as special as the top level directory.
     *
     * Since incremental backup does not exist in pre-v10 versions, we don't
     * have to worry about the old pg_xlog naming.
     */
    is_pg_tblspc = !OidIsValid(tsoid) && relative_path != NULL &&
        strcmp(relative_path, "pg_tblspc") == 0;
    is_pg_wal = !OidIsValid(tsoid) && relative_path != NULL &&
        (strcmp(relative_path, "pg_wal") == 0 ||
         strncmp(relative_path, "pg_wal/", 7) == 0);

    /*
     * If we're under pg_wal, then we don't need checksums, because these
     * files aren't included in the backup manifest. Otherwise use whatever
     * type of checksum is configured.
     */
    if (!is_pg_wal)
        checksum_type = opt->manifest_checksums;
    else
        checksum_type = CHECKSUM_TYPE_NONE;

    /*
     * Append the relative path to the input and output directories, and
     * figure out the appropriate prefix to add to files in this directory
     * when looking them up in a backup manifest.
     */
    if (relative_path == NULL)
    {
        strlcpy(ifulldir, input_directory, MAXPGPATH);
        strlcpy(ofulldir, output_directory, MAXPGPATH);
        if (OidIsValid(tsoid))
            snprintf(manifest_prefix, MAXPGPATH, "pg_tblspc/%u/", tsoid);
        else
            manifest_prefix[0] = '\0';
    }
    else
    {
        snprintf(ifulldir, MAXPGPATH, "%s/%s", input_directory,
                 relative_path);
        snprintf(ofulldir, MAXPGPATH, "%s/%s", output_directory,
                 relative_path);
        if (OidIsValid(tsoid))
            snprintf(manifest_prefix, MAXPGPATH, "pg_tblspc/%u/%s/",
                     tsoid, relative_path);
        else
            snprintf(manifest_prefix, MAXPGPATH, "%s/", relative_path);
    }

    /*
     * Toplevel output directories have already been created by the time this
     * function is called, but any subdirectories are our responsibility.
     */
    if (relative_path != NULL)
    {
        if (opt->dry_run)
            pg_log_debug("would create directory \"%s\"", ofulldir);
        else
        {
            pg_log_debug("creating directory \"%s\"", ofulldir);
            if (mkdir(ofulldir, pg_dir_create_mode) == -1)
                pg_fatal("could not create directory \"%s\": %m", ofulldir);
        }
    }

    /* It's time to scan the directory. */
    if ((dir = opendir(ifulldir)) == NULL)
        pg_fatal("could not open directory \"%s\": %m", ifulldir);

    /*
     * errno is cleared before every readdir() call so that, once the loop
     * ends, a nonzero errno can only mean that readdir() itself failed.
     */
    while (errno = 0, (de = readdir(dir)) != NULL)
    {
        PGFileType  type;
        char        ifullpath[MAXPGPATH];
        char        ofullpath[MAXPGPATH];
        char        manifest_path[MAXPGPATH];
        Oid         oid = InvalidOid;
        int         checksum_length = 0;
        uint8      *checksum_payload = NULL;
        pg_checksum_context checksum_ctx;

        /* Ignore "." and ".." entries. */
        if (strcmp(de->d_name, ".") == 0 ||
            strcmp(de->d_name, "..") == 0)
            continue;

        /* Construct input path. */
        snprintf(ifullpath, MAXPGPATH, "%s/%s", ifulldir, de->d_name);

        /* Figure out what kind of directory entry this is. */
        type = get_dirent_type(ifullpath, de, false, PG_LOG_ERROR);
        if (type == PGFILETYPE_ERROR)
            exit(1);

        /*
         * If we're processing pg_tblspc, then check whether the filename
         * looks like it could be a tablespace OID. If so, and if the
         * directory entry is a symbolic link or a directory, skip it.
         *
         * Our goal here is to ignore anything that would have been considered
         * by scan_for_existing_tablespaces to be a tablespace.
         */
        if (is_pg_tblspc && parse_oid(de->d_name, &oid) &&
            (type == PGFILETYPE_LNK || type == PGFILETYPE_DIR))
            continue;

        /* If it's a directory, recurse. */
        if (type == PGFILETYPE_DIR)
        {
            char        new_relative_path[MAXPGPATH];

            /* Append new pathname component to relative path. */
            if (relative_path == NULL)
                strlcpy(new_relative_path, de->d_name, MAXPGPATH);
            else
                snprintf(new_relative_path, MAXPGPATH, "%s/%s", relative_path,
                         de->d_name);

            /* And recurse. */
            process_directory_recursively(tsoid,
                                          input_directory, output_directory,
                                          new_relative_path,
                                          n_prior_backups, prior_backup_dirs,
                                          manifests, mwriter, opt);
            continue;
        }

        /* Skip anything that's not a regular file. */
        if (type != PGFILETYPE_REG)
        {
            if (type == PGFILETYPE_LNK)
                pg_log_warning("skipping symbolic link \"%s\"", ifullpath);
            else
                pg_log_warning("skipping special file \"%s\"", ifullpath);
            continue;
        }

        /*
         * Skip the backup_label and backup_manifest files; they require
         * special handling and are handled elsewhere.
         */
        if (relative_path == NULL &&
            (strcmp(de->d_name, "backup_label") == 0 ||
             strcmp(de->d_name, "backup_manifest") == 0))
            continue;

        /*
         * If it's an incremental file, hand it off to the reconstruction
         * code, which will figure out what to do.
         */
        if (strncmp(de->d_name, INCREMENTAL_PREFIX,
                    INCREMENTAL_PREFIX_LENGTH) == 0)
        {
            /* Output path should not include "INCREMENTAL." prefix. */
            snprintf(ofullpath, MAXPGPATH, "%s/%s", ofulldir,
                     de->d_name + INCREMENTAL_PREFIX_LENGTH);

            /* Manifest path likewise omits incremental prefix. */
            snprintf(manifest_path, MAXPGPATH, "%s%s", manifest_prefix,
                     de->d_name + INCREMENTAL_PREFIX_LENGTH);

            /* Reconstruction logic will do the rest. */
            reconstruct_from_incremental_file(ifullpath, ofullpath,
                                              relative_path,
                                              de->d_name + INCREMENTAL_PREFIX_LENGTH,
                                              n_prior_backups,
                                              prior_backup_dirs,
                                              manifests,
                                              manifest_path,
                                              checksum_type,
                                              &checksum_length,
                                              &checksum_payload,
                                              opt->debug,
                                              opt->dry_run);
        }
        else
        {
            /* Construct the path that the backup_manifest will use. */
            snprintf(manifest_path, MAXPGPATH, "%s%s", manifest_prefix,
                     de->d_name);

            /*
             * It's not an incremental file, so we need to copy the entire
             * file to the output directory.
             *
             * If a checksum of the required type already exists in the
             * backup_manifest for the final input directory, we can save some
             * work by reusing that checksum instead of computing a new one.
             */
            if (checksum_type != CHECKSUM_TYPE_NONE &&
                latest_manifest != NULL)
            {
                manifest_file *mfile;

                mfile = manifest_files_lookup(latest_manifest->files,
                                              manifest_path);
                if (mfile == NULL)
                {
                    char       *bmpath;

                    /*
                     * The directory is out of sync with the backup_manifest,
                     * so emit a warning.
                     */
                    bmpath = psprintf("%s/%s", input_directory,
                                      "backup_manifest");
                    pg_log_warning("\"%s\" contains no entry for \"%s\"",
                                   bmpath, manifest_path);
                    pfree(bmpath);
                }
                else if (mfile->checksum_type == checksum_type &&
                         mfile->checksum_length > 0)
                {
                    /*
                     * Take a private copy of the checksum.  The payload in
                     * the manifest entry belongs to the manifest, and the
                     * buffer we hold is released at the bottom of this loop;
                     * freeing the manifest's own buffer would leave the
                     * entry with a dangling pointer.
                     */
                    checksum_length = mfile->checksum_length;
                    checksum_payload = pg_malloc(checksum_length);
                    memcpy(checksum_payload, mfile->checksum_payload,
                           checksum_length);
                }
            }

            /*
             * If we're reusing a checksum, then we don't need copy_file() to
             * compute one for us, but otherwise, it needs to compute whatever
             * type of checksum we need.
             */
            if (checksum_length != 0)
                pg_checksum_init(&checksum_ctx, CHECKSUM_TYPE_NONE);
            else
                pg_checksum_init(&checksum_ctx, checksum_type);

            /* Actually copy the file. */
            snprintf(ofullpath, MAXPGPATH, "%s/%s", ofulldir, de->d_name);
            copy_file(ifullpath, ofullpath, &checksum_ctx, opt->dry_run);

            /*
             * If copy_file() performed a checksum calculation for us, then
             * save the results (except in dry-run mode, when there's no
             * point).
             */
            if (checksum_ctx.type != CHECKSUM_TYPE_NONE && !opt->dry_run)
            {
                checksum_payload = pg_malloc(PG_CHECKSUM_MAX_LENGTH);
                checksum_length = pg_checksum_final(&checksum_ctx,
                                                    checksum_payload);
            }
        }

        /* Generate manifest entry, if needed. */
        if (mwriter != NULL)
        {
            struct stat sb;

            /*
             * In order to generate a manifest entry, we need the file size
             * and mtime. We have no way to know the correct mtime except to
             * stat() the file, so just do that and get the size as well.
             *
             * If we didn't need the mtime here, we could try to obtain the
             * file size from the reconstruction or file copy process above,
             * although that is actually not convenient in all cases. If we
             * write the file ourselves then clearly we can keep a count of
             * bytes, but if we use something like CopyFile() then it's
             * trickier. Since we have to stat() anyway to get the mtime,
             * there's no point in worrying about it.
             */
            if (stat(ofullpath, &sb) < 0)
                pg_fatal("could not stat file \"%s\": %m", ofullpath);

            /* OK, now do the work. */
            add_file_to_manifest(mwriter, manifest_path,
                                 sb.st_size, sb.st_mtime,
                                 checksum_type, checksum_length,
                                 checksum_payload);
        }

        /* Avoid leaking memory. */
        if (checksum_payload != NULL)
            pfree(checksum_payload);
    }

    /* readdir() returns NULL both at end of directory and on error. */
    if (errno != 0)
        pg_fatal("could not read directory \"%s\": %m", ifulldir);

    if (closedir(dir) != 0)
        pg_fatal("could not close directory \"%s\": %m", ifulldir);
}
/*
 * Read the version number from PG_VERSION and convert it to the usual server
 * version number format. (e.g. If PG_VERSION contains "14\n" this function
 * will return 140000)
 *
 * 'directory' is the backup directory containing PG_VERSION.  A missing,
 * unreadable or unparsable file is a fatal error, as is a multi-part version
 * number such as "9.6".
 */
static int
read_pg_version_file(char *directory)
{
    char        filename[MAXPGPATH];
    StringInfoData buf;
    int         fd;
    int         version;
    unsigned long parsed;
    char       *ep;

    /* Construct pathname. */
    snprintf(filename, MAXPGPATH, "%s/PG_VERSION", directory);

    /* Open file. */
    if ((fd = open(filename, O_RDONLY, 0)) < 0)
        pg_fatal("could not open file \"%s\": %m", filename);

    /* Read into memory. Length limit of 128 should be more than generous. */
    initStringInfo(&buf);
    slurp_file(fd, filename, &buf, 128);

    /* Close the file. */
    if (close(fd) != 0)
        pg_fatal("could not close \"%s\": %m", filename);

    /*
     * Convert to integer.
     *
     * Besides conversion errors and trailing garbage, reject input with no
     * digits at all (strtoul() then leaves ep at the start of the string, so
     * a file holding just "\n" would otherwise parse as version 0) and
     * values too large to be scaled into an int below.
     */
    errno = 0;
    parsed = strtoul(buf.data, &ep, 10);
    if (errno != 0 || ep == buf.data || *ep != '\n' ||
        parsed > (unsigned long) (INT_MAX / 10000))
    {
        /*
         * Incremental backup is not relevant to very old server versions that
         * used multi-part version number (e.g. 9.6, or 8.4). So if we see
         * what looks like the beginning of such a version number, just bail
         * out.
         */
        if (ep != buf.data && parsed < 10 && *ep == '.')
            pg_fatal("%s: server version too old", filename);
        pg_fatal("%s: could not parse version number", filename);
    }
    version = (int) parsed;

    /* Debugging output. */
    pg_log_debug("read server version %d from \"%s\"", version, filename);

    /* Release memory and return result. */
    pfree(buf.data);
    return version * 10000;
}
/*
 * Register an output directory for cleanup at process exit.
 *
 * The new entry goes on the front of cleanup_dir_list.  target_path is
 * stored by reference, not copied.  rmtopdir says whether the directory
 * itself, and not only its contents, should be removed.
 */
static void
remember_to_cleanup_directory(char *target_path, bool rmtopdir)
{
    cb_cleanup_dir *entry;

    entry = pg_malloc(sizeof(cb_cleanup_dir));
    entry->next = cleanup_dir_list;
    entry->rmtopdir = rmtopdir;
    entry->target_path = target_path;

    cleanup_dir_list = entry;
}
/*
* Empty out the list of directories scheduled for cleanup a exit.
*
* We want to remove the output directories only on a failure, so call this
* function when we know that the operation has succeeded.
*
* Since we only expect this to be called when we're about to exit, we could
* just set cleanup_dir_list to NULL and be done with it, but we free the
* memory to be tidy.
*/
static void
reset_directory_cleanup_list(void)
{
while (cleanup_dir_list != NULL)
{
cb_cleanup_dir *dir = cleanup_dir_list;
cleanup_dir_list = cleanup_dir_list->next;
pfree(dir);
}
}
/*
 * Scan the pg_tblspc directory of the final input backup to get a canonical
 * list of what tablespaces are part of the backup.
 *
 * 'pathname' should be the path to the toplevel backup directory for the
 * final backup in the backup chain.
 *
 * An entry in pg_tblspc counts as a tablespace if its name parses as an OID
 * and it is either a symbolic link (a normal tablespace, which must match
 * one of the mappings in opt->tsmappings) or a directory (an in-place
 * tablespace).  Returns the head of a newly allocated list, or NULL if there
 * are no tablespaces.  Problems reading the directory, unmapped or relative
 * links, and two tablespaces sharing an output directory are fatal errors.
 */
static cb_tablespace *
scan_for_existing_tablespaces(char *pathname, cb_options *opt)
{
    char        pg_tblspc[MAXPGPATH];
    DIR        *dir;
    struct dirent *de;
    cb_tablespace *tslist = NULL;

    snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pathname);
    pg_log_debug("scanning \"%s\"", pg_tblspc);

    if ((dir = opendir(pg_tblspc)) == NULL)
        pg_fatal("could not open directory \"%s\": %m", pg_tblspc);

    /*
     * errno is cleared before every readdir() call so that, once the loop
     * ends, a nonzero errno can only mean that readdir() itself failed.
     */
    while (errno = 0, (de = readdir(dir)) != NULL)
    {
        Oid         oid;
        char        tblspcdir[MAXPGPATH];
        char        link_target[MAXPGPATH];
        int         link_length;
        cb_tablespace *ts;
        cb_tablespace *otherts;
        PGFileType  type;

        /* Silently ignore "." and ".." entries. */
        if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
            continue;

        /* Construct full pathname. */
        snprintf(tblspcdir, MAXPGPATH, "%s/%s", pg_tblspc, de->d_name);

        /* Ignore any file name that doesn't look like a proper OID. */
        if (!parse_oid(de->d_name, &oid))
        {
            pg_log_debug("skipping \"%s\" because the filename is not a legal tablespace OID",
                         tblspcdir);
            continue;
        }

        /* Only symbolic links and directories are tablespaces. */
        type = get_dirent_type(tblspcdir, de, false, PG_LOG_ERROR);
        if (type == PGFILETYPE_ERROR)
            exit(1);
        if (type != PGFILETYPE_LNK && type != PGFILETYPE_DIR)
        {
            pg_log_debug("skipping \"%s\" because it is neither a symbolic link nor a directory",
                         tblspcdir);
            continue;
        }

        /* Create a new tablespace object. */
        ts = pg_malloc0(sizeof(cb_tablespace));
        ts->oid = oid;

        /*
         * If it's a link, it's not an in-place tablespace. Otherwise, it must
         * be a directory, and thus an in-place tablespace.
         */
        if (type == PGFILETYPE_LNK)
        {
            cb_tablespace_mapping *tsmap;

            /* Read the link target. */
            link_length = readlink(tblspcdir, link_target, sizeof(link_target));
            if (link_length < 0)
                pg_fatal("could not read symbolic link \"%s\": %m",
                         tblspcdir);
            if (link_length >= sizeof(link_target))
                pg_fatal("symbolic link \"%s\" is too long", tblspcdir);
            link_target[link_length] = '\0';
            if (!is_absolute_path(link_target))
                pg_fatal("symbolic link \"%s\" is relative", tblspcdir);

            /* Canonicalize the link target. */
            canonicalize_path(link_target);

            /*
             * Find the corresponding tablespace mapping and copy the relevant
             * details into the new tablespace entry.
             */
            for (tsmap = opt->tsmappings; tsmap != NULL; tsmap = tsmap->next)
            {
                if (strcmp(tsmap->old_dir, link_target) == 0)
                {
                    strlcpy(ts->old_dir, tsmap->old_dir, MAXPGPATH);
                    strlcpy(ts->new_dir, tsmap->new_dir, MAXPGPATH);
                    ts->in_place = false;
                    break;
                }
            }

            /* Every non-in-place tablespace must be mapped. */
            if (tsmap == NULL)
                pg_fatal("tablespace at \"%s\" has no tablespace mapping",
                         link_target);
        }
        else
        {
            /*
             * For an in-place tablespace, there's no separate directory, so
             * we just record the paths within the data directories.
             */
            snprintf(ts->old_dir, MAXPGPATH, "%s/%s", pg_tblspc, de->d_name);
            snprintf(ts->new_dir, MAXPGPATH, "%s/pg_tblspc/%s", opt->output,
                     de->d_name);
            ts->in_place = true;
        }

        /* Tablespaces should not share a directory. */
        for (otherts = tslist; otherts != NULL; otherts = otherts->next)
            if (strcmp(ts->new_dir, otherts->new_dir) == 0)
                pg_fatal("tablespaces with OIDs %u and %u both point at \"%s\"",
                         otherts->oid, oid, ts->new_dir);

        /* Add this tablespace to the list. */
        ts->next = tslist;
        tslist = ts;
    }

    /* readdir() returns NULL both at end of directory and on error. */
    if (errno != 0)
        pg_fatal("could not read directory \"%s\": %m", pg_tblspc);

    if (closedir(dir) != 0)
        pg_fatal("could not close directory \"%s\": %m", pg_tblspc);

    return tslist;
}
/*
 * Append the entire contents of an open file to a StringInfo.
 *
 * All I/O goes through fd; filename is used only in error messages.  The
 * file's size is taken from fstat(), and it is a fatal error if that size
 * exceeds maxlen or if the subsequent read does not return exactly that
 * many bytes.
 */
static void
slurp_file(int fd, char *filename, StringInfo buf, int maxlen)
{
    struct stat finfo;
    ssize_t     nread;

    /* Find out how big the file is, and refuse anything oversized. */
    if (fstat(fd, &finfo) != 0)
        pg_fatal("could not stat \"%s\": %m", filename);
    if (finfo.st_size > maxlen)
        pg_fatal("file \"%s\" is too large", filename);

    /* Reserve room for the data behind whatever the buffer already holds. */
    enlargeStringInfo(buf, finfo.st_size);

    nread = read(fd, &buf->data[buf->len], finfo.st_size);

    /*
     * Nobody should be modifying the file while we read it, so anything
     * other than the full expected byte count is treated as an error.
     */
    if (nread < 0)
        pg_fatal("could not read file \"%s\": %m", filename);
    if (nread != finfo.st_size)
        pg_fatal("could not read file \"%s\": read only %zd of %lld bytes",
                 filename, nread, (long long int) finfo.st_size);

    /* Account for the new bytes and re-establish the trailing NUL. */
    buf->len += nread;
    buf->data[buf->len] = '\0';
}