/*-------------------------------------------------------------------------
*
* basebackup.c
* code for taking a base backup and streaming it to a standby
*
 * Portions Copyright (c) 2010-2017, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/replication/basebackup.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/stat.h>
#include <unistd.h>
#include <time.h>
#include "access/xlog_internal.h"	/* for pg_start/stop_backup */
#include "catalog/catalog.h"
#include "catalog/pg_type.h"
#include "lib/stringinfo.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "nodes/pg_list.h"
#include "pgtar.h"
#include "pgstat.h"
#include "postmaster/syslogger.h"
#include "replication/basebackup.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
#include "storage/dsm_impl.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
#include "utils/timestamp.h"
typedef struct
{
const char *label;
bool progress;
bool fastcheckpoint;
bool nowait;
bool includewal;
uint32 maxrate;
bool sendtblspcmapfile;
2011-04-10 17:42:00 +02:00
} basebackup_options;
static int64 sendDir(char *path, int basepathlen, bool sizeonly,
2015-05-24 03:35:49 +02:00
List *tablespaces, bool sendtblspclinks);
static bool sendFile(char *readfilename, char *tarfilename,
2017-06-21 20:39:04 +02:00
struct stat *statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
static int64 _tarWriteHeader(const char *filename, const char *linktarget,
2017-06-21 20:39:04 +02:00
struct stat *statbuf, bool sizeonly);
static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
bool sizeonly);
static void send_int8_string(StringInfoData *buf, int64 intval);
static void SendBackupHeader(List *tablespaces);
static void base_backup_cleanup(int code, Datum arg);
static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
/* Relative path of temporary statistics directory */
static char *statrelpath = NULL;
/*
* Size of each block sent into the tar stream for larger files.
*/
#define TAR_SEND_SIZE 32768
/*
* How frequently to throttle, as a fraction of the specified rate-second.
*/
#define THROTTLING_FREQUENCY 8
/* The actual number of bytes, transfer of which may cause sleep. */
static uint64 throttling_sample;
/* Amount of data already transferred but not yet throttled. */
static int64 throttling_counter;
/* The minimum time required to transfer throttling_sample bytes. */
static TimeOffset elapsed_min_unit;
/* The last check of the transfer rate. */
static TimestampTz throttled_last;
/*
* The contents of these directories are removed or recreated during server
* start so they are not included in backups. The directories themselves are
* kept and included as empty to preserve access permissions.
*/
static const char *excludeDirContents[] =
{
/*
* Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
* when stats_temp_directory is set because PGSS_TEXT_FILE is always
* created there.
*/
PG_STAT_TMP_DIR,
/*
* It is generally not useful to backup the contents of this directory
* even if the intention is to restore to another master. See backup.sgml
* for a more detailed description.
*/
"pg_replslot",
/* Contents removed on startup, see dsm_cleanup_for_mmap(). */
PG_DYNSHMEM_DIR,
/* Contents removed on startup, see AsyncShmemInit(). */
"pg_notify",
/*
* Old contents are loaded for possible debugging but are not required for
* normal operation, see OldSerXidInit().
*/
"pg_serial",
/* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
"pg_snapshots",
/* Contents zeroed on startup, see StartupSUBTRANS(). */
"pg_subtrans",
/* end of list */
NULL
};
/*
* List of files excluded from backups.
*/
static const char *excludeFiles[] =
{
/* Skip auto conf temporary file. */
PG_AUTOCONF_FILENAME ".tmp",
/* Skip current log file temporary file */
LOG_METAINFO_DATAFILE_TMP,
/*
* If there's a backup_label or tablespace_map file, it belongs to a
* backup started by the user with pg_start_backup(). It is *not* correct
* for this backup. Our backup_label/tablespace_map is injected into the
* tar separately.
*/
BACKUP_LABEL_FILE,
TABLESPACE_MAP,
"postmaster.pid",
"postmaster.opts",
/* end of list */
NULL
};
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
*/
static void
base_backup_cleanup(int code, Datum arg)
{
do_pg_abort_backup();
}
/*
* Actually do a base backup for the specified tablespaces.
*
* This is split out mainly to avoid complaints about "variable might be
* clobbered by longjmp" from stupider versions of gcc.
*/
static void
perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
{
XLogRecPtr startptr;
TimeLineID starttli;
XLogRecPtr endptr;
TimeLineID endtli;
2016-06-10 00:02:36 +02:00
StringInfo labelfile;
StringInfo tblspc_map_file = NULL;
int datadirpathlen;
List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
Allow a streaming replication standby to follow a timeline switch. Before this patch, streaming replication would refuse to start replicating if the timeline in the primary doesn't exactly match the standby. The situation where it doesn't match is when you have a master, and two standbys, and you promote one of the standbys to become new master. Promoting bumps up the timeline ID, and after that bump, the other standby would refuse to continue. There's significantly more timeline related logic in streaming replication now. First of all, when a standby connects to primary, it will ask the primary for any timeline history files that are missing from the standby. The missing files are sent using a new replication command TIMELINE_HISTORY, and stored in standby's pg_xlog directory. Using the timeline history files, the standby can follow the latest timeline present in the primary (recovery_target_timeline='latest'), just as it can follow new timelines appearing in an archive directory. START_REPLICATION now takes a TIMELINE parameter, to specify exactly which timeline to stream WAL from. This allows the standby to request the primary to send over WAL that precedes the promotion. The replication protocol is changed slightly (in a backwards-compatible way although there's little hope of streaming replication working across major versions anyway), to allow replication to stop when the end of timeline reached, putting the walsender back into accepting a replication command. Many thanks to Amit Kapila for testing and reviewing various versions of this patch.
2012-12-13 18:00:00 +01:00
backup_started_in_recovery = RecoveryInProgress();
labelfile = makeStringInfo();
tblspc_map_file = makeStringInfo();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
labelfile, tblspcdir, &tablespaces,
tblspc_map_file,
opt->progress, opt->sendtblspcmapfile);
2015-05-24 03:35:49 +02:00
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
2015-05-24 03:35:49 +02:00
* us to abort the backup so we don't "leak" a backup counter. For this
* reason, *all* functionality between do_pg_start_backup() and
* do_pg_stop_backup() should be inside the error cleanup block!
*/
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
ListCell *lc;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
/*
2015-05-24 03:35:49 +02:00
* Calculate the relative path of temporary statistics directory in
* order to skip the files which are located in that directory later.
*/
if (is_absolute_path(pgstat_stat_directory) &&
strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
statrelpath = psprintf("./%s", pgstat_stat_directory);
else
statrelpath = pgstat_stat_directory;
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
SendBackupHeader(tablespaces);
/* Setup and activate network throttling, if client requested it */
if (opt->maxrate > 0)
{
throttling_sample =
(int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
/*
* The minimum amount of time for throttling_sample bytes to be
* transferred.
*/
elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;
/* Enable throttling. */
throttling_counter = 0;
/* The 'real data' starts now (header was ignored). */
throttled_last = GetCurrentTimestamp();
}
else
{
/* Disable throttling. */
throttling_counter = -1;
}
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
StringInfoData buf;
/* Send CopyOutResponse message */
pq_beginmessage(&buf, 'H');
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
pq_sendbyte(&buf, 0); /* overall format */
pq_sendint16(&buf, 0); /* natts */
pq_endmessage(&buf);
if (ti->path == NULL)
{
struct stat statbuf;
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data);
/*
* Send tablespace_map file if required and then the bulk of
* the files.
*/
if (tblspc_map_file && opt->sendtblspcmapfile)
{
sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
sendDir(".", 1, false, tablespaces, false);
}
else
sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat control file \"%s\": %m",
XLOG_CONTROL_FILE)));
sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false);
}
else
sendTablespace(ti->path, false);
/*
* If we're including WAL, and this is the main data directory we
* don't terminate the tar stream here. Instead, we will append
* the xlog files below and terminate it then. This is safe since
* the main data directory is always sent *last*.
*/
if (opt->includewal && ti->path == NULL)
{
Assert(lnext(lc) == NULL);
}
else
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
pq_putemptymessage('c'); /* CopyDone */
}
}
PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
if (opt->includewal)
{
/*
* We've left the last tar file "open", so we can now append the
* required WAL files to it.
*/
char pathbuf[MAXPGPATH];
XLogSegNo segno;
XLogSegNo startsegno;
XLogSegNo endsegno;
struct stat statbuf;
List *historyFileList = NIL;
List *walFileList = NIL;
char **walFiles;
int nWalFiles;
char firstoff[MAXFNAMELEN];
char lastoff[MAXFNAMELEN];
DIR *dir;
struct dirent *de;
int i;
ListCell *lc;
TimeLineID tli;
/*
* I'd rather not worry about timelines here, so scan pg_wal and
* include all WAL files in the range between 'startptr' and 'endptr',
* regardless of the timeline the file is stamped with. If there are
* some spurious WAL files belonging to timelines that don't belong in
* this server's history, they will be included too. Normally there
* shouldn't be such files, but if there are, there's little harm in
* including them.
*/
XLByteToSeg(startptr, startsegno, wal_segment_size);
XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
dir = AllocateDir("pg_wal");
if (!dir)
ereport(ERROR,
(errmsg("could not open directory \"%s\": %m", "pg_wal")));
while ((de = ReadDir(dir, "pg_wal")) != NULL)
{
/* Does it look like a WAL segment, and is it in the range? */
if (IsXLogFileName(de->d_name) &&
strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
strcmp(de->d_name + 8, lastoff + 8) <= 0)
{
walFileList = lappend(walFileList, pstrdup(de->d_name));
}
/* Does it look like a timeline history file? */
else if (IsTLHistoryFileName(de->d_name))
{
historyFileList = lappend(historyFileList, pstrdup(de->d_name));
}
}
FreeDir(dir);
/*
* Before we go any further, check that none of the WAL segments we
* need were removed.
*/
CheckXLogRemoved(startsegno, ThisTimeLineID);
/*
* Put the WAL filenames into an array, and sort. We send the files in
* order from oldest to newest, to reduce the chance that a file is
* recycled before we get a chance to send it over.
*/
nWalFiles = list_length(walFileList);
walFiles = palloc(nWalFiles * sizeof(char *));
i = 0;
foreach(lc, walFileList)
{
walFiles[i++] = lfirst(lc);
}
qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);
/*
* There must be at least one xlog file in the pg_wal directory, since
* we are doing backup-including-xlog.
*/
if (nWalFiles < 1)
ereport(ERROR,
(errmsg("could not find any WAL files")));
/*
* Sanity check: the first and last segment should cover startptr and
* endptr, with no gaps in between.
*/
XLogFromFileName(walFiles[0], &tli, &segno, wal_segment_size);
if (segno != startsegno)
{
char startfname[MAXFNAMELEN];
XLogFileName(startfname, ThisTimeLineID, startsegno,
wal_segment_size);
ereport(ERROR,
2013-02-23 05:33:07 +01:00
(errmsg("could not find WAL file \"%s\"", startfname)));
}
for (i = 0; i < nWalFiles; i++)
{
XLogSegNo currsegno = segno;
XLogSegNo nextsegno = segno + 1;
XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);
if (!(nextsegno == segno || currsegno == segno))
{
char nextfname[MAXFNAMELEN];
XLogFileName(nextfname, ThisTimeLineID, nextsegno,
wal_segment_size);
ereport(ERROR,
(errmsg("could not find WAL file \"%s\"", nextfname)));
}
}
if (segno != endsegno)
{
char endfname[MAXFNAMELEN];
XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
ereport(ERROR,
2013-02-23 05:33:07 +01:00
(errmsg("could not find WAL file \"%s\"", endfname)));
}
/* Ok, we have everything we need. Send the WAL files. */
for (i = 0; i < nWalFiles; i++)
{
FILE *fp;
char buf[TAR_SEND_SIZE];
size_t cnt;
pgoff_t len = 0;
snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFiles[i]);
XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);
fp = AllocateFile(pathbuf, "rb");
if (fp == NULL)
{
/*
* Most likely reason for this is that the file was already
* removed by a checkpoint, so check for that to get a better
* error message.
*/
CheckXLogRemoved(segno, tli);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", pathbuf)));
}
if (fstat(fileno(fp), &statbuf) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
pathbuf)));
if (statbuf.st_size != wal_segment_size)
{
CheckXLogRemoved(segno, tli);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
}
/* send the WAL file itself */
_tarWriteHeader(pathbuf, NULL, &statbuf, false);
while ((cnt = fread(buf, 1,
Min(sizeof(buf), wal_segment_size - len),
fp)) > 0)
{
CheckXLogRemoved(segno, tli);
/* Send the chunk as a CopyData message */
if (pq_putmessage('d', buf, cnt))
ereport(ERROR,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
throttle(cnt);
if (len == wal_segment_size)
break;
}
if (len != wal_segment_size)
{
CheckXLogRemoved(segno, tli);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
}
/* wal_segment_size is a multiple of 512, so no need for padding */
FreeFile(fp);
/*
* Mark file as archived, otherwise files can get archived again
* after promotion of a new node. This is in line with
* walreceiver.c always doing an XLogArchiveForceDone() after a
* complete segment.
*/
StatusFilePath(pathbuf, walFiles[i], ".done");
sendFileWithContent(pathbuf, "");
}
/*
* Send timeline history files too. Only the latest timeline history
* file is required for recovery, and even that only if there happens
* to be a timeline switch in the first WAL segment that contains the
* checkpoint record, or if we're taking a base backup from a standby
* server and the target timeline changes while the backup is taken.
* But they are small and highly useful for debugging purposes, so
* better include them all, always.
*/
foreach(lc, historyFileList)
{
char *fname = lfirst(lc);
snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
if (lstat(pathbuf, &statbuf) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", pathbuf)));
sendFile(pathbuf, pathbuf, &statbuf, false);
/* unconditionally mark file as archived */
StatusFilePath(pathbuf, fname, ".done");
sendFileWithContent(pathbuf, "");
}
/* Send CopyDone message for the last tar file */
pq_putemptymessage('c');
}
SendXlogRecPtrResult(endptr, endtli);
}
/*
* qsort comparison function, to compare log/seg portion of WAL segment
* filenames, ignoring the timeline portion.
*/
static int
compareWalFileNames(const void *a, const void *b)
{
char *fna = *((char **) a);
char *fnb = *((char **) b);
return strcmp(fna + 8, fnb + 8);
}
/*
* Parse the base backup options passed down by the parser
*/
static void
parse_basebackup_options(List *options, basebackup_options *opt)
{
ListCell *lopt;
bool o_label = false;
bool o_progress = false;
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
bool o_tablespace_map = false;
2011-01-25 10:50:04 +01:00
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
{
DefElem *defel = (DefElem *) lfirst(lopt);
if (strcmp(defel->defname, "label") == 0)
{
if (o_label)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->label = strVal(defel->arg);
o_label = true;
}
else if (strcmp(defel->defname, "progress") == 0)
{
if (o_progress)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->progress = true;
o_progress = true;
}
else if (strcmp(defel->defname, "fast") == 0)
{
if (o_fast)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->fastcheckpoint = true;
o_fast = true;
}
else if (strcmp(defel->defname, "nowait") == 0)
{
if (o_nowait)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->nowait = true;
o_nowait = true;
}
else if (strcmp(defel->defname, "wal") == 0)
{
if (o_wal)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->includewal = true;
o_wal = true;
}
else if (strcmp(defel->defname, "max_rate") == 0)
{
long maxrate;
if (o_maxrate)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
maxrate = intVal(defel->arg);
if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
(int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
else if (strcmp(defel->defname, "tablespace_map") == 0)
{
if (o_tablespace_map)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("duplicate option \"%s\"", defel->defname)));
opt->sendtblspcmapfile = true;
o_tablespace_map = true;
}
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
}
if (opt->label == NULL)
opt->label = "base backup";
}
/*
* SendBaseBackup() - send a complete base backup.
*
* The function will put the system into backup mode like pg_start_backup()
* does, so that the backup is consistent even though we read directly from
* the filesystem, bypassing the buffer cache.
*/
void
SendBaseBackup(BaseBackupCmd *cmd)
{
DIR *dir;
basebackup_options opt;
parse_basebackup_options(cmd->options, &opt);
WalSndSetState(WALSNDSTATE_BACKUP);
if (update_process_title)
{
char activitymsg[50];
snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
opt.label);
set_ps_display(activitymsg, false);
}
/* Make sure we can open the directory with tablespaces in it */
dir = AllocateDir("pg_tblspc");
if (!dir)
ereport(ERROR,
2011-09-07 23:19:32 +02:00
(errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
perform_base_backup(&opt, dir);
FreeDir(dir);
}
static void
send_int8_string(StringInfoData *buf, int64 intval)
{
char is[32];
sprintf(is, INT64_FORMAT, intval);
pq_sendint32(buf, strlen(is));
pq_sendbytes(buf, is, strlen(is));
}
static void
SendBackupHeader(List *tablespaces)
{
StringInfoData buf;
ListCell *lc;
/* Construct and send the directory information */
pq_beginmessage(&buf, 'T'); /* RowDescription */
pq_sendint16(&buf, 3); /* 3 fields */
/* First field - spcoid */
pq_sendstring(&buf, "spcoid");
pq_sendint32(&buf, 0); /* table oid */
pq_sendint16(&buf, 0); /* attnum */
pq_sendint32(&buf, OIDOID); /* type oid */
pq_sendint16(&buf, 4); /* typlen */
pq_sendint32(&buf, 0); /* typmod */
pq_sendint16(&buf, 0); /* format code */
/* Second field - spcpath */
pq_sendstring(&buf, "spclocation");
pq_sendint32(&buf, 0);
pq_sendint16(&buf, 0);
pq_sendint32(&buf, TEXTOID);
pq_sendint16(&buf, -1);
pq_sendint32(&buf, 0);
pq_sendint16(&buf, 0);
/* Third field - size */
pq_sendstring(&buf, "size");
pq_sendint32(&buf, 0);
pq_sendint16(&buf, 0);
pq_sendint32(&buf, INT8OID);
pq_sendint16(&buf, 8);
pq_sendint32(&buf, 0);
pq_sendint16(&buf, 0);
pq_endmessage(&buf);
foreach(lc, tablespaces)
{
tablespaceinfo *ti = lfirst(lc);
/* Send one datarow message */
pq_beginmessage(&buf, 'D');
pq_sendint16(&buf, 3); /* number of columns */
if (ti->path == NULL)
{
pq_sendint32(&buf, -1); /* Length = -1 ==> NULL */
pq_sendint32(&buf, -1);
}
else
{
Adopt the GNU convention for handling tar-archive members exceeding 8GB. The POSIX standard for tar headers requires archive member sizes to be printed in octal with at most 11 digits, limiting the representable file size to 8GB. However, GNU tar and apparently most other modern tars support a convention in which oversized values can be stored in base-256, allowing any practical file to be a tar member. Adopt this convention to remove two limitations: * pg_dump with -Ft output format failed if the contents of any one table exceeded 8GB. * pg_basebackup failed if the data directory contained any file exceeding 8GB. (This would be a fatal problem for installations configured with a table segment size of 8GB or more, and it has also been seen to fail when large core dump files exist in the data directory.) File sizes under 8GB are still printed in octal, so that no compatibility issues are created except in cases that would have failed entirely before. In addition, this patch fixes several bugs in the same area: * In 9.3 and later, we'd defined tarCreateHeader's file-size argument as size_t, which meant that on 32-bit machines it would write a corrupt tar header for file sizes between 4GB and 8GB, even though no error was raised. This broke both "pg_dump -Ft" and pg_basebackup for such cases. * pg_restore from a tar archive would fail on tables of size between 4GB and 8GB, on machines where either "size_t" or "unsigned long" is 32 bits. This happened even with an archive file not affected by the previous bug. * pg_basebackup would fail if there were files of size between 4GB and 8GB, even on 64-bit machines. * In 9.3 and later, "pg_basebackup -Ft" failed entirely, for any file size, on 64-bit big-endian machines. In view of these potential data-loss bugs, back-patch to all supported branches, even though removal of the documented 8GB limit might otherwise be considered a new feature rather than a bug fix.
2015-11-22 02:21:31 +01:00
Size len;
len = strlen(ti->oid);
pq_sendint32(&buf, len);
pq_sendbytes(&buf, ti->oid, len);
len = strlen(ti->path);
pq_sendint32(&buf, len);
pq_sendbytes(&buf, ti->path, len);
}
if (ti->size >= 0)
send_int8_string(&buf, ti->size / 1024);
else
pq_sendint32(&buf, -1); /* NULL */
pq_endmessage(&buf);
}
/* Send a CommandComplete message */
pq_puttextmessage('C', "SELECT");
}
/*
 * Send a one-row, two-column resultset (recptr, tli), both rendered as text.
 */
static void
SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
{
	StringInfoData msg;
	char		textbuf[MAXFNAMELEN];
	Size		textlen;

	/* RowDescription: describe the two result columns */
	pq_beginmessage(&msg, 'T');
	pq_sendint16(&msg, 2);		/* field count */

	/* First column: "recptr", reported as TEXT */
	pq_sendstring(&msg, "recptr");
	pq_sendint32(&msg, 0);		/* table oid */
	pq_sendint16(&msg, 0);		/* attnum */
	pq_sendint32(&msg, TEXTOID);	/* type oid */
	pq_sendint16(&msg, -1);		/* typlen */
	pq_sendint32(&msg, 0);		/* typmod */
	pq_sendint16(&msg, 0);		/* format code */

	/*
	 * Second column: "tli".  int8 may look like a surprising data type for
	 * this, but in theory int4 would not be wide enough, as TimeLineID is
	 * unsigned.
	 */
	pq_sendstring(&msg, "tli");
	pq_sendint32(&msg, 0);		/* table oid */
	pq_sendint16(&msg, 0);		/* attnum */
	pq_sendint32(&msg, INT8OID);	/* type oid */
	pq_sendint16(&msg, -1);		/* typlen */
	pq_sendint32(&msg, 0);		/* typmod */
	pq_sendint16(&msg, 0);		/* format code */
	pq_endmessage(&msg);

	/* DataRow carrying the actual values */
	pq_beginmessage(&msg, 'D');
	pq_sendint16(&msg, 2);		/* number of columns */

	textlen = snprintf(textbuf, sizeof(textbuf),
					   "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
	pq_sendint32(&msg, textlen);
	pq_sendbytes(&msg, textbuf, textlen);

	textlen = snprintf(textbuf, sizeof(textbuf), "%u", tli);
	pq_sendint32(&msg, textlen);
	pq_sendbytes(&msg, textbuf, textlen);

	pq_endmessage(&msg);

	/* CommandComplete, as if this were a SELECT */
	pq_puttextmessage('C', "SELECT");
}
/*
 * Inject a synthesized file with the given name and content into the
 * output tar stream.
 */
static void
sendFileWithContent(const char *filename, const char *content)
{
	struct stat statbuf;
	int			contentlen,
				padlen;

	contentlen = strlen(content);

	/*
	 * Fake up just enough of a stat struct for _tarWriteHeader().  The
	 * injected file is owned by the server user, read/write for owner only.
	 */
	/* Windows doesn't have the concept of uid and gid */
#ifdef WIN32
	statbuf.st_uid = 0;
	statbuf.st_gid = 0;
#else
	statbuf.st_uid = geteuid();
	statbuf.st_gid = getegid();
#endif
	statbuf.st_mtime = time(NULL);
	statbuf.st_mode = S_IRUSR | S_IWUSR;
	statbuf.st_size = contentlen;

	_tarWriteHeader(filename, NULL, &statbuf, false);

	/* The content itself goes out as a single CopyData message */
	pq_putmessage('d', content, contentlen);

	/* Zero-fill up to the next 512-byte boundary, per tar format */
	padlen = ((contentlen + 511) & ~511) - contentlen;
	if (padlen > 0)
	{
		char		zeros[512];

		MemSet(zeros, 0, padlen);
		pq_putmessage('d', zeros, padlen);
	}
}
/*
 * Include the tablespace directory pointed to by 'path' in the output tar
 * stream.  If 'sizeonly' is true, nothing is actually sent; only the total
 * byte count that would have been sent is computed and returned.
 *
 * Only used to send auxiliary tablespaces, not PGDATA.
 */
int64
sendTablespace(char *path, bool sizeonly)
{
	char		fullpath[MAXPGPATH];
	struct stat st;
	int64		total;

	/*
	 * 'path' points to the tablespace location, but we only want to include
	 * the version directory in it that belongs to us.
	 */
	snprintf(fullpath, sizeof(fullpath), "%s/%s", path,
			 TABLESPACE_VERSION_DIRECTORY);

	/*
	 * Emit a directory entry for the version directory itself, so that its
	 * permissions are preserved on restore.
	 */
	if (lstat(fullpath, &st) != 0)
	{
		if (errno != ENOENT)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not stat file or directory \"%s\": %m",
							fullpath)));

		/* If the tablespace went away while scanning, it's no error. */
		return 0;
	}

	total = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &st,
							sizeonly);

	/* Then send all the files beneath the version directory */
	total += sendDir(fullpath, strlen(path), sizeonly, NIL, true);

	return total;
}
/*
 * Include all files from the given directory in the output tar stream. If
 * 'sizeonly' is true, we just calculate a total length and return it, without
 * actually sending anything.
 *
 * Omit any directory in the tablespaces list, to avoid backing up
 * tablespaces twice when they were created inside PGDATA.
 *
 * If sendtblspclinks is true, we need to include symlink
 * information in the tar file. If not, we can skip that
 * as it will be sent separately in the tablespace_map file.
 */
static int64
sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
		bool sendtblspclinks)
{
	DIR		   *dir;
	struct dirent *de;
	char		pathbuf[MAXPGPATH * 2];
	struct stat statbuf;
	int64		size = 0;

	dir = AllocateDir(path);
	while ((de = ReadDir(dir, path)) != NULL)
	{
		int			excludeIdx;
		bool		excludeFound;

		/* Skip special stuff */
		if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
			continue;

		/* Skip temporary files */
		if (strncmp(de->d_name,
					PG_TEMP_FILE_PREFIX,
					strlen(PG_TEMP_FILE_PREFIX)) == 0)
			continue;

		/*
		 * Check if the postmaster has signaled us to exit, and abort with an
		 * error in that case. The error handler further up will call
		 * do_pg_abort_backup() for us. Also check that if the backup was
		 * started while still in recovery, the server wasn't promoted.
		 * do_pg_stop_backup() will check that too, but it's better to stop
		 * the backup early than continue to the end and fail there.
		 */
		CHECK_FOR_INTERRUPTS();
		if (RecoveryInProgress() != backup_started_in_recovery)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("the standby was promoted during online backup"),
					 errhint("This means that the backup being taken is corrupt "
							 "and should not be used. "
							 "Try taking another online backup.")));

		/* Scan for files that should be excluded */
		excludeFound = false;
		for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
		{
			if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
			{
				elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
				excludeFound = true;
				break;
			}
		}

		if (excludeFound)
			continue;

		snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);

		/* Skip pg_control here to back up it last */
		if (strcmp(pathbuf, "./global/pg_control") == 0)
			continue;

		if (lstat(pathbuf, &statbuf) != 0)
		{
			if (errno != ENOENT)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file or directory \"%s\": %m",
								pathbuf)));

			/* If the file went away while scanning, it's not an error. */
			continue;
		}

		/* Scan for directories whose contents should be excluded */
		excludeFound = false;
		for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
		{
			if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
			{
				elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
				size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
				excludeFound = true;
				break;
			}
		}

		if (excludeFound)
			continue;

		/*
		 * Exclude contents of directory specified by statrelpath if not set
		 * to the default (pg_stat_tmp) which is caught in the loop above.
		 */
		if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
		{
			elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
			size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
			continue;
		}

		/*
		 * We can skip pg_wal, the WAL segments need to be fetched from the
		 * WAL archive anyway. But include it as an empty directory anyway, so
		 * we get permissions right.
		 */
		if (strcmp(pathbuf, "./pg_wal") == 0)
		{
			/* If pg_wal is a symlink, write it as a directory anyway */
			size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);

			/*
			 * Also send archive_status directory (by hackishly reusing
			 * statbuf from above ...).
			 */
			size += _tarWriteHeader("./pg_wal/archive_status", NULL, &statbuf,
									sizeonly);

			continue;			/* don't recurse into pg_wal */
		}

		/* Allow symbolic links in pg_tblspc only */
		if (strcmp(path, "./pg_tblspc") == 0 &&
#ifndef WIN32
			S_ISLNK(statbuf.st_mode)
#else
			pgwin32_is_junction(pathbuf)
#endif
			)
		{
#if defined(HAVE_READLINK) || defined(WIN32)
			char		linkpath[MAXPGPATH];
			int			rllen;

			rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
			if (rllen < 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not read symbolic link \"%s\": %m",
								pathbuf)));
			if (rllen >= sizeof(linkpath))
				ereport(ERROR,
						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
						 errmsg("symbolic link \"%s\" target is too long",
								pathbuf)));
			linkpath[rllen] = '\0';

			size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
									&statbuf, sizeonly);
#else

			/*
			 * If the platform does not have symbolic links, it should not be
			 * possible to have tablespaces - clearly somebody else created
			 * them. Warn about it and ignore.
			 */
			ereport(WARNING,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("tablespaces are not supported on this platform")));
			continue;
#endif							/* HAVE_READLINK */
		}
		else if (S_ISDIR(statbuf.st_mode))
		{
			bool		skip_this_dir = false;
			ListCell   *lc;

			/*
			 * Store a directory entry in the tar file so we can get the
			 * permissions right.
			 */
			size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
									sizeonly);

			/*
			 * Call ourselves recursively for a directory, unless it happens
			 * to be a separate tablespace located within PGDATA.
			 */
			foreach(lc, tablespaces)
			{
				tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);

				/*
				 * ti->rpath is the tablespace relative path within PGDATA, or
				 * NULL if the tablespace has been properly located somewhere
				 * else.
				 *
				 * Skip past the leading "./" in pathbuf when comparing.
				 */
				if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
				{
					skip_this_dir = true;
					break;
				}
			}

			/*
			 * skip sending directories inside pg_tblspc, if not required.
			 */
			if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
				skip_this_dir = true;

			if (!skip_this_dir)
				size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
		}
		else if (S_ISREG(statbuf.st_mode))
		{
			bool		sent = false;

			if (!sizeonly)
				sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
								true);

			if (sent || sizeonly)
			{
				/* Add size, rounded up to 512byte block */
				size += ((statbuf.st_size + 511) & ~511);
				size += 512;	/* Size of the header of the file */
			}
		}
		else
			ereport(WARNING,
					(errmsg("skipping special file \"%s\"", pathbuf)));
	}
	FreeDir(dir);
	return size;
}
/*****
* Functions for handling tar file format
*
* Copied from pg_dump, but modified to work with libpq for sending
*/
/*
* Given the member, write the TAR header & send the file.
*
* If 'missing_ok' is true, will not throw an error if the file is not found.
*
* Returns true if the file was successfully sent, false if 'missing_ok',
* and the file did not exist.
*/
static bool
2017-06-21 20:39:04 +02:00
sendFile(char *readfilename, char *tarfilename, struct stat *statbuf,
bool missing_ok)
{
FILE *fp;
char buf[TAR_SEND_SIZE];
size_t cnt;
pgoff_t len = 0;
size_t pad;
fp = AllocateFile(readfilename, "rb");
if (fp == NULL)
{
if (errno == ENOENT && missing_ok)
return false;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", readfilename)));
}
_tarWriteHeader(tarfilename, NULL, statbuf, false);
while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
{
/* Send the chunk as a CopyData message */
if (pq_putmessage('d', buf, cnt))
ereport(ERROR,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
throttle(cnt);
if (len >= statbuf->st_size)
{
/*
* Reached end of file. The file could be longer, if it was
* extended while we were sending it, but for a base backup we can
* ignore such extended data. It will be restored from WAL.
*/
break;
}
}
/* If the file was truncated while we were sending it, pad it with zeros */
if (len < statbuf->st_size)
{
MemSet(buf, 0, sizeof(buf));
while (len < statbuf->st_size)
{
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
throttle(cnt);
}
}
/*
* Pad to 512 byte boundary, per tar format requirements. (This small
* piece of data is probably not worth throttling.)
*/
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
MemSet(buf, 0, pad);
pq_putmessage('d', buf, pad);
}
FreeFile(fp);
return true;
}
/*
 * Construct a 512-byte tar header for the given file and send it as a
 * CopyData message, unless 'sizeonly' is true, in which case nothing is
 * sent.
 *
 * 'linktarget' is the symlink target for symlink entries, or NULL.
 * Errors out if the file name or link target does not fit in the header.
 *
 * Returns the number of bytes the header occupies in the tar stream
 * (always 512, whether or not anything was sent).
 */
static int64
_tarWriteHeader(const char *filename, const char *linktarget,
				struct stat *statbuf, bool sizeonly)
{
	char		h[512];
	enum tarError rc;

	if (!sizeonly)
	{
		rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
							 statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
							 statbuf->st_mtime);

		switch (rc)
		{
			case TAR_OK:
				break;
			case TAR_NAME_TOO_LONG:
				ereport(ERROR,
						(errmsg("file name too long for tar format: \"%s\"",
								filename)));
				break;
			case TAR_SYMLINK_TOO_LONG:
				ereport(ERROR,
						(errmsg("symbolic link target too long for tar format: "
								"file name \"%s\", target \"%s\"",
								filename, linktarget)));
				break;
			default:
				elog(ERROR, "unrecognized tar error: %d", rc);
		}

		pq_putmessage('d', h, sizeof(h));
	}

	return sizeof(h);
}
/*
* Write tar header for a directory. If the entry in statbuf is a link then
* write it as a directory anyway.
*/
static int64
2017-06-21 20:39:04 +02:00
_tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
bool sizeonly)
{
/* If symlink, write it as a directory anyway */
#ifndef WIN32
if (S_ISLNK(statbuf->st_mode))
#else
if (pgwin32_is_junction(pathbuf))
#endif
statbuf->st_mode = S_IFDIR | S_IRWXU;
return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
}
/*
 * Increment the network transfer counter by the given number of bytes,
 * and sleep if necessary to comply with the requested network transfer
 * rate.
 *
 * Uses the file-level throttle state: throttling_counter (bytes sent in
 * the current sample window, < 0 means throttling disabled),
 * throttling_sample (window size in bytes), elapsed_min_unit (minimum
 * time per window), and throttled_last (start time of the window).
 */
static void
throttle(size_t increment)
{
	TimeOffset	elapsed_min;

	/* A negative counter means throttling is turned off */
	if (throttling_counter < 0)
		return;

	throttling_counter += increment;
	/* Only pause once a full sample window's worth of bytes has gone out */
	if (throttling_counter < throttling_sample)
		return;

	/* How much time should have elapsed at minimum? */
	elapsed_min = elapsed_min_unit *
		(throttling_counter / throttling_sample);

	/*
	 * Since the latch could be set repeatedly because of concurrent WAL
	 * activity, sleep in a loop to ensure enough time has passed.
	 */
	for (;;)
	{
		TimeOffset	elapsed,
					sleep;
		int			wait_result;

		/* Time elapsed since the last measurement (and possible wake up). */
		elapsed = GetCurrentTimestamp() - throttled_last;

		/* sleep if the transfer is faster than it should be */
		sleep = elapsed_min - elapsed;
		if (sleep <= 0)
			break;

		ResetLatch(MyLatch);

		/* We're eating a potentially set latch, so check for interrupts */
		CHECK_FOR_INTERRUPTS();

		/*
		 * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
		 * the maximum time to sleep. Thus the cast to long is safe.
		 */
		wait_result = WaitLatch(MyLatch,
								WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
								(long) (sleep / 1000),
								WAIT_EVENT_BASE_BACKUP_THROTTLE);

		if (wait_result & WL_LATCH_SET)
			CHECK_FOR_INTERRUPTS();

		/* Done waiting? */
		if (wait_result & WL_TIMEOUT)
			break;
	}

	/*
	 * As we work with integers, only a whole multiple of throttling_sample
	 * was processed. The rest will be done during the next call of this
	 * function.
	 */
	throttling_counter %= throttling_sample;

	/*
	 * Time interval for the remaining amount and possible next increments
	 * starts now.
	 */
	throttled_last = GetCurrentTimestamp();
}