Exclude additional directories in pg_basebackup

The list of files and directories that pg_basebackup excludes from the
backup was somewhat incomplete and unorganized.  Change that with having
the exclusion driven from tables.  Clean up some code around it.  Also
document the exclusions in more detail so that users of pg_start_backup
can make use of it as well.

The contents of these directories are now excluded from the backup:
pg_dynshmem, pg_notify, pg_serial, pg_snapshots, pg_subtrans

Also fix a bug that a pg_repl_slot or pg_stat_tmp being a symlink would
cause a corrupt tar header to be created.  Now such symlinks are
included in the backup as empty directories.  Bug found by Ashutosh
Sharma <ashu.coek88@gmail.com>.

From: David Steele <david@pgmasters.net>
Reviewed-by: Michael Paquier <michael.paquier@gmail.com>
This commit is contained in:
Peter Eisentraut 2016-09-28 12:00:00 -04:00
parent b82d5a2c7c
commit 6ad8ac6026
5 changed files with 243 additions and 100 deletions

View File

@ -1089,6 +1089,22 @@ SELECT pg_stop_backup();
the new master comes on line.
</para>
<para>
The contents of the directories <filename>pg_dynshmem/</>,
<filename>pg_notify/</>, <filename>pg_serial/</>,
<filename>pg_snapshots/</>, <filename>pg_stat_tmp/</>,
and <filename>pg_subtrans/</> (but not the directories themselves) can be
omitted from the backup as they will be initialized on postmaster startup.
If <xref linkend="guc-stats-temp-directory"> is set and is under the data
directory then the contents of that directory can also be omitted.
</para>
<para>
Any file or directory beginning with <filename>pgsql_tmp</filename> can be
omitted from the backup. These files are removed on postmaster start and
the directories will be recreated as needed.
</para>
<para>
The backup label
file includes the label string you gave to <function>pg_start_backup</>,

View File

@ -2069,7 +2069,9 @@ The commands accepted in walsender mode are:
</listitem>
<listitem>
<para>
various temporary files created during the operation of the PostgreSQL server
Various temporary files and directories created during the operation
of the PostgreSQL server, such as any file or directory beginning
with <filename>pgsql_tmp</>.
</para>
</listitem>
<listitem>
@ -2082,13 +2084,18 @@ The commands accepted in walsender mode are:
</listitem>
<listitem>
<para>
<filename>pg_replslot</> is copied as an empty directory.
<filename>pg_dynshmem</>, <filename>pg_notify</>,
<filename>pg_replslot</>, <filename>pg_serial</>,
<filename>pg_snapshots</>, <filename>pg_stat_tmp</>, and
<filename>pg_subtrans</> are copied as empty directories (even if
they are symbolic links).
</para>
</listitem>
<listitem>
<para>
Files other than regular files and directories, such as symbolic
links and special device files, are skipped. (Symbolic links
links (other than for the directories listed above) and special
device files, are skipped. (Symbolic links
in <filename>pg_tblspc</filename> are maintained.)
</para>
</listitem>

View File

@ -610,10 +610,12 @@ PostgreSQL documentation
<para>
The backup will include all files in the data directory and tablespaces,
including the configuration files and any additional files placed in the
directory by third parties. But only regular files and directories are
copied. Symbolic links (other than those used for tablespaces) and special
device files are skipped. (See <xref linkend="protocol-replication"> for
the precise details.)
directory by third parties, except certain temporary files managed by
PostgreSQL. But only regular files and directories are copied, except that
symbolic links used for tablespaces are preserved. Symbolic links pointing
to certain directories known to PostgreSQL are copied as empty directories.
Other symbolic links and special device files are skipped.
See <xref linkend="protocol-replication"> for the precise details.
</para>
<para>

View File

@ -30,6 +30,7 @@
#include "replication/basebackup.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
#include "storage/dsm_impl.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "utils/builtins.h"
@ -55,8 +56,10 @@ static int64 sendDir(char *path, int basepathlen, bool sizeonly,
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
static void _tarWriteHeader(const char *filename, const char *linktarget,
struct stat * statbuf);
static int64 _tarWriteHeader(const char *filename, const char *linktarget,
struct stat * statbuf, bool sizeonly);
static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
bool sizeonly);
static void send_int8_string(StringInfoData *buf, int64 intval);
static void SendBackupHeader(List *tablespaces);
static void base_backup_cleanup(int code, Datum arg);
@ -94,6 +97,73 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
/*
* The contents of these directories are removed or recreated during server
* start so they are not included in backups. The directories themselves are
* kept and included as empty to preserve access permissions.
*/
static const char *excludeDirContents[] =
{
/*
* Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
* when stats_temp_directory is set because PGSS_TEXT_FILE is always created
* there.
*/
PG_STAT_TMP_DIR,
/*
* It is generally not useful to backup the contents of this directory even
* if the intention is to restore to another master. See backup.sgml for a
* more detailed description.
*/
"pg_replslot",
/* Contents removed on startup, see dsm_cleanup_for_mmap(). */
PG_DYNSHMEM_DIR,
/* Contents removed on startup, see AsyncShmemInit(). */
"pg_notify",
/*
* Old contents are loaded for possible debugging but are not required for
* normal operation, see OldSerXidInit().
*/
"pg_serial",
/* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
"pg_snapshots",
/* Contents zeroed on startup, see StartupSUBTRANS(). */
"pg_subtrans",
/* end of list */
NULL
};
/*
* List of files excluded from backups.
*/
static const char *excludeFiles[] =
{
/* Skip auto conf temporary file. */
PG_AUTOCONF_FILENAME ".tmp",
/*
* If there's a backup_label or tablespace_map file, it belongs to a
* backup started by the user with pg_start_backup(). It is *not* correct
* for this backup. Our backup_label/tablespace_map is injected into the
* tar separately.
*/
BACKUP_LABEL_FILE,
TABLESPACE_MAP,
"postmaster.pid",
"postmaster.opts",
/* end of list */
NULL
};
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@ -415,7 +485,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
}
/* send the WAL file itself */
_tarWriteHeader(pathbuf, NULL, &statbuf);
_tarWriteHeader(pathbuf, NULL, &statbuf, false);
while ((cnt = fread(buf, 1, Min(sizeof(buf), XLogSegSize - len), fp)) > 0)
{
@ -807,7 +877,7 @@ sendFileWithContent(const char *filename, const char *content)
statbuf.st_mode = S_IRUSR | S_IWUSR;
statbuf.st_size = len;
_tarWriteHeader(filename, NULL, &statbuf);
_tarWriteHeader(filename, NULL, &statbuf, false);
/* Send the contents as a CopyData message */
pq_putmessage('d', content, len);
@ -858,9 +928,9 @@ sendTablespace(char *path, bool sizeonly)
/* If the tablespace went away while scanning, it's no error. */
return 0;
}
if (!sizeonly)
_tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf);
size = 512; /* Size of the header just added */
size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
sizeonly);
/* Send all the files in the tablespace version directory */
size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
@ -893,6 +963,9 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
dir = AllocateDir(path);
while ((de = ReadDir(dir, path)) != NULL)
{
int excludeIdx;
bool excludeFound;
/* Skip special stuff */
if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
continue;
@ -903,24 +976,6 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
strlen(PG_TEMP_FILE_PREFIX)) == 0)
continue;
/* skip auto conf temporary file */
if (strncmp(de->d_name,
PG_AUTOCONF_FILENAME ".tmp",
sizeof(PG_AUTOCONF_FILENAME) + 4) == 0)
continue;
/*
* If there's a backup_label or tablespace_map file, it belongs to a
* backup started by the user with pg_start_backup(). It is *not*
* correct for this backup, our backup_label/tablespace_map is
* injected into the tar separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
continue;
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@ -938,13 +993,23 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
"and should not be used. "
"Try taking another online backup.")));
snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name);
/* Scan for files that should be excluded */
excludeFound = false;
for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
{
if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
{
elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
excludeFound = true;
break;
}
}
/* Skip postmaster.pid and postmaster.opts in the data directory */
if (strcmp(pathbuf, "./postmaster.pid") == 0 ||
strcmp(pathbuf, "./postmaster.opts") == 0)
if (excludeFound)
continue;
snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name);
/* Skip pg_control here to back up it last */
if (strcmp(pathbuf, "./global/pg_control") == 0)
continue;
@ -957,33 +1022,34 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
errmsg("could not stat file or directory \"%s\": %m",
pathbuf)));
/* If the file went away while scanning, it's no error. */
/* If the file went away while scanning, it's not an error. */
continue;
}
/*
* Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
* even when stats_temp_directory is set because PGSS_TEXT_FILE is
* always created there.
*/
if ((statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) ||
strncmp(de->d_name, PG_STAT_TMP_DIR, strlen(PG_STAT_TMP_DIR)) == 0)
/* Scan for directories whose contents should be excluded */
excludeFound = false;
for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
{
if (!sizeonly)
_tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
size += 512;
continue;
if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
{
elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
excludeFound = true;
break;
}
}
if (excludeFound)
continue;
/*
* Skip pg_replslot, not useful to copy. But include it as an empty
* directory anyway, so we get permissions right.
* Exclude contents of directory specified by statrelpath if not set
* to the default (pg_stat_tmp) which is caught in the loop above.
*/
if (strcmp(de->d_name, "pg_replslot") == 0)
if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
{
if (!sizeonly)
_tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
size += 512; /* Size of the header just added */
elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
continue;
}
@ -994,26 +1060,15 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
*/
if (strcmp(pathbuf, "./pg_xlog") == 0)
{
if (!sizeonly)
{
/* If pg_xlog is a symlink, write it as a directory anyway */
#ifndef WIN32
if (S_ISLNK(statbuf.st_mode))
#else
if (pgwin32_is_junction(pathbuf))
#endif
statbuf.st_mode = S_IFDIR | S_IRWXU;
_tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
}
size += 512; /* Size of the header just added */
/* If pg_xlog is a symlink, write it as a directory anyway */
size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
/*
* Also send archive_status directory (by hackishly reusing
* statbuf from above ...).
*/
if (!sizeonly)
_tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf);
size += 512; /* Size of the header just added */
size += _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf,
sizeonly);
continue; /* don't recurse into pg_xlog */
}
@ -1044,9 +1099,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
pathbuf)));
linkpath[rllen] = '\0';
if (!sizeonly)
_tarWriteHeader(pathbuf + basepathlen + 1, linkpath, &statbuf);
size += 512; /* Size of the header just added */
size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
&statbuf, sizeonly);
#else
/*
@ -1069,9 +1123,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
* Store a directory entry in the tar file so we can get the
* permissions right.
*/
if (!sizeonly)
_tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
size += 512; /* Size of the header just added */
size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
sizeonly);
/*
* Call ourselves recursively for a directory, unless it happens
@ -1162,7 +1215,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
errmsg("could not open file \"%s\": %m", readfilename)));
}
_tarWriteHeader(tarfilename, NULL, statbuf);
_tarWriteHeader(tarfilename, NULL, statbuf, false);
while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
{
@ -1215,36 +1268,61 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
}
static void
static int64
_tarWriteHeader(const char *filename, const char *linktarget,
struct stat * statbuf)
struct stat * statbuf, bool sizeonly)
{
char h[512];
enum tarError rc;
rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
statbuf->st_mtime);
switch (rc)
if (!sizeonly)
{
case TAR_OK:
break;
case TAR_NAME_TOO_LONG:
ereport(ERROR,
(errmsg("file name too long for tar format: \"%s\"",
filename)));
break;
case TAR_SYMLINK_TOO_LONG:
ereport(ERROR,
(errmsg("symbolic link target too long for tar format: file name \"%s\", target \"%s\"",
filename, linktarget)));
break;
default:
elog(ERROR, "unrecognized tar error: %d", rc);
rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
statbuf->st_mtime);
switch (rc)
{
case TAR_OK:
break;
case TAR_NAME_TOO_LONG:
ereport(ERROR,
(errmsg("file name too long for tar format: \"%s\"",
filename)));
break;
case TAR_SYMLINK_TOO_LONG:
ereport(ERROR,
(errmsg("symbolic link target too long for tar format: "
"file name \"%s\", target \"%s\"",
filename, linktarget)));
break;
default:
elog(ERROR, "unrecognized tar error: %d", rc);
}
pq_putmessage('d', h, sizeof(h));
}
pq_putmessage('d', h, 512);
return sizeof(h);
}
/*
* Write tar header for a directory. If the entry in statbuf is a link then
* write it as a directory anyway.
*/
static int64
_tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
bool sizeonly)
{
/* If symlink, write it as a directory anyway */
#ifndef WIN32
if (S_ISLNK(statbuf->st_mode))
#else
if (pgwin32_is_junction(pathbuf))
#endif
statbuf->st_mode = S_IFDIR | S_IRWXU;
return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
}
/*

View File

@ -4,7 +4,7 @@ use Cwd;
use Config;
use PostgresNode;
use TestLib;
use Test::More tests => 54;
use Test::More tests => 67;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@ -55,15 +55,43 @@ print CONF "wal_level = replica\n";
close CONF;
$node->restart;
# Write some files to test that they are not copied.
foreach my $filename (qw(backup_label tablespace_map postgresql.auto.conf.tmp))
{
open FILE, ">>$pgdata/$filename";
print FILE "DONOTCOPY";
close FILE;
}
$node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backup" ],
'pg_basebackup runs');
ok(-f "$tempdir/backup/PG_VERSION", 'backup was created');
# Only archive_status directory should be copied in pg_xlog/.
is_deeply(
[ sort(slurp_dir("$tempdir/backup/pg_xlog/")) ],
[ sort qw(. .. archive_status) ],
'no WAL files copied');
# Contents of these directories should not be copied.
foreach my $dirname (qw(pg_dynshmem pg_notify pg_replslot pg_serial pg_snapshots pg_stat_tmp pg_subtrans))
{
is_deeply(
[ sort(slurp_dir("$tempdir/backup/$dirname/")) ],
[ sort qw(. ..) ],
"contents of $dirname/ not copied");
}
# These files should not be copied.
foreach my $filename (qw(postgresql.auto.conf.tmp postmaster.opts postmaster.pid tablespace_map))
{
ok(! -f "$tempdir/backup/$filename", "$filename not copied");
}
# Make sure existing backup_label was ignored.
isnt(slurp_file("$tempdir/backup/backup_label"), 'DONOTCOPY',
'existing backup_label not copied');
$node->command_ok(
[ 'pg_basebackup', '-D', "$tempdir/backup2", '--xlogdir',
"$tempdir/xlog2" ],
@ -110,7 +138,17 @@ unlink "$pgdata/$superlongname";
# skip on Windows.
SKIP:
{
skip "symlinks not supported on Windows", 10 if ($windows_os);
skip "symlinks not supported on Windows", 11 if ($windows_os);
# Move pg_replslot out of $pgdata and create a symlink to it.
$node->stop;
rename("$pgdata/pg_replslot", "$tempdir/pg_replslot")
or BAIL_OUT "could not move $pgdata/pg_replslot";
symlink("$tempdir/pg_replslot", "$pgdata/pg_replslot")
or BAIL_OUT "could not symlink to $pgdata/pg_replslot";
$node->start;
# Create a temporary directory in the system location and symlink it
# to our physical temp location. That way we can use shorter names
@ -148,6 +186,8 @@ SKIP:
"tablespace symlink was updated");
closedir $dh;
ok(-d "$tempdir/backup1/pg_replslot", 'pg_replslot symlink copied as directory');
mkdir "$tempdir/tbl=spc2";
$node->safe_psql('postgres', "DROP TABLE test1;");
$node->safe_psql('postgres', "DROP TABLESPACE tblspc1;");