Exclude unlogged tables from base backups

Exclude unlogged tables from base backup entirely except init fork which marks
created unlogged table. The next question is do not backup temp table but
it's a story for separate patch.

Author: David Steele
Review by: Adam Brightwell, Masahiko Sawada
Discussion: https://www.postgresql.org/message-id/flat/04791bab-cb04-ba43-e9c0-664a4c1ffb2c@pgmasters.net
This commit is contained in:
Teodor Sigaev 2018-03-23 19:14:12 +03:00
parent 7ba7986fb4
commit 8694cc96b5
5 changed files with 113 additions and 5 deletions

View File

@ -2568,6 +2568,12 @@ The commands accepted in replication mode are:
with <filename>pgsql_tmp</filename>.
</para>
</listitem>
<listitem>
<para>
Unlogged relations, except for the init fork which is required to
recreate the (empty) unlogged relation on recovery.
</para>
</listitem>
<listitem>
<para>
<filename>pg_wal</filename>, including subdirectories. If the backup is run

View File

@ -26,6 +26,7 @@
#include "nodes/pg_list.h"
#include "pgtar.h"
#include "pgstat.h"
#include "port.h"
#include "postmaster/syslogger.h"
#include "replication/basebackup.h"
#include "replication/walsender.h"
@ -33,6 +34,7 @@
#include "storage/dsm_impl.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/reinit.h"
#include "utils/builtins.h"
#include "utils/ps_status.h"
#include "utils/relcache.h"
@ -958,12 +960,44 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
char pathbuf[MAXPGPATH * 2];
struct stat statbuf;
int64 size = 0;
const char *lastDir; /* Split last dir from parent path. */
bool isDbDir = false; /* Does this directory contain relations? */
/*
* Determine if the current path is a database directory that can
* contain relations.
*
* Start by finding the location of the delimiter between the parent
* path and the current path.
*/
lastDir = last_dir_separator(path);
/* Does this path look like a database path (i.e. all digits)? */
if (lastDir != NULL &&
strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
{
/* Part of path that contains the parent directory. */
int parentPathLen = lastDir - path;
/*
* Mark path as a database directory if the parent path is either
* $PGDATA/base or a tablespace version path.
*/
if (strncmp(path, "./base", parentPathLen) == 0 ||
(parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
TABLESPACE_VERSION_DIRECTORY,
sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
isDbDir = true;
}
dir = AllocateDir(path);
while ((de = ReadDir(dir, path)) != NULL)
{
int excludeIdx;
bool excludeFound;
ForkNumber relForkNum; /* Type of fork if file is a relation */
int relOidChars; /* Chars in filename that are the rel oid */
/* Skip special stuff */
if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
@ -1007,6 +1041,36 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
if (excludeFound)
continue;
/* Exclude all forks for unlogged tables except the init fork */
if (isDbDir &&
parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
&relForkNum))
{
/* Never exclude init forks */
if (relForkNum != INIT_FORKNUM)
{
char initForkFile[MAXPGPATH];
char relOid[OIDCHARS + 1];
/*
* If any other type of fork, check if there is an init fork
* with the same OID. If so, the file can be excluded.
*/
strncpy(relOid, de->d_name, relOidChars);
snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
path, relOid);
if (lstat(initForkFile, &statbuf) == 0)
{
elog(DEBUG2,
"unlogged relation file \"%s\" excluded from backup",
de->d_name);
continue;
}
}
}
snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
/* Skip pg_control here to back up it last */

View File

@ -28,8 +28,6 @@ static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
int op);
static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
int op);
static bool parse_filename_for_nontemp_relation(const char *name,
int *oidchars, ForkNumber *fork);
typedef struct
{
@ -373,7 +371,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* portion of the filename. This is critical to protect against a possible
* buffer overrun.
*/
static bool
bool
parse_filename_for_nontemp_relation(const char *name, int *oidchars,
ForkNumber *fork)
{

View File

@ -4,7 +4,7 @@ use Cwd;
use Config;
use PostgresNode;
use TestLib;
use Test::More tests => 79;
use Test::More tests => 87;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@ -66,6 +66,16 @@ foreach my $filename (
# positive.
$node->safe_psql('postgres', 'SELECT 1;');
# Create an unlogged table to test that forks other than init are not copied.
$node->safe_psql('postgres', 'CREATE UNLOGGED TABLE base_unlogged (id int)');
my $baseUnloggedPath = $node->safe_psql('postgres',
q{select pg_relation_filepath('base_unlogged')});
# Make sure main and init forks exist
ok(-f "$pgdata/${baseUnloggedPath}_init", 'unlogged init fork in base');
ok(-f "$pgdata/$baseUnloggedPath", 'unlogged main fork in base');
$node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backup", '-X', 'none' ],
'pg_basebackup runs');
ok(-f "$tempdir/backup/PG_VERSION", 'backup was created');
@ -96,6 +106,12 @@ foreach my $filename (
ok(!-f "$tempdir/backup/$filename", "$filename not copied");
}
# Unlogged relation forks other than init should not be copied
ok(-f "$tempdir/backup/${baseUnloggedPath}_init",
'unlogged init fork in backup');
ok(!-f "$tempdir/backup/$baseUnloggedPath",
'unlogged main fork not in backup');
# Make sure existing backup_label was ignored.
isnt(slurp_file("$tempdir/backup/backup_label"),
'DONOTCOPY', 'existing backup_label not copied');
@ -147,7 +163,7 @@ unlink "$pgdata/$superlongname";
# skip on Windows.
SKIP:
{
skip "symlinks not supported on Windows", 11 if ($windows_os);
skip "symlinks not supported on Windows", 15 if ($windows_os);
# Move pg_replslot out of $pgdata and create a symlink to it.
$node->stop;
@ -177,6 +193,19 @@ SKIP:
my @tblspc_tars = glob "$tempdir/tarbackup2/[0-9]*.tar";
is(scalar(@tblspc_tars), 1, 'one tablespace tar was created');
# Create an unlogged table to test that forks other than init are not copied.
$node->safe_psql('postgres',
'CREATE UNLOGGED TABLE tblspc1_unlogged (id int) TABLESPACE tblspc1;');
my $tblspc1UnloggedPath = $node->safe_psql(
'postgres', q{select pg_relation_filepath('tblspc1_unlogged')});
# Make sure main and init forks exist
ok(-f "$pgdata/${tblspc1UnloggedPath}_init",
'unlogged init fork in tablespace');
ok(-f "$pgdata/$tblspc1UnloggedPath",
'unlogged main fork in tablespace');
$node->command_fails(
[ 'pg_basebackup', '-D', "$tempdir/backup1", '-Fp' ],
'plain format with tablespaces fails without tablespace mapping');
@ -195,11 +224,20 @@ SKIP:
"tablespace symlink was updated");
closedir $dh;
# Unlogged relation forks other than init should not be copied
my ($tblspc1UnloggedBackupPath) = $tblspc1UnloggedPath =~ /[^\/]*\/[^\/]*\/[^\/]*$/g;
ok(-f "$tempdir/tbackup/tblspc1/${tblspc1UnloggedBackupPath}_init",
'unlogged init fork in tablespace backup');
ok(!-f "$tempdir/tbackup/tblspc1/$tblspc1UnloggedBackupPath",
'unlogged main fork not in tablespace backup');
ok( -d "$tempdir/backup1/pg_replslot",
'pg_replslot symlink copied as directory');
mkdir "$tempdir/tbl=spc2";
$node->safe_psql('postgres', "DROP TABLE test1;");
$node->safe_psql('postgres', "DROP TABLE tblspc1_unlogged;");
$node->safe_psql('postgres', "DROP TABLESPACE tblspc1;");
$node->safe_psql('postgres',
"CREATE TABLESPACE tblspc2 LOCATION '$shorter_tempdir/tbl=spc2';");

View File

@ -16,6 +16,8 @@
#define REINIT_H
extern void ResetUnloggedRelations(int op);
extern bool parse_filename_for_nontemp_relation(
const char *name, int *oidchars, ForkNumber *fork);
#define UNLOGGED_RELATION_CLEANUP 0x0001
#define UNLOGGED_RELATION_INIT 0x0002