Add -c/--restore-target-wal to pg_rewind

pg_rewind needs to copy from the source cluster to the target cluster a
set of relation blocks changed from the previous checkpoint where WAL
forked up to the end of WAL on the target.  Building this list of
relation blocks requires a range of WAL segments that may not be present
anymore on the target's pg_wal, causing pg_rewind to fail.  It is
possible to work around this issue by copying manually the WAL segments
needed but this may lead to some extra and actually useless work.

This commit introduces a new option allowing pg_rewind to use a
restore_command while doing the rewind by grabbing the parameter value
of restore_command from the target cluster configuration.  This allows
the rewind operation to be more reliable, so as only the WAL segments
needed by the rewind are restored from the archives.

In order to be able to do that, a new routine is added to src/common/ to
allow frontend tools to restore files from archives using an
already-built restore command.  This version is more simple than the
backend equivalent as there is no need to handle the non-recovery case.

Author: Alexey Kondratov
Reviewed-by: Andrey Borodin, Andres Freund, Alvaro Herrera, Alexander
Korotkov, Michael Paquier
Discussion: https://postgr.es/m/a3acff50-5a0d-9a2c-b3b2-ee36168955c1@postgrespro.ru
This commit is contained in:
Michael Paquier 2020-04-01 10:57:03 +09:00
parent 92d31085e9
commit a7e8ece41c
12 changed files with 360 additions and 27 deletions

View File

@ -66,11 +66,11 @@ PostgreSQL documentation
can be found either on the target timeline, the source timeline, or their common
ancestor. In the typical failover scenario where the target cluster was
shut down soon after the divergence, this is not a problem, but if the
target cluster ran for a long time after the divergence, the old WAL
files might no longer be present. In that case, they can be manually
copied from the WAL archive to the <filename>pg_wal</filename> directory, or
fetched on startup by configuring <xref linkend="guc-primary-conninfo"/> or
<xref linkend="guc-restore-command"/>. The use of
target cluster ran for a long time after the divergence, its old WAL
files might no longer be present. In this case, you can manually copy them
from the WAL archive to the <filename>pg_wal</filename> directory, or run
<application>pg_rewind</application> with the <literal>-c</literal> option to
automatically retrieve them from the WAL archive. The use of
<application>pg_rewind</application> is not limited to failover, e.g. a standby
server can be promoted, run some write transactions, and then rewinded
to become a standby again.
@ -232,6 +232,19 @@ PostgreSQL documentation
</listitem>
</varlistentry>
<varlistentry>
<term><option>-c</option></term>
<term><option>--restore-target-wal</option></term>
<listitem>
<para>
Use <varname>restore_command</varname> defined in the target cluster
configuration to retrieve WAL files from the WAL archive if these
files are no longer available in the <filename>pg_wal</filename>
directory.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--debug</option></term>
<listitem>
@ -318,7 +331,10 @@ GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, b
history forked off from the target cluster. For each WAL record,
record each data block that was touched. This yields a list of all
the data blocks that were changed in the target cluster, after the
source cluster forked off.
source cluster forked off. If some of the WAL files are no longer
available, try re-running <application>pg_rewind</application> with
the <option>-c</option> option to search for the missing files in
the WAL archive.
</para>
</step>
<step>

View File

@ -19,6 +19,7 @@
#include "catalog/pg_control.h"
#include "catalog/storage_xlog.h"
#include "commands/dbcommands_xlog.h"
#include "common/fe_archive.h"
#include "filemap.h"
#include "pg_rewind.h"
@ -41,6 +42,7 @@ static char xlogfpath[MAXPGPATH];
typedef struct XLogPageReadPrivate
{
const char *restoreCommand;
int tliIndex;
} XLogPageReadPrivate;
@ -55,7 +57,7 @@ static int SimpleXLogPageRead(XLogReaderState *xlogreader,
*/
void
extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
XLogRecPtr endpoint)
XLogRecPtr endpoint, const char *restoreCommand)
{
XLogRecord *record;
XLogReaderState *xlogreader;
@ -63,6 +65,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
XLogPageReadPrivate private;
private.tliIndex = tliIndex;
private.restoreCommand = restoreCommand;
xlogreader = XLogReaderAllocate(WalSegSz, datadir, &SimpleXLogPageRead,
&private);
if (xlogreader == NULL)
@ -146,7 +149,7 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex)
void
findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
XLogRecPtr *lastchkptredo)
XLogRecPtr *lastchkptredo, const char *restoreCommand)
{
/* Walk backwards, starting from the given record */
XLogRecord *record;
@ -170,6 +173,7 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
}
private.tliIndex = tliIndex;
private.restoreCommand = restoreCommand;
xlogreader = XLogReaderAllocate(WalSegSz, datadir, &SimpleXLogPageRead,
&private);
if (xlogreader == NULL)
@ -281,8 +285,29 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
if (xlogreadfd < 0)
{
pg_log_error("could not open file \"%s\": %m", xlogfpath);
return -1;
/*
* If we have no restore_command to execute, then exit.
*/
if (private->restoreCommand == NULL)
{
pg_log_error("could not open file \"%s\": %m", xlogfpath);
return -1;
}
/*
* Since we have restore_command, then try to retrieve missing WAL
* file from the archive.
*/
xlogreadfd = RestoreArchivedFile(xlogreader->segcxt.ws_dir,
xlogfname,
WalSegSz,
private->restoreCommand);
if (xlogreadfd < 0)
return -1;
else
pg_log_debug("using file \"%s\" restored from archive",
xlogfpath);
}
}

View File

@ -22,6 +22,7 @@
#include "common/file_perm.h"
#include "common/file_utils.h"
#include "common/restricted_token.h"
#include "common/string.h"
#include "fe_utils/recovery_gen.h"
#include "fetch.h"
#include "file_ops.h"
@ -38,6 +39,7 @@ static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
static void digestControlFile(ControlFileData *ControlFile, char *source,
size_t size);
static void syncTargetDirectory(void);
static void getRestoreCommand(const char *argv0);
static void sanityChecks(void);
static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
static void ensureCleanShutdown(const char *argv0);
@ -53,11 +55,13 @@ int WalSegSz;
char *datadir_target = NULL;
char *datadir_source = NULL;
char *connstr_source = NULL;
char *restore_command = NULL;
static bool debug = false;
bool showprogress = false;
bool dry_run = false;
bool do_sync = true;
bool restore_wal = false;
/* Target history */
TimeLineHistoryEntry *targetHistory;
@ -74,6 +78,8 @@ usage(const char *progname)
printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
printf(_("Options:\n"));
printf(_(" -c, --restore-target-wal use restore_command in target config\n"
" to retrieve WAL files from archives\n"));
printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
@ -103,6 +109,7 @@ main(int argc, char **argv)
{"source-server", required_argument, NULL, 2},
{"no-ensure-shutdown", no_argument, NULL, 4},
{"version", no_argument, NULL, 'V'},
{"restore-target-wal", no_argument, NULL, 'c'},
{"dry-run", no_argument, NULL, 'n'},
{"no-sync", no_argument, NULL, 'N'},
{"progress", no_argument, NULL, 'P'},
@ -144,7 +151,7 @@ main(int argc, char **argv)
}
}
while ((c = getopt_long(argc, argv, "D:nNPR", long_options, &option_index)) != -1)
while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
{
switch (c)
{
@ -152,6 +159,10 @@ main(int argc, char **argv)
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
case 'c':
restore_wal = true;
break;
case 'P':
showprogress = true;
break;
@ -255,6 +266,8 @@ main(int argc, char **argv)
umask(pg_mode_mask);
getRestoreCommand(argv[0]);
atexit(disconnect_atexit);
/* Connect to remote server */
@ -350,9 +363,8 @@ main(int argc, char **argv)
exit(0);
}
findLastCheckpoint(datadir_target, divergerec,
lastcommontliIndex,
&chkptrec, &chkpttli, &chkptredo);
findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
&chkptrec, &chkpttli, &chkptredo, restore_command);
pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
(uint32) (chkptrec >> 32), (uint32) chkptrec,
chkpttli);
@ -378,7 +390,7 @@ main(int argc, char **argv)
if (showprogress)
pg_log_info("reading WAL in target");
extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
ControlFile_target.checkPoint);
ControlFile_target.checkPoint, restore_command);
filemap_finalize();
if (showprogress)
@ -804,6 +816,71 @@ syncTargetDirectory(void)
fsync_pgdata(datadir_target, PG_VERSION_NUM);
}
/*
* Get value of GUC parameter restore_command from the target cluster.
*
* This uses a logic based on "postgres -C" to get the value from the
* cluster.
*/
static void
getRestoreCommand(const char *argv0)
{
int rc;
char postgres_exec_path[MAXPGPATH],
postgres_cmd[MAXPGPATH],
cmd_output[MAXPGPATH];
if (!restore_wal)
return;
/* find postgres executable */
rc = find_other_exec(argv0, "postgres",
PG_BACKEND_VERSIONSTR,
postgres_exec_path);
if (rc < 0)
{
char full_path[MAXPGPATH];
if (find_my_exec(argv0, full_path) < 0)
strlcpy(full_path, progname, sizeof(full_path));
if (rc == -1)
pg_log_error("The program \"postgres\" is needed by %s but was not found in the\n"
"same directory as \"%s\".\n"
"Check your installation.",
progname, full_path);
else
pg_log_error("The program \"postgres\" was found by \"%s\"\n"
"but was not the same version as %s.\n"
"Check your installation.",
full_path, progname);
exit(1);
}
/*
* Build a command able to retrieve the value of GUC parameter
* restore_command, if set.
*/
snprintf(postgres_cmd, sizeof(postgres_cmd),
"\"%s\" -D \"%s\" -C restore_command",
postgres_exec_path, datadir_target);
if (!pipe_read_line(postgres_cmd, cmd_output, sizeof(cmd_output)))
exit(1);
(void) pg_strip_crlf(cmd_output);
if (strcmp(cmd_output, "") == 0)
pg_fatal("restore_command is not set on the target cluster");
restore_command = pg_strdup(cmd_output);
pg_log_debug("using for rewind restore_command = \'%s\'",
restore_command);
}
/*
* Ensure clean shutdown of target instance by launching single-user mode
* postgres to do crash recovery.

View File

@ -42,11 +42,13 @@ extern uint64 fetch_done;
/* in parsexlog.c */
extern void extractPageMap(const char *datadir, XLogRecPtr startpoint,
int tliIndex, XLogRecPtr endpoint);
int tliIndex, XLogRecPtr endpoint,
const char *restoreCommand);
extern void findLastCheckpoint(const char *datadir, XLogRecPtr searchptr,
int tliIndex,
XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
XLogRecPtr *lastchkptredo);
XLogRecPtr *lastchkptredo,
const char *restoreCommand);
extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr,
int tliIndex);

View File

@ -1,7 +1,7 @@
use strict;
use warnings;
use TestLib;
use Test::More tests => 15;
use Test::More tests => 20;
use FindBin;
use lib $FindBin::RealBin;
@ -171,5 +171,6 @@ in master, before promotion
# Run the test in both modes
run_test('local');
run_test('remote');
run_test('archive');
exit(0);

View File

@ -38,6 +38,7 @@ use File::Copy;
use File::Path qw(rmtree);
use IPC::Run qw(run);
use PostgresNode;
use RecursiveCopy;
use TestLib;
use Test::More;
@ -227,10 +228,26 @@ sub run_pg_rewind
# Append the rewind-specific role to the connection string.
$standby_connstr = "$standby_connstr user=rewind_user";
# Stop the master and be ready to perform the rewind. The cluster
# needs recovery to finish once, and pg_rewind makes sure that it
# happens automatically.
$node_master->stop('immediate');
if ($test_mode eq 'archive')
{
# pg_rewind is tested with --restore-target-wal by moving all
# WAL files to a secondary location. Note that this leads to
# a failure in ensureCleanShutdown(), forcing to the use of
# --no-ensure-shutdown in this mode as the initial set of WAL
# files needed to ensure a clean restart is gone. This could
# be improved by keeping around only a minimum set of WAL
# segments but that would just make the test more costly,
# without improving the coverage. Hence, instead, stop
# gracefully the primary here.
$node_master->stop;
}
else
{
# Stop the master and be ready to perform the rewind. The cluster
# needs recovery to finish once, and pg_rewind makes sure that it
# happens automatically.
$node_master->stop('immediate');
}
# At this point, the rewind processing is ready to run.
# We now have a very simple scenario with a few diverged WAL record.
@ -284,6 +301,51 @@ sub run_pg_rewind
$node_standby->safe_psql('postgres',
"ALTER ROLE rewind_user WITH REPLICATION;");
}
elsif ($test_mode eq "archive")
{
# Do rewind using a local pgdata as source and specified
# directory with target WAL archive. The old master has
# to be stopped at this point.
# Remove the existing archive directory and move all WAL
# segments from the old master to the archives. These
# will be used by pg_rewind.
rmtree($node_master->archive_dir);
RecursiveCopy::copypath($node_master->data_dir . "/pg_wal",
$node_master->archive_dir);
# Fast way to remove entire directory content
rmtree($node_master->data_dir . "/pg_wal");
mkdir($node_master->data_dir . "/pg_wal");
# Make sure that directories have the right umask as this is
# required by a follow-up check on permissions, and better
# safe than sorry.
chmod(0700, $node_master->archive_dir);
chmod(0700, $node_master->data_dir . "/pg_wal");
# Add appropriate restore_command to the target cluster
$node_master->enable_restoring($node_master, 0);
# Stop the new master and be ready to perform the rewind.
$node_standby->stop;
# Note the use of --no-ensure-shutdown here. WAL files are
# gone in this mode and the primary has been stopped
# gracefully already.
command_ok(
[
'pg_rewind',
"--debug",
"--source-pgdata=$standby_pgdata",
"--target-pgdata=$master_pgdata",
"--no-sync",
"--no-ensure-shutdown",
"--restore-target-wal"
],
'pg_rewind archive');
}
else
{

View File

@ -88,6 +88,7 @@ endif
# (Mkvcbuild.pm has a copy of this list, too)
OBJS_FRONTEND = \
$(OBJS_COMMON) \
fe_archive.o \
fe_memutils.o \
file_utils.o \
logging.o \

View File

@ -51,7 +51,6 @@
static int validate_exec(const char *path);
static int resolve_symlinks(char *path);
static char *pipe_read_line(char *cmd, char *line, int maxsize);
#ifdef WIN32
static BOOL GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser);
@ -356,7 +355,7 @@ find_other_exec(const char *argv0, const char *target,
/*
* Execute a command in a pipe and read the first line from it.
*/
static char *
char *
pipe_read_line(char *cmd, char *line, int maxsize)
{
FILE *pgver;

128
src/common/fe_archive.c Normal file
View File

@ -0,0 +1,128 @@
/*-------------------------------------------------------------------------
*
* fe_archive.c
* Routines to access WAL archives from frontend
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/common/fe_archive.c
*
*-------------------------------------------------------------------------
*/
#ifndef FRONTEND
#error "This file is not expected to be compiled for backend code"
#endif
#include "postgres_fe.h"
#include <unistd.h>
#include <sys/stat.h>
#include "access/xlog_internal.h"
#include "common/archive.h"
#include "common/fe_archive.h"
#include "common/logging.h"
/*
* RestoreArchivedFile
*
* Attempt to retrieve the specified file from off-line archival storage.
* If successful, return a file descriptor of the restored file, else
* return -1.
*
* For fixed-size files, the caller may pass the expected size as an
* additional crosscheck on successful recovery. If the file size is not
* known, set expectedSize = 0.
*/
int
RestoreArchivedFile(const char *path, const char *xlogfname,
off_t expectedSize, const char *restoreCommand)
{
char xlogpath[MAXPGPATH];
char *xlogRestoreCmd;
int rc;
struct stat stat_buf;
snprintf(xlogpath, MAXPGPATH, "%s/" XLOGDIR "/%s", path, xlogfname);
xlogRestoreCmd = BuildRestoreCommand(restoreCommand, xlogpath,
xlogfname, NULL);
if (xlogRestoreCmd == NULL)
{
pg_log_fatal("could not use restore_command with %%r alias");
exit(1);
}
/*
* Execute restore_command, which should copy the missing file from
* archival storage.
*/
rc = system(xlogRestoreCmd);
pfree(xlogRestoreCmd);
if (rc == 0)
{
/*
* Command apparently succeeded, but let's make sure the file is
* really there now and has the correct size.
*/
if (stat(xlogpath, &stat_buf) == 0)
{
if (expectedSize > 0 && stat_buf.st_size != expectedSize)
{
pg_log_fatal("unexpected file size for \"%s\": %lu instead of %lu",
xlogfname, (unsigned long) stat_buf.st_size,
(unsigned long) expectedSize);
exit(1);
}
else
{
int xlogfd = open(xlogpath, O_RDONLY | PG_BINARY, 0);
if (xlogfd < 0)
{
pg_log_fatal("could not open file \"%s\" restored from archive: %m",
xlogpath);
exit(1);
}
else
return xlogfd;
}
}
else
{
if (errno != ENOENT)
{
pg_log_fatal("could not stat file \"%s\": %m",
xlogpath);
exit(1);
}
}
}
/*
* If the failure was due to a signal, then it would be misleading to
* return with a failure at restoring the file. So just bail out and
* exit. Hard shell errors such as "command not found" are treated as
* fatal too.
*/
if (wait_result_is_any_signal(rc, true))
{
pg_log_fatal("restore_command failed due to the signal: %s",
wait_result_to_str(rc));
exit(1);
}
/*
* The file is not available, so just let the caller decide what to do
* next.
*/
pg_log_error("could not restore file \"%s\" from archive",
xlogfname);
return -1;
}

View File

@ -0,0 +1,21 @@
/*-------------------------------------------------------------------------
*
* fe_archive.h
* Routines to access WAL archives from frontend
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/common/fe_archive.h
*
*-------------------------------------------------------------------------
*/
#ifndef FE_ARCHIVE_H
#define FE_ARCHIVE_H
extern int RestoreArchivedFile(const char *path,
const char *xlogfname,
off_t expectedSize,
const char *restoreCommand);
#endif /* FE_ARCHIVE_H */

View File

@ -102,10 +102,11 @@ extern void pgfnames_cleanup(char **filenames);
/* Portable locale initialization (in exec.c) */
extern void set_pglocale_pgservice(const char *argv0, const char *app);
/* Portable way to find binaries (in exec.c) */
/* Portable way to find and execute binaries (in exec.c) */
extern int find_my_exec(const char *argv0, char *retpath);
extern int find_other_exec(const char *argv0, const char *target,
const char *versionstr, char *retpath);
extern char *pipe_read_line(char *cmd, char *line, int maxsize);
/* Doesn't belong here, but this is used with find_other_exec(), so... */
#define PG_BACKEND_VERSIONSTR "postgres (PostgreSQL) " PG_VERSION "\n"

View File

@ -139,8 +139,8 @@ sub mkvcbuild
}
our @pgcommonfrontendfiles = (
@pgcommonallfiles, qw(fe_memutils.c file_utils.c
logging.c restricted_token.c));
@pgcommonallfiles, qw(fe_archive.c fe_memutils.c
file_utils.c logging.c restricted_token.c));
our @pgcommonbkndfiles = @pgcommonallfiles;