2012-10-02 12:37:19 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* xlogarchive.c
|
|
|
|
* Functions for archiving WAL files and restoring from the archive.
|
|
|
|
*
|
|
|
|
*
|
2019-01-02 18:44:25 +01:00
|
|
|
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
2012-10-02 12:37:19 +02:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* src/backend/access/transam/xlogarchive.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include <sys/stat.h>
|
2012-10-02 16:19:52 +02:00
|
|
|
#include <sys/wait.h>
|
2012-10-02 12:37:19 +02:00
|
|
|
#include <signal.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
2012-12-13 13:59:13 +01:00
|
|
|
#include "access/xlog.h"
|
2012-10-02 12:37:19 +02:00
|
|
|
#include "access/xlog_internal.h"
|
|
|
|
#include "miscadmin.h"
|
|
|
|
#include "postmaster/startup.h"
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
#include "replication/walsender.h"
|
2012-10-02 12:37:19 +02:00
|
|
|
#include "storage/fd.h"
|
|
|
|
#include "storage/ipc.h"
|
|
|
|
#include "storage/lwlock.h"
|
|
|
|
#include "storage/pmsignal.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Attempt to retrieve the specified file from off-line archival storage.
|
|
|
|
* If successful, fill "path" with its complete path (note that this will be
|
|
|
|
* a temp file name that doesn't follow the normal naming convention), and
|
2017-08-16 06:22:32 +02:00
|
|
|
* return true.
|
2012-10-02 12:37:19 +02:00
|
|
|
*
|
|
|
|
* If not successful, fill "path" with the name of the normal on-line file
|
|
|
|
* (which may or may not actually exist, but we'll try to use it), and return
|
2017-08-16 06:22:32 +02:00
|
|
|
* false.
|
2012-10-02 12:37:19 +02:00
|
|
|
*
|
|
|
|
* For fixed-size files, the caller may pass the expected size as an
|
|
|
|
* additional crosscheck on successful recovery. If the file size is not
|
|
|
|
* known, set expectedSize = 0.
|
2012-11-19 09:02:25 +01:00
|
|
|
*
|
|
|
|
* When 'cleanupEnabled' is false, refrain from deleting any old WAL segments
|
|
|
|
* in the archive. This is used when fetching the initial checkpoint record,
|
|
|
|
* when we are not yet sure how far back we need the WAL.
|
2012-10-02 12:37:19 +02:00
|
|
|
*/
|
|
|
|
bool
|
|
|
|
RestoreArchivedFile(char *path, const char *xlogfname,
|
2012-11-19 09:02:25 +01:00
|
|
|
const char *recovername, off_t expectedSize,
|
|
|
|
bool cleanupEnabled)
|
2012-10-02 12:37:19 +02:00
|
|
|
{
|
|
|
|
char xlogpath[MAXPGPATH];
|
|
|
|
char xlogRestoreCmd[MAXPGPATH];
|
|
|
|
char lastRestartPointFname[MAXPGPATH];
|
|
|
|
char *dp;
|
|
|
|
char *endp;
|
|
|
|
const char *sp;
|
|
|
|
int rc;
|
|
|
|
struct stat stat_buf;
|
|
|
|
XLogSegNo restartSegNo;
|
|
|
|
XLogRecPtr restartRedoPtr;
|
|
|
|
TimeLineID restartTli;
|
|
|
|
|
2019-10-11 08:47:59 +02:00
|
|
|
/*
|
|
|
|
* Ignore restore_command when not in archive recovery (meaning
|
|
|
|
* we are in crash recovery).
|
|
|
|
*/
|
|
|
|
if (!ArchiveRecoveryRequested)
|
|
|
|
goto not_available;
|
|
|
|
|
2012-10-02 12:37:19 +02:00
|
|
|
/* In standby mode, restore_command might not be supplied */
|
2018-11-25 16:31:16 +01:00
|
|
|
if (recoveryRestoreCommand == NULL || strcmp(recoveryRestoreCommand, "") == 0)
|
2012-10-02 12:37:19 +02:00
|
|
|
goto not_available;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When doing archive recovery, we always prefer an archived log file even
|
|
|
|
* if a file of the same name exists in XLOGDIR. The reason is that the
|
|
|
|
* file in XLOGDIR could be an old, un-filled or partly-filled version
|
|
|
|
* that was copied and restored as part of backing up $PGDATA.
|
|
|
|
*
|
|
|
|
* We could try to optimize this slightly by checking the local copy
|
|
|
|
* lastchange timestamp against the archived copy, but we have no API to
|
|
|
|
* do this, nor can we guarantee that the lastchange timestamp was
|
|
|
|
* preserved correctly when we copied to archive. Our aim is robustness,
|
|
|
|
* so we elect not to do this.
|
|
|
|
*
|
|
|
|
* If we cannot obtain the log file from the archive, however, we will try
|
|
|
|
* to use the XLOGDIR file if it exists. This is so that we can make use
|
|
|
|
* of log segments that weren't yet transferred to the archive.
|
|
|
|
*
|
|
|
|
* Notice that we don't actually overwrite any files when we copy back
|
2013-05-29 22:58:43 +02:00
|
|
|
* from archive because the restore_command may inadvertently restore
|
|
|
|
* inappropriate xlogs, or they may be corrupt, so we may wish to fallback
|
|
|
|
* to the segments remaining in current XLOGDIR later. The
|
2012-10-02 12:37:19 +02:00
|
|
|
* copy-from-archive filename is always the same, ensuring that we don't
|
|
|
|
* run out of disk space on long recoveries.
|
|
|
|
*/
|
|
|
|
snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make sure there is no existing file named recovername.
|
|
|
|
*/
|
|
|
|
if (stat(xlogpath, &stat_buf) != 0)
|
|
|
|
{
|
|
|
|
if (errno != ENOENT)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not stat file \"%s\": %m",
|
|
|
|
xlogpath)));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (unlink(xlogpath) != 0)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not remove file \"%s\": %m",
|
|
|
|
xlogpath)));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Calculate the archive file cutoff point for use during log shipping
|
|
|
|
* replication. All files earlier than this point can be deleted from the
|
|
|
|
* archive, though there is no requirement to do so.
|
|
|
|
*
|
2012-11-19 09:02:25 +01:00
|
|
|
* If cleanup is not enabled, initialise this with the filename of
|
|
|
|
* InvalidXLogRecPtr, which will prevent the deletion of any WAL files
|
|
|
|
* from the archive because of the alphabetic sorting property of WAL
|
|
|
|
* filenames.
|
2012-10-02 12:37:19 +02:00
|
|
|
*
|
|
|
|
* Once we have successfully located the redo pointer of the checkpoint
|
|
|
|
* from which we start recovery we never request a file prior to the redo
|
|
|
|
* pointer of the last restartpoint. When redo begins we know that we have
|
|
|
|
* successfully located it, so there is no need for additional status
|
|
|
|
* flags to signify the point when we can begin deleting WAL files from
|
|
|
|
* the archive.
|
|
|
|
*/
|
2012-11-19 09:02:25 +01:00
|
|
|
if (cleanupEnabled)
|
2012-10-02 12:37:19 +02:00
|
|
|
{
|
2012-11-19 09:02:25 +01:00
|
|
|
GetOldestRestartPoint(&restartRedoPtr, &restartTli);
|
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB
can be useful. A larger segment size has two main benefits: Firstly,
in setups using archiving, it makes it easier to write scripts that
can keep up with higher amounts of WAL, secondly, the WAL has to be
written and synced to disk less frequently.
But at the same time large segment size are disadvantageous for
smaller databases. So far the segment size had to be configured at
compile time, often making it unrealistic to choose one fitting to a
particularly load. Therefore change it to a initdb time setting.
This includes a breaking changes to the xlogreader.h API, which now
requires the current segment size to be configured. For that and
similar reasons a number of binaries had to be taught how to recognize
the current segment size.
Author: Beena Emerson, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael
Paquier, Peter Eisentraut, Robert Hass, Tushar Ahuja
Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
2017-09-20 07:03:48 +02:00
|
|
|
XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
|
|
|
|
XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
|
|
|
|
wal_segment_size);
|
2012-10-02 12:37:19 +02:00
|
|
|
/* we shouldn't need anything earlier than last restart point */
|
|
|
|
Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
|
|
|
|
}
|
|
|
|
else
|
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB
can be useful. A larger segment size has two main benefits: Firstly,
in setups using archiving, it makes it easier to write scripts that
can keep up with higher amounts of WAL, secondly, the WAL has to be
written and synced to disk less frequently.
But at the same time large segment size are disadvantageous for
smaller databases. So far the segment size had to be configured at
compile time, often making it unrealistic to choose one fitting to a
particularly load. Therefore change it to a initdb time setting.
This includes a breaking changes to the xlogreader.h API, which now
requires the current segment size to be configured. For that and
similar reasons a number of binaries had to be taught how to recognize
the current segment size.
Author: Beena Emerson, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael
Paquier, Peter Eisentraut, Robert Hass, Tushar Ahuja
Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
2017-09-20 07:03:48 +02:00
|
|
|
XLogFileName(lastRestartPointFname, 0, 0L, wal_segment_size);
|
2012-10-02 12:37:19 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* construct the command to be executed
|
|
|
|
*/
|
|
|
|
dp = xlogRestoreCmd;
|
|
|
|
endp = xlogRestoreCmd + MAXPGPATH - 1;
|
|
|
|
*endp = '\0';
|
|
|
|
|
|
|
|
for (sp = recoveryRestoreCommand; *sp; sp++)
|
|
|
|
{
|
|
|
|
if (*sp == '%')
|
|
|
|
{
|
|
|
|
switch (sp[1])
|
|
|
|
{
|
|
|
|
case 'p':
|
|
|
|
/* %p: relative path of target file */
|
|
|
|
sp++;
|
|
|
|
StrNCpy(dp, xlogpath, endp - dp);
|
|
|
|
make_native_path(dp);
|
|
|
|
dp += strlen(dp);
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
/* %f: filename of desired file */
|
|
|
|
sp++;
|
|
|
|
StrNCpy(dp, xlogfname, endp - dp);
|
|
|
|
dp += strlen(dp);
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
/* %r: filename of last restartpoint */
|
|
|
|
sp++;
|
|
|
|
StrNCpy(dp, lastRestartPointFname, endp - dp);
|
|
|
|
dp += strlen(dp);
|
|
|
|
break;
|
|
|
|
case '%':
|
|
|
|
/* convert %% to a single % */
|
|
|
|
sp++;
|
|
|
|
if (dp < endp)
|
|
|
|
*dp++ = *sp;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* otherwise treat the % as not special */
|
|
|
|
if (dp < endp)
|
|
|
|
*dp++ = *sp;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (dp < endp)
|
|
|
|
*dp++ = *sp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*dp = '\0';
|
|
|
|
|
|
|
|
ereport(DEBUG3,
|
|
|
|
(errmsg_internal("executing restore command \"%s\"",
|
|
|
|
xlogRestoreCmd)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check signals before restore command and reset afterwards.
|
|
|
|
*/
|
|
|
|
PreRestoreCommand();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copy xlog from archival storage to XLOGDIR
|
|
|
|
*/
|
|
|
|
rc = system(xlogRestoreCmd);
|
|
|
|
|
|
|
|
PostRestoreCommand();
|
|
|
|
|
|
|
|
if (rc == 0)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* command apparently succeeded, but let's make sure the file is
|
|
|
|
* really there now and has the correct size.
|
|
|
|
*/
|
|
|
|
if (stat(xlogpath, &stat_buf) == 0)
|
|
|
|
{
|
|
|
|
if (expectedSize > 0 && stat_buf.st_size != expectedSize)
|
|
|
|
{
|
|
|
|
int elevel;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we find a partial file in standby mode, we assume it's
|
|
|
|
* because it's just being copied to the archive, and keep
|
|
|
|
* trying.
|
|
|
|
*
|
|
|
|
* Otherwise treat a wrong-sized file as FATAL to ensure the
|
|
|
|
* DBA would notice it, but is that too strong? We could try
|
|
|
|
* to plow ahead with a local copy of the file ... but the
|
|
|
|
* problem is that there probably isn't one, and we'd
|
|
|
|
* incorrectly conclude we've reached the end of WAL and we're
|
|
|
|
* done recovering ...
|
|
|
|
*/
|
|
|
|
if (StandbyMode && stat_buf.st_size < expectedSize)
|
|
|
|
elevel = DEBUG1;
|
|
|
|
else
|
|
|
|
elevel = FATAL;
|
|
|
|
ereport(elevel,
|
|
|
|
(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
|
|
|
|
xlogfname,
|
|
|
|
(unsigned long) stat_buf.st_size,
|
|
|
|
(unsigned long) expectedSize)));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ereport(LOG,
|
|
|
|
(errmsg("restored log file \"%s\" from archive",
|
|
|
|
xlogfname)));
|
|
|
|
strcpy(path, xlogpath);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* stat failed */
|
|
|
|
if (errno != ENOENT)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not stat file \"%s\": %m",
|
|
|
|
xlogpath)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remember, we rollforward UNTIL the restore fails so failure here is
|
|
|
|
* just part of the process... that makes it difficult to determine
|
|
|
|
* whether the restore failed because there isn't an archive to restore,
|
|
|
|
* or because the administrator has specified the restore program
|
|
|
|
* incorrectly. We have to assume the former.
|
|
|
|
*
|
|
|
|
* However, if the failure was due to any sort of signal, it's best to
|
|
|
|
* punt and abort recovery. (If we "return false" here, upper levels will
|
|
|
|
* assume that recovery is complete and start up the database!) It's
|
|
|
|
* essential to abort on child SIGINT and SIGQUIT, because per spec
|
|
|
|
* system() ignores SIGINT and SIGQUIT while waiting; if we see one of
|
|
|
|
* those it's a good bet we should have gotten it too.
|
|
|
|
*
|
|
|
|
* On SIGTERM, assume we have received a fast shutdown request, and exit
|
|
|
|
* cleanly. It's pure chance whether we receive the SIGTERM first, or the
|
|
|
|
* child process. If we receive it first, the signal handler will call
|
|
|
|
* proc_exit, otherwise we do it here. If we or the child process received
|
|
|
|
* SIGTERM for any other reason than a fast shutdown request, postmaster
|
|
|
|
* will perform an immediate shutdown when it sees us exiting
|
|
|
|
* unexpectedly.
|
|
|
|
*
|
Improve detection of child-process SIGPIPE failures.
Commit ffa4cbd62 added logic to detect SIGPIPE failure of a COPY child
process, but it only worked correctly if the SIGPIPE occurred in the
immediate child process. Depending on the shell in use and the
complexity of the shell command string, we might instead get back
an exit code of 128 + SIGPIPE, representing a shell error exit
reporting SIGPIPE in the child process.
We could just hack up ClosePipeToProgram() to add the extra case,
but it seems like this is a fairly general issue deserving a more
general and better-documented solution. I chose to add a couple
of functions in src/common/wait_error.c, which is a natural place
to know about wait-result encodings, that will test for either a
specific child-process signal type or any child-process signal failure.
Then, adjust other places that were doing ad-hoc tests of this type
to use the common functions.
In RestoreArchivedFile, this fixes a race condition affecting whether
the process will report an error or just silently proc_exit(1): before,
that depended on whether the intermediate shell got SIGTERM'd itself
or reported a child process failing on SIGTERM.
Like the previous patch, back-patch to v10; we could go further
but there seems no real need to.
Per report from Erik Rijkers.
Discussion: https://postgr.es/m/f3683f87ab1701bea5d86a7742b22432@xs4all.nl
2018-12-16 20:32:14 +01:00
|
|
|
* We treat hard shell errors such as "command not found" as fatal, too.
|
2012-10-02 12:37:19 +02:00
|
|
|
*/
|
Improve detection of child-process SIGPIPE failures.
Commit ffa4cbd62 added logic to detect SIGPIPE failure of a COPY child
process, but it only worked correctly if the SIGPIPE occurred in the
immediate child process. Depending on the shell in use and the
complexity of the shell command string, we might instead get back
an exit code of 128 + SIGPIPE, representing a shell error exit
reporting SIGPIPE in the child process.
We could just hack up ClosePipeToProgram() to add the extra case,
but it seems like this is a fairly general issue deserving a more
general and better-documented solution. I chose to add a couple
of functions in src/common/wait_error.c, which is a natural place
to know about wait-result encodings, that will test for either a
specific child-process signal type or any child-process signal failure.
Then, adjust other places that were doing ad-hoc tests of this type
to use the common functions.
In RestoreArchivedFile, this fixes a race condition affecting whether
the process will report an error or just silently proc_exit(1): before,
that depended on whether the intermediate shell got SIGTERM'd itself
or reported a child process failing on SIGTERM.
Like the previous patch, back-patch to v10; we could go further
but there seems no real need to.
Per report from Erik Rijkers.
Discussion: https://postgr.es/m/f3683f87ab1701bea5d86a7742b22432@xs4all.nl
2018-12-16 20:32:14 +01:00
|
|
|
if (wait_result_is_signal(rc, SIGTERM))
|
2012-10-02 12:37:19 +02:00
|
|
|
proc_exit(1);
|
|
|
|
|
Improve detection of child-process SIGPIPE failures.
Commit ffa4cbd62 added logic to detect SIGPIPE failure of a COPY child
process, but it only worked correctly if the SIGPIPE occurred in the
immediate child process. Depending on the shell in use and the
complexity of the shell command string, we might instead get back
an exit code of 128 + SIGPIPE, representing a shell error exit
reporting SIGPIPE in the child process.
We could just hack up ClosePipeToProgram() to add the extra case,
but it seems like this is a fairly general issue deserving a more
general and better-documented solution. I chose to add a couple
of functions in src/common/wait_error.c, which is a natural place
to know about wait-result encodings, that will test for either a
specific child-process signal type or any child-process signal failure.
Then, adjust other places that were doing ad-hoc tests of this type
to use the common functions.
In RestoreArchivedFile, this fixes a race condition affecting whether
the process will report an error or just silently proc_exit(1): before,
that depended on whether the intermediate shell got SIGTERM'd itself
or reported a child process failing on SIGTERM.
Like the previous patch, back-patch to v10; we could go further
but there seems no real need to.
Per report from Erik Rijkers.
Discussion: https://postgr.es/m/f3683f87ab1701bea5d86a7742b22432@xs4all.nl
2018-12-16 20:32:14 +01:00
|
|
|
ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
|
2014-05-06 18:12:18 +02:00
|
|
|
(errmsg("could not restore file \"%s\" from archive: %s",
|
|
|
|
xlogfname, wait_result_to_str(rc))));
|
2012-10-02 12:37:19 +02:00
|
|
|
|
|
|
|
not_available:
|
|
|
|
|
|
|
|
/*
|
|
|
|
* if an archived file is not available, there might still be a version of
|
|
|
|
* this file in XLOGDIR, so return that as the filename to open.
|
|
|
|
*
|
|
|
|
* In many recovery scenarios we expect this to fail also, but if so that
|
|
|
|
* just means we've reached the end of WAL.
|
|
|
|
*/
|
|
|
|
snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Attempt to execute an external shell command during recovery.
|
|
|
|
*
|
|
|
|
* 'command' is the shell command to be executed, 'commandName' is a
|
|
|
|
* human-readable name describing the command emitted in the logs. If
|
|
|
|
* 'failOnSignal' is true and the command is killed by a signal, a FATAL
|
|
|
|
* error is thrown. Otherwise a WARNING is emitted.
|
|
|
|
*
|
|
|
|
* This is currently used for recovery_end_command and archive_cleanup_command.
|
|
|
|
*/
|
|
|
|
void
|
2017-10-31 15:34:31 +01:00
|
|
|
ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal)
|
2012-10-02 12:37:19 +02:00
|
|
|
{
|
|
|
|
char xlogRecoveryCmd[MAXPGPATH];
|
|
|
|
char lastRestartPointFname[MAXPGPATH];
|
|
|
|
char *dp;
|
|
|
|
char *endp;
|
|
|
|
const char *sp;
|
|
|
|
int rc;
|
|
|
|
XLogSegNo restartSegNo;
|
|
|
|
XLogRecPtr restartRedoPtr;
|
|
|
|
TimeLineID restartTli;
|
|
|
|
|
|
|
|
Assert(command && commandName);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Calculate the archive file cutoff point for use during log shipping
|
|
|
|
* replication. All files earlier than this point can be deleted from the
|
|
|
|
* archive, though there is no requirement to do so.
|
|
|
|
*/
|
|
|
|
GetOldestRestartPoint(&restartRedoPtr, &restartTli);
|
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB
can be useful. A larger segment size has two main benefits: Firstly,
in setups using archiving, it makes it easier to write scripts that
can keep up with higher amounts of WAL, secondly, the WAL has to be
written and synced to disk less frequently.
But at the same time large segment size are disadvantageous for
smaller databases. So far the segment size had to be configured at
compile time, often making it unrealistic to choose one fitting to a
particularly load. Therefore change it to a initdb time setting.
This includes a breaking changes to the xlogreader.h API, which now
requires the current segment size to be configured. For that and
similar reasons a number of binaries had to be taught how to recognize
the current segment size.
Author: Beena Emerson, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael
Paquier, Peter Eisentraut, Robert Hass, Tushar Ahuja
Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
2017-09-20 07:03:48 +02:00
|
|
|
XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
|
|
|
|
XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
|
|
|
|
wal_segment_size);
|
2012-10-02 12:37:19 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* construct the command to be executed
|
|
|
|
*/
|
|
|
|
dp = xlogRecoveryCmd;
|
|
|
|
endp = xlogRecoveryCmd + MAXPGPATH - 1;
|
|
|
|
*endp = '\0';
|
|
|
|
|
|
|
|
for (sp = command; *sp; sp++)
|
|
|
|
{
|
|
|
|
if (*sp == '%')
|
|
|
|
{
|
|
|
|
switch (sp[1])
|
|
|
|
{
|
|
|
|
case 'r':
|
|
|
|
/* %r: filename of last restartpoint */
|
|
|
|
sp++;
|
|
|
|
StrNCpy(dp, lastRestartPointFname, endp - dp);
|
|
|
|
dp += strlen(dp);
|
|
|
|
break;
|
|
|
|
case '%':
|
|
|
|
/* convert %% to a single % */
|
|
|
|
sp++;
|
|
|
|
if (dp < endp)
|
|
|
|
*dp++ = *sp;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* otherwise treat the % as not special */
|
|
|
|
if (dp < endp)
|
|
|
|
*dp++ = *sp;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (dp < endp)
|
|
|
|
*dp++ = *sp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*dp = '\0';
|
|
|
|
|
|
|
|
ereport(DEBUG3,
|
|
|
|
(errmsg_internal("executing %s \"%s\"", commandName, command)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* execute the constructed command
|
|
|
|
*/
|
|
|
|
rc = system(xlogRecoveryCmd);
|
|
|
|
if (rc != 0)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If the failure was due to any sort of signal, it's best to punt and
|
Improve detection of child-process SIGPIPE failures.
Commit ffa4cbd62 added logic to detect SIGPIPE failure of a COPY child
process, but it only worked correctly if the SIGPIPE occurred in the
immediate child process. Depending on the shell in use and the
complexity of the shell command string, we might instead get back
an exit code of 128 + SIGPIPE, representing a shell error exit
reporting SIGPIPE in the child process.
We could just hack up ClosePipeToProgram() to add the extra case,
but it seems like this is a fairly general issue deserving a more
general and better-documented solution. I chose to add a couple
of functions in src/common/wait_error.c, which is a natural place
to know about wait-result encodings, that will test for either a
specific child-process signal type or any child-process signal failure.
Then, adjust other places that were doing ad-hoc tests of this type
to use the common functions.
In RestoreArchivedFile, this fixes a race condition affecting whether
the process will report an error or just silently proc_exit(1): before,
that depended on whether the intermediate shell got SIGTERM'd itself
or reported a child process failing on SIGTERM.
Like the previous patch, back-patch to v10; we could go further
but there seems no real need to.
Per report from Erik Rijkers.
Discussion: https://postgr.es/m/f3683f87ab1701bea5d86a7742b22432@xs4all.nl
2018-12-16 20:32:14 +01:00
|
|
|
* abort recovery. See comments in RestoreArchivedFile().
|
2012-10-02 12:37:19 +02:00
|
|
|
*/
|
Improve detection of child-process SIGPIPE failures.
Commit ffa4cbd62 added logic to detect SIGPIPE failure of a COPY child
process, but it only worked correctly if the SIGPIPE occurred in the
immediate child process. Depending on the shell in use and the
complexity of the shell command string, we might instead get back
an exit code of 128 + SIGPIPE, representing a shell error exit
reporting SIGPIPE in the child process.
We could just hack up ClosePipeToProgram() to add the extra case,
but it seems like this is a fairly general issue deserving a more
general and better-documented solution. I chose to add a couple
of functions in src/common/wait_error.c, which is a natural place
to know about wait-result encodings, that will test for either a
specific child-process signal type or any child-process signal failure.
Then, adjust other places that were doing ad-hoc tests of this type
to use the common functions.
In RestoreArchivedFile, this fixes a race condition affecting whether
the process will report an error or just silently proc_exit(1): before,
that depended on whether the intermediate shell got SIGTERM'd itself
or reported a child process failing on SIGTERM.
Like the previous patch, back-patch to v10; we could go further
but there seems no real need to.
Per report from Erik Rijkers.
Discussion: https://postgr.es/m/f3683f87ab1701bea5d86a7742b22432@xs4all.nl
2018-12-16 20:32:14 +01:00
|
|
|
ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
|
2012-10-02 12:37:19 +02:00
|
|
|
/*------
|
2018-11-25 16:31:16 +01:00
|
|
|
translator: First %s represents a postgresql.conf parameter name like
|
2013-11-13 12:38:18 +01:00
|
|
|
"recovery_end_command", the 2nd is the value of that parameter, the
|
|
|
|
third an already translated error message. */
|
|
|
|
(errmsg("%s \"%s\": %s", commandName,
|
|
|
|
command, wait_result_to_str(rc))));
|
2012-10-02 12:37:19 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
/*
|
|
|
|
* A file was restored from the archive under a temporary filename (path),
|
|
|
|
* and now we want to keep it. Rename it under the permanent filename in
|
2018-09-08 21:24:19 +02:00
|
|
|
* pg_wal (xlogfname), replacing any existing file with the same name.
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
*/
|
|
|
|
void
|
2017-10-31 15:34:31 +01:00
|
|
|
KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
{
|
|
|
|
char xlogfpath[MAXPGPATH];
|
|
|
|
bool reload = false;
|
|
|
|
struct stat statbuf;
|
|
|
|
|
|
|
|
snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
|
|
|
|
|
|
|
|
if (stat(xlogfpath, &statbuf) == 0)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
char oldpath[MAXPGPATH];
|
|
|
|
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
#ifdef WIN32
|
|
|
|
static unsigned int deletedcounter = 1;
|
2013-05-29 22:58:43 +02:00
|
|
|
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* On Windows, if another process (e.g a walsender process) holds the
|
|
|
|
* file open in FILE_SHARE_DELETE mode, unlink will succeed, but the
|
|
|
|
* file will still show up in directory listing until the last handle
|
|
|
|
* is closed, and we cannot rename the new file in its place until
|
|
|
|
* that. To avoid that problem, rename the old file to a temporary
|
|
|
|
* name first. Use a counter to create a unique filename, because the
|
|
|
|
* same file might be restored from the archive multiple times, and a
|
|
|
|
* walsender could still be holding onto an old deleted version of it.
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
*/
|
|
|
|
snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
|
|
|
|
xlogfpath, deletedcounter++);
|
|
|
|
if (rename(xlogfpath, oldpath) != 0)
|
|
|
|
{
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not rename file \"%s\" to \"%s\": %m",
|
|
|
|
xlogfpath, oldpath)));
|
|
|
|
}
|
|
|
|
#else
|
2014-08-19 04:59:31 +02:00
|
|
|
/* same-size buffers, so this never truncates */
|
|
|
|
strlcpy(oldpath, xlogfpath, MAXPGPATH);
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
#endif
|
|
|
|
if (unlink(oldpath) != 0)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not remove file \"%s\": %m",
|
|
|
|
xlogfpath)));
|
|
|
|
reload = true;
|
|
|
|
}
|
|
|
|
|
2016-03-10 03:53:53 +01:00
|
|
|
durable_rename(path, xlogfpath, ERROR);
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
|
2012-08-09 00:58:49 +02:00
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* Create .done file forcibly to prevent the restored segment from being
|
|
|
|
* archived again later.
|
2012-08-09 00:58:49 +02:00
|
|
|
*/
|
2015-05-15 17:55:24 +02:00
|
|
|
if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
|
|
|
|
XLogArchiveForceDone(xlogfname);
|
|
|
|
else
|
|
|
|
XLogArchiveNotify(xlogfname);
|
2012-08-09 00:58:49 +02:00
|
|
|
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* If the existing file was replaced, since walsenders might have it open,
|
|
|
|
* request them to reload a currently-open segment. This is only required
|
|
|
|
* for WAL segments, walsenders don't hold other files open, but there's
|
|
|
|
* no harm in doing this too often, and we don't know what kind of a file
|
|
|
|
* we're dealing with here.
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
*/
|
|
|
|
if (reload)
|
|
|
|
WalSndRqstFileReload();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Signal walsender that new WAL has arrived. Again, this isn't necessary
|
|
|
|
* if we restored something other than a WAL segment, but it does no harm
|
|
|
|
* either.
|
|
|
|
*/
|
|
|
|
WalSndWakeup();
|
|
|
|
}
|
|
|
|
|
2012-10-02 12:37:19 +02:00
|
|
|
/*
|
|
|
|
* XLogArchiveNotify
|
|
|
|
*
|
|
|
|
* Create an archive notification file
|
|
|
|
*
|
|
|
|
* The name of the notification file is the message that will be picked up
|
|
|
|
* by the archiver, e.g. we write 0000000100000001000000C6.ready
|
|
|
|
* and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
|
|
|
|
* then when complete, rename it to 0000000100000001000000C6.done
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
XLogArchiveNotify(const char *xlog)
|
|
|
|
{
|
|
|
|
char archiveStatusPath[MAXPGPATH];
|
|
|
|
FILE *fd;
|
|
|
|
|
|
|
|
/* insert an otherwise empty file called <XLOG>.ready */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".ready");
|
|
|
|
fd = AllocateFile(archiveStatusPath, "w");
|
|
|
|
if (fd == NULL)
|
|
|
|
{
|
|
|
|
ereport(LOG,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not create archive status file \"%s\": %m",
|
|
|
|
archiveStatusPath)));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (FreeFile(fd))
|
|
|
|
{
|
|
|
|
ereport(LOG,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not write archive status file \"%s\": %m",
|
|
|
|
archiveStatusPath)));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Notify archiver that it's got something to do */
|
|
|
|
if (IsUnderPostmaster)
|
|
|
|
SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convenience routine to notify using segment number representation of filename
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
XLogArchiveNotifySeg(XLogSegNo segno)
|
|
|
|
{
|
|
|
|
char xlog[MAXFNAMELEN];
|
|
|
|
|
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB
can be useful. A larger segment size has two main benefits: Firstly,
in setups using archiving, it makes it easier to write scripts that
can keep up with higher amounts of WAL, secondly, the WAL has to be
written and synced to disk less frequently.
But at the same time large segment size are disadvantageous for
smaller databases. So far the segment size had to be configured at
compile time, often making it unrealistic to choose one fitting to a
particularly load. Therefore change it to a initdb time setting.
This includes a breaking changes to the xlogreader.h API, which now
requires the current segment size to be configured. For that and
similar reasons a number of binaries had to be taught how to recognize
the current segment size.
Author: Beena Emerson, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael
Paquier, Peter Eisentraut, Robert Hass, Tushar Ahuja
Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
2017-09-20 07:03:48 +02:00
|
|
|
XLogFileName(xlog, ThisTimeLineID, segno, wal_segment_size);
|
2012-10-02 12:37:19 +02:00
|
|
|
XLogArchiveNotify(xlog);
|
|
|
|
}
|
|
|
|
|
2012-08-09 00:58:49 +02:00
|
|
|
/*
|
|
|
|
* XLogArchiveForceDone
|
|
|
|
*
|
|
|
|
* Emit notification forcibly that an XLOG segment file has been successfully
|
|
|
|
* archived, by creating <XLOG>.done regardless of whether <XLOG>.ready
|
|
|
|
* exists or not.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
XLogArchiveForceDone(const char *xlog)
|
|
|
|
{
|
|
|
|
char archiveReady[MAXPGPATH];
|
|
|
|
char archiveDone[MAXPGPATH];
|
|
|
|
struct stat stat_buf;
|
|
|
|
FILE *fd;
|
|
|
|
|
|
|
|
/* Exit if already known done */
|
|
|
|
StatusFilePath(archiveDone, xlog, ".done");
|
|
|
|
if (stat(archiveDone, &stat_buf) == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* If .ready exists, rename it to .done */
|
|
|
|
StatusFilePath(archiveReady, xlog, ".ready");
|
|
|
|
if (stat(archiveReady, &stat_buf) == 0)
|
|
|
|
{
|
2016-03-10 03:53:53 +01:00
|
|
|
(void) durable_rename(archiveReady, archiveDone, WARNING);
|
2012-08-09 00:58:49 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* insert an otherwise empty file called <XLOG>.done */
|
|
|
|
fd = AllocateFile(archiveDone, "w");
|
|
|
|
if (fd == NULL)
|
|
|
|
{
|
|
|
|
ereport(LOG,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not create archive status file \"%s\": %m",
|
|
|
|
archiveDone)));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (FreeFile(fd))
|
|
|
|
{
|
|
|
|
ereport(LOG,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not write archive status file \"%s\": %m",
|
|
|
|
archiveDone)));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-02 12:37:19 +02:00
|
|
|
/*
|
|
|
|
* XLogArchiveCheckDone
|
|
|
|
*
|
|
|
|
* This is called when we are ready to delete or recycle an old XLOG segment
|
|
|
|
* file or backup history file. If it is okay to delete it then return true.
|
|
|
|
* If it is not time to delete it, make sure a .ready file exists, and return
|
|
|
|
* false.
|
|
|
|
*
|
|
|
|
* If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
|
|
|
|
* then return false; else create <XLOG>.ready and return false.
|
|
|
|
*
|
|
|
|
* The reason we do things this way is so that if the original attempt to
|
|
|
|
* create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
XLogArchiveCheckDone(const char *xlog)
|
|
|
|
{
|
|
|
|
char archiveStatusPath[MAXPGPATH];
|
|
|
|
struct stat stat_buf;
|
2018-09-28 04:54:38 +02:00
|
|
|
bool inRecovery = RecoveryInProgress();
|
2012-10-02 12:37:19 +02:00
|
|
|
|
2018-09-28 04:54:38 +02:00
|
|
|
/*
|
|
|
|
* The file is always deletable if archive_mode is "off". On standbys
|
|
|
|
* archiving is disabled if archive_mode is "on", and enabled with
|
|
|
|
* "always". On a primary, archiving is enabled if archive_mode is "on"
|
|
|
|
* or "always".
|
|
|
|
*/
|
|
|
|
if (!((XLogArchivingActive() && !inRecovery) ||
|
|
|
|
(XLogArchivingAlways() && inRecovery)))
|
2012-10-02 12:37:19 +02:00
|
|
|
return true;
|
|
|
|
|
|
|
|
/* First check for .done --- this means archiver is done with it */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".done");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* check for .ready --- this means archiver is still busy with it */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".ready");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Race condition --- maybe archiver just finished, so recheck */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".done");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* Retry creation of the .ready file */
|
|
|
|
XLogArchiveNotify(xlog);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XLogArchiveIsBusy
|
|
|
|
*
|
|
|
|
* Check to see if an XLOG segment file is still unarchived.
|
|
|
|
* This is almost but not quite the inverse of XLogArchiveCheckDone: in
|
|
|
|
* the first place we aren't chartered to recreate the .ready file, and
|
|
|
|
* in the second place we should consider that if the file is already gone
|
|
|
|
* then it's not busy. (This check is needed to handle the race condition
|
|
|
|
* that a checkpoint already deleted the no-longer-needed file.)
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
XLogArchiveIsBusy(const char *xlog)
|
|
|
|
{
|
|
|
|
char archiveStatusPath[MAXPGPATH];
|
|
|
|
struct stat stat_buf;
|
|
|
|
|
|
|
|
/* First check for .done --- this means archiver is done with it */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".done");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* check for .ready --- this means archiver is still busy with it */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".ready");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* Race condition --- maybe archiver just finished, so recheck */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".done");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check to see if the WAL file has been removed by checkpoint, which
|
|
|
|
* implies it has already been archived, and explains why we can't see a
|
|
|
|
* status file for it.
|
|
|
|
*/
|
|
|
|
snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) != 0 &&
|
|
|
|
errno == ENOENT)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
At promotion, don't leave behind a partial segment on the old timeline.
With commit de768844, a copy of the partial segment was archived with the
.partial suffix, but the original file was still left in pg_xlog, so it
didn't actually solve the problems with archiving the partial segment that
it was supposed to solve. With this patch, the partial segment is renamed
rather than copied, so we only archive it with the .partial suffix.
Also be more robust in detecting if the last segment is already being
archived. Previously I used XLogArchiveIsBusy() for that, but that's not
quite right. With archive_mode='always', there might be a .ready file for
it, and we don't want to rename it to .partial in that case.
The old segment is needed until we're fully committed to the new timeline,
i.e. until we've written the end-of-recovery WAL record and updated the
min recovery point and timeline in the control file. So move the renaming
later in the startup sequence, after all that's been done.
2015-05-21 14:28:22 +02:00
|
|
|
/*
|
|
|
|
* XLogArchiveIsReadyOrDone
|
|
|
|
*
|
|
|
|
* Check to see if an XLOG segment file has a .ready or .done file.
|
|
|
|
* This is similar to XLogArchiveIsBusy(), but returns true if the file
|
|
|
|
* is already archived or is about to be archived.
|
|
|
|
*
|
|
|
|
* This is currently only used at recovery. During normal operation this
|
|
|
|
* would be racy: the file might get removed or marked with .ready as we're
|
|
|
|
* checking it, or immediately after we return.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
XLogArchiveIsReadyOrDone(const char *xlog)
|
|
|
|
{
|
|
|
|
char archiveStatusPath[MAXPGPATH];
|
|
|
|
struct stat stat_buf;
|
|
|
|
|
|
|
|
/* First check for .done --- this means archiver is done with it */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".done");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* check for .ready --- this means archiver is still busy with it */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".ready");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* Race condition --- maybe archiver just finished, so recheck */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".done");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Don't archive bogus recycled or preallocated files after timeline switch.
After a timeline switch, we would leave behind recycled WAL segments that
are in the future, but on the old timeline. After promotion, and after they
become old enough to be recycled again, we would notice that they don't have
a .ready or .done file, create a .ready file for them, and archive them.
That's bogus, because the files contain garbage, recycled from an older
timeline (or prealloced as zeros). We shouldn't archive such files.
This could happen when we're following a timeline switch during replay, or
when we switch to new timeline at end-of-recovery.
To fix, whenever we switch to a new timeline, scan the data directory for
WAL segments on the old timeline, but with a higher segment number, and
remove them. Those don't belong to our timeline history, and are most
likely bogus recycled or preallocated files. They could also be valid files
that we streamed from the primary ahead of time, but in any case, they're
not needed to recover to the new timeline.
2015-04-13 15:53:49 +02:00
|
|
|
/*
|
|
|
|
* XLogArchiveIsReady
|
|
|
|
*
|
|
|
|
* Check to see if an XLOG segment file has an archive notification (.ready)
|
|
|
|
* file.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
XLogArchiveIsReady(const char *xlog)
|
|
|
|
{
|
|
|
|
char archiveStatusPath[MAXPGPATH];
|
|
|
|
struct stat stat_buf;
|
|
|
|
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".ready");
|
|
|
|
if (stat(archiveStatusPath, &stat_buf) == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-10-02 12:37:19 +02:00
|
|
|
/*
|
|
|
|
* XLogArchiveCleanup
|
|
|
|
*
|
|
|
|
* Cleanup archive notification file(s) for a particular xlog segment
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
XLogArchiveCleanup(const char *xlog)
|
|
|
|
{
|
|
|
|
char archiveStatusPath[MAXPGPATH];
|
|
|
|
|
|
|
|
/* Remove the .done file */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".done");
|
|
|
|
unlink(archiveStatusPath);
|
|
|
|
/* should we complain about failure? */
|
|
|
|
|
|
|
|
/* Remove the .ready file if present --- normally it shouldn't be */
|
|
|
|
StatusFilePath(archiveStatusPath, xlog, ".ready");
|
|
|
|
unlink(archiveStatusPath);
|
|
|
|
/* should we complain about failure? */
|
|
|
|
}
|