2012-10-02 12:37:19 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* timeline.c
|
|
|
|
* Functions for reading and writing timeline history files.
|
|
|
|
*
|
|
|
|
* A timeline history file lists the timeline changes of the timeline, in
|
|
|
|
* a simple text format. They are archived along with the WAL segments.
|
|
|
|
*
|
2012-10-03 08:08:13 +02:00
|
|
|
* The files are named like "<tli>.history". For example, if the database
|
|
|
|
* starts up and switches to timeline 5, the timeline history file would be
|
|
|
|
* called "00000005.history".
|
2012-10-02 12:37:19 +02:00
|
|
|
*
|
|
|
|
* Each line in the file represents a timeline switch:
|
|
|
|
*
|
2012-12-04 14:28:58 +01:00
|
|
|
* <parentTLI> <switchpoint> <reason>
|
2012-10-02 12:37:19 +02:00
|
|
|
*
|
|
|
|
* parentTLI ID of the parent timeline
|
2012-12-04 14:28:58 +01:00
|
|
|
* switchpoint XLogRecPtr of the WAL position where the switch happened
|
2012-10-02 12:37:19 +02:00
|
|
|
* reason human-readable explanation of why the timeline was changed
|
|
|
|
*
|
|
|
|
* The fields are separated by tabs. Lines beginning with # are comments, and
|
|
|
|
* are ignored. Empty lines are also ignored.
|
|
|
|
*
|
|
|
|
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* src/backend/access/transam/timeline.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2012-10-02 16:19:52 +02:00
|
|
|
#include <sys/stat.h>
|
2012-10-02 12:37:19 +02:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include "access/timeline.h"
|
|
|
|
#include "access/xlog_internal.h"
|
|
|
|
#include "access/xlogdefs.h"
|
|
|
|
#include "storage/fd.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to read a timeline's history file.
|
|
|
|
*
|
|
|
|
* If successful, return the list of component TLIs (the given TLI followed by
|
|
|
|
* its ancestor TLIs). If we can't find the history file, assume that the
|
|
|
|
* timeline has no parents, and return a list of just the specified timeline
|
|
|
|
* ID.
|
|
|
|
*/
|
|
|
|
List *
|
|
|
|
readTimeLineHistory(TimeLineID targetTLI)
|
|
|
|
{
|
|
|
|
List *result;
|
|
|
|
char path[MAXPGPATH];
|
|
|
|
char histfname[MAXFNAMELEN];
|
|
|
|
char fline[MAXPGPATH];
|
|
|
|
FILE *fd;
|
2012-12-04 14:28:58 +01:00
|
|
|
TimeLineHistoryEntry *entry;
|
|
|
|
TimeLineID lasttli = 0;
|
|
|
|
XLogRecPtr prevend;
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
bool fromArchive = false;
|
2012-10-02 12:37:19 +02:00
|
|
|
|
|
|
|
/* Timeline 1 does not have a history file, so no need to check */
|
|
|
|
if (targetTLI == 1)
|
2012-12-04 14:28:58 +01:00
|
|
|
{
|
|
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
|
|
entry->tli = targetTLI;
|
|
|
|
entry->begin = entry->end = InvalidXLogRecPtr;
|
|
|
|
return list_make1(entry);
|
|
|
|
}
|
2012-10-02 12:37:19 +02:00
|
|
|
|
|
|
|
if (InArchiveRecovery)
|
|
|
|
{
|
|
|
|
TLHistoryFileName(histfname, targetTLI);
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
fromArchive =
|
|
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
2012-10-02 12:37:19 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
TLHistoryFilePath(path, targetTLI);
|
|
|
|
|
|
|
|
fd = AllocateFile(path, "r");
|
|
|
|
if (fd == NULL)
|
|
|
|
{
|
|
|
|
if (errno != ENOENT)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
|
|
/* Not there, so assume no parents */
|
2012-12-04 14:28:58 +01:00
|
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
|
|
entry->tli = targetTLI;
|
|
|
|
entry->begin = entry->end = InvalidXLogRecPtr;
|
|
|
|
return list_make1(entry);
|
2012-10-02 12:37:19 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
result = NIL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Parse the file...
|
|
|
|
*/
|
2012-12-04 14:28:58 +01:00
|
|
|
prevend = InvalidXLogRecPtr;
|
2012-10-02 12:37:19 +02:00
|
|
|
while (fgets(fline, sizeof(fline), fd) != NULL)
|
|
|
|
{
|
|
|
|
/* skip leading whitespace and check for # comment */
|
|
|
|
char *ptr;
|
|
|
|
TimeLineID tli;
|
2012-12-04 14:28:58 +01:00
|
|
|
uint32 switchpoint_hi;
|
|
|
|
uint32 switchpoint_lo;
|
|
|
|
int nfields;
|
2012-10-02 12:37:19 +02:00
|
|
|
|
|
|
|
for (ptr = fline; *ptr; ptr++)
|
|
|
|
{
|
|
|
|
if (!isspace((unsigned char) *ptr))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (*ptr == '\0' || *ptr == '#')
|
|
|
|
continue;
|
|
|
|
|
2012-12-04 14:28:58 +01:00
|
|
|
nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
|
|
|
|
|
|
|
|
if (nfields < 1)
|
|
|
|
{
|
|
|
|
/* expect a numeric timeline ID as first field of line */
|
2012-10-02 12:37:19 +02:00
|
|
|
ereport(FATAL,
|
|
|
|
(errmsg("syntax error in history file: %s", fline),
|
|
|
|
errhint("Expected a numeric timeline ID.")));
|
2012-12-04 14:28:58 +01:00
|
|
|
}
|
|
|
|
if (nfields != 3)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errmsg("syntax error in history file: %s", fline),
|
|
|
|
errhint("Expected an XLOG switchpoint location.")));
|
2012-10-02 12:37:19 +02:00
|
|
|
|
2012-12-04 14:28:58 +01:00
|
|
|
if (result && tli <= lasttli)
|
2012-10-02 12:37:19 +02:00
|
|
|
ereport(FATAL,
|
|
|
|
(errmsg("invalid data in history file: %s", fline),
|
|
|
|
errhint("Timeline IDs must be in increasing sequence.")));
|
|
|
|
|
2012-12-04 14:28:58 +01:00
|
|
|
lasttli = tli;
|
|
|
|
|
|
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
|
|
entry->tli = tli;
|
|
|
|
entry->begin = prevend;
|
|
|
|
entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
|
|
|
|
prevend = entry->end;
|
|
|
|
|
2012-10-02 12:37:19 +02:00
|
|
|
/* Build list with newest item first */
|
2012-12-04 14:28:58 +01:00
|
|
|
result = lcons(entry, result);
|
2012-10-02 12:37:19 +02:00
|
|
|
|
|
|
|
/* we ignore the remainder of each line */
|
|
|
|
}
|
|
|
|
|
|
|
|
FreeFile(fd);
|
|
|
|
|
2012-12-04 14:28:58 +01:00
|
|
|
if (result && targetTLI <= lasttli)
|
2012-10-02 12:37:19 +02:00
|
|
|
ereport(FATAL,
|
|
|
|
(errmsg("invalid data in history file \"%s\"", path),
|
|
|
|
errhint("Timeline IDs must be less than child timeline's ID.")));
|
|
|
|
|
2012-12-04 14:28:58 +01:00
|
|
|
/*
|
|
|
|
* Create one more entry for the "tip" of the timeline, which has no
|
|
|
|
* entry in the history file.
|
|
|
|
*/
|
|
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
|
|
entry->tli = targetTLI;
|
|
|
|
entry->begin = prevend;
|
|
|
|
entry->end = InvalidXLogRecPtr;
|
2012-10-02 12:37:19 +02:00
|
|
|
|
2012-12-04 14:28:58 +01:00
|
|
|
result = lcons(entry, result);
|
2012-10-02 12:37:19 +02:00
|
|
|
|
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated
when restored from the archive. Before, they were restored under a temporary
filename, and not kept in pg_xlog, but after the patch, they were copied
under pg_xlog. This is necessary for a cascading standby to find them, but
it also means that if the archive goes offline and a standby is restarted,
it can recover back to where it was using the files in pg_xlog. It also
means that if you take an offline backup from a standby server, it includes
all the required WAL files in pg_xlog.
However, the same change was not made to timeline history files, so if the
WAL segment containing the checkpoint record contains a timeline switch, you
will still get an error if you try to restart recovery without the archive,
or recover from an offline backup taken from the standby.
With this patch, timeline history files restored from archive are copied
into pg_xlog like WAL files are, so that pg_xlog contains all the files
required to recover. This is a corner-case pre-existing issue in 9.2, but
even more important in master where it's possible for a standby to follow a
timeline switch through streaming replication. To make that possible, the
timeline history files must be present in pg_xlog.
2012-12-30 13:26:47 +01:00
|
|
|
/*
|
|
|
|
* If the history file was fetched from archive, save it in pg_xlog for
|
|
|
|
* future reference.
|
|
|
|
*/
|
|
|
|
if (fromArchive)
|
|
|
|
KeepFileRestoredFromArchive(path, histfname);
|
|
|
|
|
2012-10-02 12:37:19 +02:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Probe whether a timeline history file exists for the given timeline ID
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
existsTimeLineHistory(TimeLineID probeTLI)
|
|
|
|
{
|
|
|
|
char path[MAXPGPATH];
|
|
|
|
char histfname[MAXFNAMELEN];
|
|
|
|
FILE *fd;
|
|
|
|
|
|
|
|
/* Timeline 1 does not have a history file, so no need to check */
|
|
|
|
if (probeTLI == 1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (InArchiveRecovery)
|
|
|
|
{
|
|
|
|
TLHistoryFileName(histfname, probeTLI);
|
2012-11-19 09:02:25 +01:00
|
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
2012-10-02 12:37:19 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
TLHistoryFilePath(path, probeTLI);
|
|
|
|
|
|
|
|
fd = AllocateFile(path, "r");
|
|
|
|
if (fd != NULL)
|
|
|
|
{
|
|
|
|
FreeFile(fd);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (errno != ENOENT)
|
|
|
|
ereport(FATAL,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find the newest existing timeline, assuming that startTLI exists.
|
|
|
|
*
|
|
|
|
* Note: while this is somewhat heuristic, it does positively guarantee
|
|
|
|
* that (result + 1) is not a known timeline, and therefore it should
|
|
|
|
* be safe to assign that ID to a new timeline.
|
|
|
|
*/
|
|
|
|
TimeLineID
|
|
|
|
findNewestTimeLine(TimeLineID startTLI)
|
|
|
|
{
|
|
|
|
TimeLineID newestTLI;
|
|
|
|
TimeLineID probeTLI;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The algorithm is just to probe for the existence of timeline history
|
|
|
|
* files. XXX is it useful to allow gaps in the sequence?
|
|
|
|
*/
|
|
|
|
newestTLI = startTLI;
|
|
|
|
|
|
|
|
for (probeTLI = startTLI + 1;; probeTLI++)
|
|
|
|
{
|
|
|
|
if (existsTimeLineHistory(probeTLI))
|
|
|
|
{
|
|
|
|
newestTLI = probeTLI; /* probeTLI exists */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* doesn't exist, assume we're done */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return newestTLI;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a new timeline history file.
|
|
|
|
*
|
|
|
|
* newTLI: ID of the new timeline
|
|
|
|
* parentTLI: ID of its immediate parent
|
2012-12-04 14:28:58 +01:00
|
|
|
* switchpoint: XLOG position where the system switched to the new timeline
|
2012-10-02 12:37:19 +02:00
|
|
|
* reason: human-readable explanation of why the timeline was switched
|
|
|
|
*
|
|
|
|
* Currently this is only used at the end recovery, and so there are no locking
|
|
|
|
* considerations. But we should be just as tense as XLogFileInit to avoid
|
|
|
|
* emplacing a bogus file.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
|
2012-12-04 14:28:58 +01:00
|
|
|
XLogRecPtr switchpoint, char *reason)
|
2012-10-02 12:37:19 +02:00
|
|
|
{
|
|
|
|
char path[MAXPGPATH];
|
|
|
|
char tmppath[MAXPGPATH];
|
|
|
|
char histfname[MAXFNAMELEN];
|
|
|
|
char buffer[BLCKSZ];
|
|
|
|
int srcfd;
|
|
|
|
int fd;
|
|
|
|
int nbytes;
|
|
|
|
|
|
|
|
Assert(newTLI > parentTLI); /* else bad selection of newTLI */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write into a temp file name.
|
|
|
|
*/
|
|
|
|
snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
|
|
|
|
|
|
|
|
unlink(tmppath);
|
|
|
|
|
|
|
|
/* do not use get_sync_bit() here --- want to fsync only at end of fill */
|
Add OpenTransientFile, with automatic cleanup at end-of-xact.
Files opened with BasicOpenFile or PathNameOpenFile are not automatically
cleaned up on error. That puts unnecessary burden on callers that only want
to keep the file open for a short time. There is AllocateFile, but that
returns a buffered FILE * stream, which in many cases is not the nicest API
to work with. So add function called OpenTransientFile, which returns a
unbuffered fd that's cleaned up like the FILE* returned by AllocateFile().
This plugs a few rare fd leaks in error cases:
1. copy_file() - fixed by by using OpenTransientFile instead of BasicOpenFile
2. XLogFileInit() - fixed by adding close() calls to the error cases. Can't
use OpenTransientFile here because the fd is supposed to persist over
transaction boundaries.
3. lo_import/lo_export - fixed by using OpenTransientFile instead of
PathNameOpenFile.
In addition to plugging those leaks, this replaces many BasicOpenFile() calls
with OpenTransientFile() that were not leaking, because the code meticulously
closed the file on error. That wasn't strictly necessary, but IMHO it's good
for robustness.
The same leaks exist in older versions, but given the rarity of the issues,
I'm not backpatching this. Not yet, anyway - it might be good to backpatch
later, after this mechanism has had some more testing in master branch.
2012-11-27 09:25:50 +01:00
|
|
|
fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
|
|
|
|
S_IRUSR | S_IWUSR);
|
2012-10-02 12:37:19 +02:00
|
|
|
if (fd < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not create file \"%s\": %m", tmppath)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If a history file exists for the parent, copy it verbatim
|
|
|
|
*/
|
|
|
|
if (InArchiveRecovery)
|
|
|
|
{
|
|
|
|
TLHistoryFileName(histfname, parentTLI);
|
2012-11-19 09:02:25 +01:00
|
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
2012-10-02 12:37:19 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
TLHistoryFilePath(path, parentTLI);
|
|
|
|
|
Add OpenTransientFile, with automatic cleanup at end-of-xact.
Files opened with BasicOpenFile or PathNameOpenFile are not automatically
cleaned up on error. That puts unnecessary burden on callers that only want
to keep the file open for a short time. There is AllocateFile, but that
returns a buffered FILE * stream, which in many cases is not the nicest API
to work with. So add function called OpenTransientFile, which returns a
unbuffered fd that's cleaned up like the FILE* returned by AllocateFile().
This plugs a few rare fd leaks in error cases:
1. copy_file() - fixed by by using OpenTransientFile instead of BasicOpenFile
2. XLogFileInit() - fixed by adding close() calls to the error cases. Can't
use OpenTransientFile here because the fd is supposed to persist over
transaction boundaries.
3. lo_import/lo_export - fixed by using OpenTransientFile instead of
PathNameOpenFile.
In addition to plugging those leaks, this replaces many BasicOpenFile() calls
with OpenTransientFile() that were not leaking, because the code meticulously
closed the file on error. That wasn't strictly necessary, but IMHO it's good
for robustness.
The same leaks exist in older versions, but given the rarity of the issues,
I'm not backpatching this. Not yet, anyway - it might be good to backpatch
later, after this mechanism has had some more testing in master branch.
2012-11-27 09:25:50 +01:00
|
|
|
srcfd = OpenTransientFile(path, O_RDONLY, 0);
|
2012-10-02 12:37:19 +02:00
|
|
|
if (srcfd < 0)
|
|
|
|
{
|
|
|
|
if (errno != ENOENT)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
|
|
/* Not there, so assume parent has no parents */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
errno = 0;
|
|
|
|
nbytes = (int) read(srcfd, buffer, sizeof(buffer));
|
|
|
|
if (nbytes < 0 || errno != 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not read file \"%s\": %m", path)));
|
|
|
|
if (nbytes == 0)
|
|
|
|
break;
|
|
|
|
errno = 0;
|
|
|
|
if ((int) write(fd, buffer, nbytes) != nbytes)
|
|
|
|
{
|
|
|
|
int save_errno = errno;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we fail to make the file, delete it to release disk
|
|
|
|
* space
|
|
|
|
*/
|
|
|
|
unlink(tmppath);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* if write didn't set errno, assume problem is no disk space
|
|
|
|
*/
|
|
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
|
|
}
|
|
|
|
}
|
Add OpenTransientFile, with automatic cleanup at end-of-xact.
Files opened with BasicOpenFile or PathNameOpenFile are not automatically
cleaned up on error. That puts unnecessary burden on callers that only want
to keep the file open for a short time. There is AllocateFile, but that
returns a buffered FILE * stream, which in many cases is not the nicest API
to work with. So add function called OpenTransientFile, which returns a
unbuffered fd that's cleaned up like the FILE* returned by AllocateFile().
This plugs a few rare fd leaks in error cases:
1. copy_file() - fixed by by using OpenTransientFile instead of BasicOpenFile
2. XLogFileInit() - fixed by adding close() calls to the error cases. Can't
use OpenTransientFile here because the fd is supposed to persist over
transaction boundaries.
3. lo_import/lo_export - fixed by using OpenTransientFile instead of
PathNameOpenFile.
In addition to plugging those leaks, this replaces many BasicOpenFile() calls
with OpenTransientFile() that were not leaking, because the code meticulously
closed the file on error. That wasn't strictly necessary, but IMHO it's good
for robustness.
The same leaks exist in older versions, but given the rarity of the issues,
I'm not backpatching this. Not yet, anyway - it might be good to backpatch
later, after this mechanism has had some more testing in master branch.
2012-11-27 09:25:50 +01:00
|
|
|
CloseTransientFile(srcfd);
|
2012-10-02 12:37:19 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Append one line with the details of this timeline split.
|
|
|
|
*
|
|
|
|
* If we did have a parent file, insert an extra newline just in case the
|
|
|
|
* parent file failed to end with one.
|
|
|
|
*/
|
|
|
|
snprintf(buffer, sizeof(buffer),
|
2012-12-04 14:28:58 +01:00
|
|
|
"%s%u\t%X/%X\t%s\n",
|
2012-10-02 12:37:19 +02:00
|
|
|
(srcfd < 0) ? "" : "\n",
|
|
|
|
parentTLI,
|
2012-12-04 14:28:58 +01:00
|
|
|
(uint32) (switchpoint >> 32), (uint32) (switchpoint),
|
2012-10-02 12:37:19 +02:00
|
|
|
reason);
|
|
|
|
|
|
|
|
nbytes = strlen(buffer);
|
|
|
|
errno = 0;
|
|
|
|
if ((int) write(fd, buffer, nbytes) != nbytes)
|
|
|
|
{
|
|
|
|
int save_errno = errno;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we fail to make the file, delete it to release disk space
|
|
|
|
*/
|
|
|
|
unlink(tmppath);
|
|
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pg_fsync(fd) != 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not fsync file \"%s\": %m", tmppath)));
|
|
|
|
|
Add OpenTransientFile, with automatic cleanup at end-of-xact.
Files opened with BasicOpenFile or PathNameOpenFile are not automatically
cleaned up on error. That puts unnecessary burden on callers that only want
to keep the file open for a short time. There is AllocateFile, but that
returns a buffered FILE * stream, which in many cases is not the nicest API
to work with. So add function called OpenTransientFile, which returns a
unbuffered fd that's cleaned up like the FILE* returned by AllocateFile().
This plugs a few rare fd leaks in error cases:
1. copy_file() - fixed by by using OpenTransientFile instead of BasicOpenFile
2. XLogFileInit() - fixed by adding close() calls to the error cases. Can't
use OpenTransientFile here because the fd is supposed to persist over
transaction boundaries.
3. lo_import/lo_export - fixed by using OpenTransientFile instead of
PathNameOpenFile.
In addition to plugging those leaks, this replaces many BasicOpenFile() calls
with OpenTransientFile() that were not leaking, because the code meticulously
closed the file on error. That wasn't strictly necessary, but IMHO it's good
for robustness.
The same leaks exist in older versions, but given the rarity of the issues,
I'm not backpatching this. Not yet, anyway - it might be good to backpatch
later, after this mechanism has had some more testing in master branch.
2012-11-27 09:25:50 +01:00
|
|
|
if (CloseTransientFile(fd))
|
2012-10-02 12:37:19 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not close file \"%s\": %m", tmppath)));
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now move the completed history file into place with its final name.
|
|
|
|
*/
|
|
|
|
TLHistoryFilePath(path, newTLI);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prefer link() to rename() here just to be really sure that we don't
|
2012-12-05 20:00:59 +01:00
|
|
|
* overwrite an existing file. However, there shouldn't be one, so
|
2012-10-02 12:37:19 +02:00
|
|
|
* rename() is an acceptable substitute except for the truly paranoid.
|
|
|
|
*/
|
|
|
|
#if HAVE_WORKING_LINK
|
|
|
|
if (link(tmppath, path) < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not link file \"%s\" to \"%s\": %m",
|
|
|
|
tmppath, path)));
|
|
|
|
unlink(tmppath);
|
|
|
|
#else
|
|
|
|
if (rename(tmppath, path) < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not rename file \"%s\" to \"%s\": %m",
|
|
|
|
tmppath, path)));
|
|
|
|
#endif
|
2012-10-03 08:08:13 +02:00
|
|
|
|
|
|
|
/* The history file can be archived immediately. */
|
|
|
|
TLHistoryFileName(histfname, newTLI);
|
|
|
|
XLogArchiveNotify(histfname);
|
2012-10-02 12:37:19 +02:00
|
|
|
}
|
2012-12-04 14:28:58 +01:00
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
/*
|
|
|
|
* Writes a history file for given timeline and contents.
|
|
|
|
*
|
|
|
|
* Currently this is only used in the walreceiver process, and so there are
|
|
|
|
* no locking considerations. But we should be just as tense as XLogFileInit
|
|
|
|
* to avoid emplacing a bogus file.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
|
|
|
|
{
|
|
|
|
char path[MAXPGPATH];
|
|
|
|
char tmppath[MAXPGPATH];
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write into a temp file name.
|
|
|
|
*/
|
|
|
|
snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
|
|
|
|
|
|
|
|
unlink(tmppath);
|
|
|
|
|
|
|
|
/* do not use get_sync_bit() here --- want to fsync only at end of fill */
|
|
|
|
fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
|
|
|
|
S_IRUSR | S_IWUSR);
|
|
|
|
if (fd < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not create file \"%s\": %m", tmppath)));
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
if ((int) write(fd, content, size) != size)
|
|
|
|
{
|
|
|
|
int save_errno = errno;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we fail to make the file, delete it to release disk space
|
|
|
|
*/
|
|
|
|
unlink(tmppath);
|
|
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pg_fsync(fd) != 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not fsync file \"%s\": %m", tmppath)));
|
|
|
|
|
|
|
|
if (CloseTransientFile(fd))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not close file \"%s\": %m", tmppath)));
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now move the completed history file into place with its final name.
|
|
|
|
*/
|
|
|
|
TLHistoryFilePath(path, tli);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prefer link() to rename() here just to be really sure that we don't
|
|
|
|
* overwrite an existing logfile. However, there shouldn't be one, so
|
|
|
|
* rename() is an acceptable substitute except for the truly paranoid.
|
|
|
|
*/
|
|
|
|
#if HAVE_WORKING_LINK
|
|
|
|
if (link(tmppath, path) < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not link file \"%s\" to \"%s\": %m",
|
|
|
|
tmppath, path)));
|
|
|
|
unlink(tmppath);
|
|
|
|
#else
|
|
|
|
if (rename(tmppath, path) < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not rename file \"%s\" to \"%s\": %m",
|
|
|
|
tmppath, path)));
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2012-12-04 14:28:58 +01:00
|
|
|
/*
|
|
|
|
* Returns true if 'expectedTLEs' contains a timeline with id 'tli'
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
tliInHistory(TimeLineID tli, List *expectedTLEs)
|
|
|
|
{
|
|
|
|
ListCell *cell;
|
|
|
|
|
|
|
|
foreach(cell, expectedTLEs)
|
|
|
|
{
|
|
|
|
if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns the ID of the timeline in use at a particular point in time, in
|
|
|
|
* the given timeline history.
|
|
|
|
*/
|
|
|
|
TimeLineID
|
|
|
|
tliOfPointInHistory(XLogRecPtr ptr, List *history)
|
|
|
|
{
|
|
|
|
ListCell *cell;
|
|
|
|
|
|
|
|
foreach(cell, history)
|
|
|
|
{
|
|
|
|
TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
|
2012-12-28 17:06:15 +01:00
|
|
|
if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
|
|
|
|
(XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
|
2012-12-04 14:28:58 +01:00
|
|
|
{
|
|
|
|
/* found it */
|
|
|
|
return tle->tli;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* shouldn't happen. */
|
|
|
|
elog(ERROR, "timeline history was not contiguous");
|
|
|
|
return 0; /* keep compiler quiet */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns the point in history where we branched off the given timeline.
|
|
|
|
* Returns InvalidXLogRecPtr if the timeline is current (= we have not
|
|
|
|
* branched off from it), and throws an error if the timeline is not part of
|
|
|
|
* this server's history.
|
|
|
|
*/
|
|
|
|
XLogRecPtr
|
|
|
|
tliSwitchPoint(TimeLineID tli, List *history)
|
|
|
|
{
|
|
|
|
ListCell *cell;
|
|
|
|
|
|
|
|
foreach (cell, history)
|
|
|
|
{
|
|
|
|
TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
|
|
|
|
|
|
|
|
if (tle->tli == tli)
|
|
|
|
return tle->end;
|
|
|
|
}
|
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("requested timeline %u is not in this server's history",
|
|
|
|
tli)));
|
|
|
|
return InvalidXLogRecPtr; /* keep compiler quiet */
|
|
|
|
}
|