601 lines
15 KiB
C
601 lines
15 KiB
C
/*-------------------------------------------------------------------------
 *
 * timeline.c
 *		Functions for reading and writing timeline history files.
 *
 * A timeline history file lists the timeline changes of the timeline, in
 * a simple text format. They are archived along with the WAL segments.
 *
 * The files are named like "<tli>.history". For example, if the database
 * starts up and switches to timeline 5, the timeline history file would be
 * called "00000005.history".
 *
 * Each line in the file represents a timeline switch:
 *
 * <parentTLI> <switchpoint> <reason>
 *
 *	parentTLI	ID of the parent timeline
 *	switchpoint	XLogRecPtr of the WAL location where the switch happened
 *	reason		human-readable explanation of why the timeline was changed
 *
 * The fields are separated by tabs. Lines beginning with # are comments, and
 * are ignored. Empty lines are also ignored.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/access/transam/timeline.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
|
|
#include "access/timeline.h"
|
|
#include "access/xlog.h"
|
|
#include "access/xlog_internal.h"
|
|
#include "access/xlogarchive.h"
|
|
#include "access/xlogdefs.h"
|
|
#include "pgstat.h"
|
|
#include "storage/fd.h"
|
|
|
|
/*
|
|
* Copies all timeline history files with id's between 'begin' and 'end'
|
|
* from archive to pg_wal.
|
|
*/
|
|
void
|
|
restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
|
|
{
|
|
char path[MAXPGPATH];
|
|
char histfname[MAXFNAMELEN];
|
|
TimeLineID tli;
|
|
|
|
for (tli = begin; tli < end; tli++)
|
|
{
|
|
if (tli == 1)
|
|
continue;
|
|
|
|
TLHistoryFileName(histfname, tli);
|
|
if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
|
|
KeepFileRestoredFromArchive(path, histfname);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Try to read a timeline's history file.
|
|
*
|
|
* If successful, return the list of component TLIs (the given TLI followed by
|
|
* its ancestor TLIs). If we can't find the history file, assume that the
|
|
* timeline has no parents, and return a list of just the specified timeline
|
|
* ID.
|
|
*/
|
|
List *
|
|
readTimeLineHistory(TimeLineID targetTLI)
|
|
{
|
|
List *result;
|
|
char path[MAXPGPATH];
|
|
char histfname[MAXFNAMELEN];
|
|
FILE *fd;
|
|
TimeLineHistoryEntry *entry;
|
|
TimeLineID lasttli = 0;
|
|
XLogRecPtr prevend;
|
|
bool fromArchive = false;
|
|
|
|
/* Timeline 1 does not have a history file, so no need to check */
|
|
if (targetTLI == 1)
|
|
{
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
entry->tli = targetTLI;
|
|
entry->begin = entry->end = InvalidXLogRecPtr;
|
|
return list_make1(entry);
|
|
}
|
|
|
|
if (ArchiveRecoveryRequested)
|
|
{
|
|
TLHistoryFileName(histfname, targetTLI);
|
|
fromArchive =
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
|
}
|
|
else
|
|
TLHistoryFilePath(path, targetTLI);
|
|
|
|
fd = AllocateFile(path, "r");
|
|
if (fd == NULL)
|
|
{
|
|
if (errno != ENOENT)
|
|
ereport(FATAL,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
/* Not there, so assume no parents */
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
entry->tli = targetTLI;
|
|
entry->begin = entry->end = InvalidXLogRecPtr;
|
|
return list_make1(entry);
|
|
}
|
|
|
|
result = NIL;
|
|
|
|
/*
|
|
* Parse the file...
|
|
*/
|
|
prevend = InvalidXLogRecPtr;
|
|
for (;;)
|
|
{
|
|
char fline[MAXPGPATH];
|
|
char *res;
|
|
char *ptr;
|
|
TimeLineID tli;
|
|
uint32 switchpoint_hi;
|
|
uint32 switchpoint_lo;
|
|
int nfields;
|
|
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
|
|
res = fgets(fline, sizeof(fline), fd);
|
|
pgstat_report_wait_end();
|
|
if (res == NULL)
|
|
{
|
|
if (ferror(fd))
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not read file \"%s\": %m", path)));
|
|
|
|
break;
|
|
}
|
|
|
|
/* skip leading whitespace and check for # comment */
|
|
for (ptr = fline; *ptr; ptr++)
|
|
{
|
|
if (!isspace((unsigned char) *ptr))
|
|
break;
|
|
}
|
|
if (*ptr == '\0' || *ptr == '#')
|
|
continue;
|
|
|
|
nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
|
|
|
|
if (nfields < 1)
|
|
{
|
|
/* expect a numeric timeline ID as first field of line */
|
|
ereport(FATAL,
|
|
(errmsg("syntax error in history file: %s", fline),
|
|
errhint("Expected a numeric timeline ID.")));
|
|
}
|
|
if (nfields != 3)
|
|
ereport(FATAL,
|
|
(errmsg("syntax error in history file: %s", fline),
|
|
errhint("Expected a write-ahead log switchpoint location.")));
|
|
|
|
if (result && tli <= lasttli)
|
|
ereport(FATAL,
|
|
(errmsg("invalid data in history file: %s", fline),
|
|
errhint("Timeline IDs must be in increasing sequence.")));
|
|
|
|
lasttli = tli;
|
|
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
entry->tli = tli;
|
|
entry->begin = prevend;
|
|
entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
|
|
prevend = entry->end;
|
|
|
|
/* Build list with newest item first */
|
|
result = lcons(entry, result);
|
|
|
|
/* we ignore the remainder of each line */
|
|
}
|
|
|
|
FreeFile(fd);
|
|
|
|
if (result && targetTLI <= lasttli)
|
|
ereport(FATAL,
|
|
(errmsg("invalid data in history file \"%s\"", path),
|
|
errhint("Timeline IDs must be less than child timeline's ID.")));
|
|
|
|
/*
|
|
* Create one more entry for the "tip" of the timeline, which has no entry
|
|
* in the history file.
|
|
*/
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
entry->tli = targetTLI;
|
|
entry->begin = prevend;
|
|
entry->end = InvalidXLogRecPtr;
|
|
|
|
result = lcons(entry, result);
|
|
|
|
/*
|
|
* If the history file was fetched from archive, save it in pg_wal for
|
|
* future reference.
|
|
*/
|
|
if (fromArchive)
|
|
KeepFileRestoredFromArchive(path, histfname);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Probe whether a timeline history file exists for the given timeline ID
|
|
*/
|
|
bool
|
|
existsTimeLineHistory(TimeLineID probeTLI)
|
|
{
|
|
char path[MAXPGPATH];
|
|
char histfname[MAXFNAMELEN];
|
|
FILE *fd;
|
|
|
|
/* Timeline 1 does not have a history file, so no need to check */
|
|
if (probeTLI == 1)
|
|
return false;
|
|
|
|
if (ArchiveRecoveryRequested)
|
|
{
|
|
TLHistoryFileName(histfname, probeTLI);
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
|
}
|
|
else
|
|
TLHistoryFilePath(path, probeTLI);
|
|
|
|
fd = AllocateFile(path, "r");
|
|
if (fd != NULL)
|
|
{
|
|
FreeFile(fd);
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
if (errno != ENOENT)
|
|
ereport(FATAL,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Find the newest existing timeline, assuming that startTLI exists.
|
|
*
|
|
* Note: while this is somewhat heuristic, it does positively guarantee
|
|
* that (result + 1) is not a known timeline, and therefore it should
|
|
* be safe to assign that ID to a new timeline.
|
|
*/
|
|
TimeLineID
|
|
findNewestTimeLine(TimeLineID startTLI)
|
|
{
|
|
TimeLineID newestTLI;
|
|
TimeLineID probeTLI;
|
|
|
|
/*
|
|
* The algorithm is just to probe for the existence of timeline history
|
|
* files. XXX is it useful to allow gaps in the sequence?
|
|
*/
|
|
newestTLI = startTLI;
|
|
|
|
for (probeTLI = startTLI + 1;; probeTLI++)
|
|
{
|
|
if (existsTimeLineHistory(probeTLI))
|
|
{
|
|
newestTLI = probeTLI; /* probeTLI exists */
|
|
}
|
|
else
|
|
{
|
|
/* doesn't exist, assume we're done */
|
|
break;
|
|
}
|
|
}
|
|
|
|
return newestTLI;
|
|
}
|
|
|
|
/*
|
|
* Create a new timeline history file.
|
|
*
|
|
* newTLI: ID of the new timeline
|
|
* parentTLI: ID of its immediate parent
|
|
* switchpoint: WAL location where the system switched to the new timeline
|
|
* reason: human-readable explanation of why the timeline was switched
|
|
*
|
|
* Currently this is only used at the end recovery, and so there are no locking
|
|
* considerations. But we should be just as tense as XLogFileInit to avoid
|
|
* emplacing a bogus file.
|
|
*/
|
|
void
|
|
writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
|
|
XLogRecPtr switchpoint, char *reason)
|
|
{
|
|
char path[MAXPGPATH];
|
|
char tmppath[MAXPGPATH];
|
|
char histfname[MAXFNAMELEN];
|
|
char buffer[BLCKSZ];
|
|
int srcfd;
|
|
int fd;
|
|
int nbytes;
|
|
|
|
Assert(newTLI > parentTLI); /* else bad selection of newTLI */
|
|
|
|
/*
|
|
* Write into a temp file name.
|
|
*/
|
|
snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
|
|
|
|
unlink(tmppath);
|
|
|
|
/* do not use get_sync_bit() here --- want to fsync only at end of fill */
|
|
fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
|
|
if (fd < 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not create file \"%s\": %m", tmppath)));
|
|
|
|
/*
|
|
* If a history file exists for the parent, copy it verbatim
|
|
*/
|
|
if (ArchiveRecoveryRequested)
|
|
{
|
|
TLHistoryFileName(histfname, parentTLI);
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
|
}
|
|
else
|
|
TLHistoryFilePath(path, parentTLI);
|
|
|
|
srcfd = OpenTransientFile(path, O_RDONLY);
|
|
if (srcfd < 0)
|
|
{
|
|
if (errno != ENOENT)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
/* Not there, so assume parent has no parents */
|
|
}
|
|
else
|
|
{
|
|
for (;;)
|
|
{
|
|
errno = 0;
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
|
|
nbytes = (int) read(srcfd, buffer, sizeof(buffer));
|
|
pgstat_report_wait_end();
|
|
if (nbytes < 0 || errno != 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not read file \"%s\": %m", path)));
|
|
if (nbytes == 0)
|
|
break;
|
|
errno = 0;
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
|
|
if ((int) write(fd, buffer, nbytes) != nbytes)
|
|
{
|
|
int save_errno = errno;
|
|
|
|
/*
|
|
* If we fail to make the file, delete it to release disk
|
|
* space
|
|
*/
|
|
unlink(tmppath);
|
|
|
|
/*
|
|
* if write didn't set errno, assume problem is no disk space
|
|
*/
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
}
|
|
pgstat_report_wait_end();
|
|
}
|
|
|
|
if (CloseTransientFile(srcfd) != 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not close file \"%s\": %m", path)));
|
|
}
|
|
|
|
/*
|
|
* Append one line with the details of this timeline split.
|
|
*
|
|
* If we did have a parent file, insert an extra newline just in case the
|
|
* parent file failed to end with one.
|
|
*/
|
|
snprintf(buffer, sizeof(buffer),
|
|
"%s%u\t%X/%X\t%s\n",
|
|
(srcfd < 0) ? "" : "\n",
|
|
parentTLI,
|
|
(uint32) (switchpoint >> 32), (uint32) (switchpoint),
|
|
reason);
|
|
|
|
nbytes = strlen(buffer);
|
|
errno = 0;
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
|
|
if ((int) write(fd, buffer, nbytes) != nbytes)
|
|
{
|
|
int save_errno = errno;
|
|
|
|
/*
|
|
* If we fail to make the file, delete it to release disk space
|
|
*/
|
|
unlink(tmppath);
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
}
|
|
pgstat_report_wait_end();
|
|
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
|
|
if (pg_fsync(fd) != 0)
|
|
ereport(data_sync_elevel(ERROR),
|
|
(errcode_for_file_access(),
|
|
errmsg("could not fsync file \"%s\": %m", tmppath)));
|
|
pgstat_report_wait_end();
|
|
|
|
if (CloseTransientFile(fd) != 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not close file \"%s\": %m", tmppath)));
|
|
|
|
/*
|
|
* Now move the completed history file into place with its final name.
|
|
*/
|
|
TLHistoryFilePath(path, newTLI);
|
|
|
|
/*
|
|
* Perform the rename using link if available, paranoidly trying to avoid
|
|
* overwriting an existing file (there shouldn't be one).
|
|
*/
|
|
durable_rename_excl(tmppath, path, ERROR);
|
|
|
|
/* The history file can be archived immediately. */
|
|
if (XLogArchivingActive())
|
|
{
|
|
TLHistoryFileName(histfname, newTLI);
|
|
XLogArchiveNotify(histfname);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Writes a history file for given timeline and contents.
|
|
*
|
|
* Currently this is only used in the walreceiver process, and so there are
|
|
* no locking considerations. But we should be just as tense as XLogFileInit
|
|
* to avoid emplacing a bogus file.
|
|
*/
|
|
void
|
|
writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
|
|
{
|
|
char path[MAXPGPATH];
|
|
char tmppath[MAXPGPATH];
|
|
int fd;
|
|
|
|
/*
|
|
* Write into a temp file name.
|
|
*/
|
|
snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
|
|
|
|
unlink(tmppath);
|
|
|
|
/* do not use get_sync_bit() here --- want to fsync only at end of fill */
|
|
fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
|
|
if (fd < 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not create file \"%s\": %m", tmppath)));
|
|
|
|
errno = 0;
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
|
|
if ((int) write(fd, content, size) != size)
|
|
{
|
|
int save_errno = errno;
|
|
|
|
/*
|
|
* If we fail to make the file, delete it to release disk space
|
|
*/
|
|
unlink(tmppath);
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
}
|
|
pgstat_report_wait_end();
|
|
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
|
|
if (pg_fsync(fd) != 0)
|
|
ereport(data_sync_elevel(ERROR),
|
|
(errcode_for_file_access(),
|
|
errmsg("could not fsync file \"%s\": %m", tmppath)));
|
|
pgstat_report_wait_end();
|
|
|
|
if (CloseTransientFile(fd) != 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not close file \"%s\": %m", tmppath)));
|
|
|
|
/*
|
|
* Now move the completed history file into place with its final name.
|
|
*/
|
|
TLHistoryFilePath(path, tli);
|
|
|
|
/*
|
|
* Perform the rename using link if available, paranoidly trying to avoid
|
|
* overwriting an existing file (there shouldn't be one).
|
|
*/
|
|
durable_rename_excl(tmppath, path, ERROR);
|
|
}
|
|
|
|
/*
|
|
* Returns true if 'expectedTLEs' contains a timeline with id 'tli'
|
|
*/
|
|
bool
|
|
tliInHistory(TimeLineID tli, List *expectedTLEs)
|
|
{
|
|
ListCell *cell;
|
|
|
|
foreach(cell, expectedTLEs)
|
|
{
|
|
if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Returns the ID of the timeline in use at a particular point in time, in
|
|
* the given timeline history.
|
|
*/
|
|
TimeLineID
|
|
tliOfPointInHistory(XLogRecPtr ptr, List *history)
|
|
{
|
|
ListCell *cell;
|
|
|
|
foreach(cell, history)
|
|
{
|
|
TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
|
|
|
|
if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
|
|
(XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
|
|
{
|
|
/* found it */
|
|
return tle->tli;
|
|
}
|
|
}
|
|
|
|
/* shouldn't happen. */
|
|
elog(ERROR, "timeline history was not contiguous");
|
|
return 0; /* keep compiler quiet */
|
|
}
|
|
|
|
/*
|
|
* Returns the point in history where we branched off the given timeline,
|
|
* and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
|
|
* the timeline is current, ie. we have not branched off from it, and throws
|
|
* an error if the timeline is not part of this server's history.
|
|
*/
|
|
XLogRecPtr
|
|
tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
|
|
{
|
|
ListCell *cell;
|
|
|
|
if (nextTLI)
|
|
*nextTLI = 0;
|
|
foreach(cell, history)
|
|
{
|
|
TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
|
|
|
|
if (tle->tli == tli)
|
|
return tle->end;
|
|
if (nextTLI)
|
|
*nextTLI = tle->tli;
|
|
}
|
|
|
|
ereport(ERROR,
|
|
(errmsg("requested timeline %u is not in this server's history",
|
|
tli)));
|
|
return InvalidXLogRecPtr; /* keep compiler quiet */
|
|
}
|