postgresql/src/backend/access/transam/timeline.c

593 lines
15 KiB
C

/*-------------------------------------------------------------------------
*
* timeline.c
* Functions for reading and writing timeline history files.
*
* A timeline history file lists the timeline switches that led up to a given
* timeline, in a simple text format. History files are archived along with
* the WAL segments.
*
* The files are named like "<tli>.history". For example, if the database
* starts up and switches to timeline 5, the timeline history file would be
* called "00000005.history".
*
* Each line in the file represents a timeline switch:
*
*     <parentTLI> <switchpoint> <reason>
*
*     parentTLI    ID of the parent timeline
*     switchpoint  XLogRecPtr of the WAL location where the switch happened
*     reason       human-readable explanation of why the timeline was changed
*
* The fields are separated by tabs. Lines beginning with # are comments, and
* are ignored. Empty lines are also ignored.
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/backend/access/transam/timeline.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/stat.h>
#include <unistd.h>
#include "access/timeline.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xlogdefs.h"
#include "pgstat.h"
#include "storage/fd.h"
/*
 * Fetch timeline history files from the archive into pg_wal.
 *
 * Every timeline ID t with begin <= t < end is tried; note that 'end' itself
 * is NOT included.  Timeline 1 is skipped, because it does not have a
 * history file.  A file that cannot be restored from the archive is silently
 * passed over.
 */
void
restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
{
	TimeLineID	cur;

	for (cur = begin; cur < end; cur++)
	{
		char		restoredPath[MAXPGPATH];
		char		fname[MAXFNAMELEN];

		if (cur != 1)
		{
			TLHistoryFileName(fname, cur);

			/* Only keep the file if the archive actually delivered it */
			if (RestoreArchivedFile(restoredPath, fname, "RECOVERYHISTORY", 0, false))
				KeepFileRestoredFromArchive(restoredPath, fname);
		}
	}
}
/*
 * Read the history file of 'targetTLI' and turn it into a list of
 * TimeLineHistoryEntry structs.
 *
 * The returned list is ordered newest first: its head is an entry for
 * targetTLI itself (the "tip", with end = InvalidXLogRecPtr), followed by the
 * ancestors in the reverse of the order they appear in the file.  Each
 * entry's 'begin' is the switchpoint of the line before it in the file
 * (InvalidXLogRecPtr for the first line), and its 'end' is the switchpoint
 * given on its own line.
 *
 * For timeline 1, or when the history file does not exist, the timeline is
 * assumed to have no parents and a one-element list holding just targetTLI
 * is returned.
 *
 * Errors:
 *	FATAL - the file exists but cannot be opened, a line cannot be parsed,
 *			timeline IDs are not strictly increasing, or an ID in the file
 *			is not smaller than targetTLI.
 *	ERROR - reading from the file failed.
 */
List *
readTimeLineHistory(TimeLineID targetTLI)
{
	List	   *history = NIL;
	char		path[MAXPGPATH];
	char		histfname[MAXFNAMELEN];
	FILE	   *fp;
	TimeLineHistoryEntry *tle;
	TimeLineID	prevTLI = 0;
	XLogRecPtr	lastSwitch = InvalidXLogRecPtr;
	bool		restored = false;

	/* Timeline 1 does not have a history file, so there is nothing to read */
	if (targetTLI == 1)
	{
		tle = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
		tle->tli = targetTLI;
		tle->begin = InvalidXLogRecPtr;
		tle->end = InvalidXLogRecPtr;
		return list_make1(tle);
	}

	/* In archive recovery, try the archive; otherwise look in pg_wal */
	if (!ArchiveRecoveryRequested)
	{
		TLHistoryFilePath(path, targetTLI);
	}
	else
	{
		TLHistoryFileName(histfname, targetTLI);
		restored = RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
	}

	fp = AllocateFile(path, "r");
	if (fp == NULL)
	{
		/* Anything other than "no such file" is a hard failure */
		if (errno != ENOENT)
			ereport(FATAL,
					(errcode_for_file_access(),
					 errmsg("could not open file \"%s\": %m", path)));

		/* No history file: treat the timeline as having no parents */
		tle = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
		tle->tli = targetTLI;
		tle->begin = InvalidXLogRecPtr;
		tle->end = InvalidXLogRecPtr;
		return list_make1(tle);
	}

	/*
	 * Walk through the file one line at a time.
	 */
	for (;;)
	{
		char		line[MAXPGPATH];
		char	   *got;
		char	   *start;
		TimeLineID	parsedTLI;
		uint32		hi;
		uint32		lo;
		int			nparsed;

		pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
		got = fgets(line, sizeof(line), fp);
		pgstat_report_wait_end();

		if (got == NULL)
		{
			/* NULL means either a read error or plain end of file */
			if (ferror(fp))
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not read file \"%s\": %m", path)));
			break;
		}

		/* Blank lines and '#' comment lines carry no data */
		start = line;
		while (*start != '\0' && isspace((unsigned char) *start))
			start++;
		if (*start == '\0' || *start == '#')
			continue;

		nparsed = sscanf(line, "%u\t%X/%X", &parsedTLI, &hi, &lo);

		if (nparsed < 1)
		{
			/* the first field of a line must be a numeric timeline ID */
			ereport(FATAL,
					(errmsg("syntax error in history file: %s", line),
					 errhint("Expected a numeric timeline ID.")));
		}
		if (nparsed != 3)
			ereport(FATAL,
					(errmsg("syntax error in history file: %s", line),
					 errhint("Expected a write-ahead log switchpoint location.")));

		/* From the second data line on, IDs must strictly increase */
		if (history != NIL && parsedTLI <= prevTLI)
			ereport(FATAL,
					(errmsg("invalid data in history file: %s", line),
					 errhint("Timeline IDs must be in increasing sequence.")));

		prevTLI = parsedTLI;

		tle = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
		tle->tli = parsedTLI;
		tle->begin = lastSwitch;
		tle->end = (((uint64) hi) << 32) | (uint64) lo;
		lastSwitch = tle->end;

		/* Prepend, so that the list ends up newest first */
		history = lcons(tle, history);

		/* whatever follows the switchpoint on the line is ignored */
	}

	FreeFile(fp);

	/* Every ancestor must have a smaller ID than the timeline itself */
	if (history != NIL && targetTLI <= prevTLI)
		ereport(FATAL,
				(errmsg("invalid data in history file \"%s\"", path),
				 errhint("Timeline IDs must be less than child timeline's ID.")));

	/*
	 * The timeline being asked about has no line of its own in the file, so
	 * add the "tip" entry for it here, at the head of the list.
	 */
	tle = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
	tle->tli = targetTLI;
	tle->begin = lastSwitch;
	tle->end = InvalidXLogRecPtr;
	history = lcons(tle, history);

	/*
	 * A file that came out of the archive is saved in pg_wal for future
	 * reference.
	 */
	if (restored)
		KeepFileRestoredFromArchive(path, histfname);

	return history;
}
/*
 * Report whether a history file can be opened for timeline 'probeTLI'.
 *
 * Returns false for timeline 1 (which does not have a history file) and when
 * the file does not exist.  Any other failure to open the file is reported
 * as FATAL.  When archive recovery was requested, the file is first
 * requested from the archive.
 */
bool
existsTimeLineHistory(TimeLineID probeTLI)
{
	char		path[MAXPGPATH];
	char		histfname[MAXFNAMELEN];
	FILE	   *fp;

	/* Timeline 1 does not have a history file, so no need to check */
	if (probeTLI == 1)
		return false;

	if (!ArchiveRecoveryRequested)
	{
		TLHistoryFilePath(path, probeTLI);
	}
	else
	{
		TLHistoryFileName(histfname, probeTLI);
		/* the result is not needed: the open below decides */
		(void) RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
	}

	fp = AllocateFile(path, "r");
	if (fp == NULL)
	{
		/* a missing file is a normal "no"; anything else is fatal */
		if (errno != ENOENT)
			ereport(FATAL,
					(errcode_for_file_access(),
					 errmsg("could not open file \"%s\": %m", path)));
		return false;
	}

	FreeFile(fp);
	return true;
}
/*
 * Find the newest existing timeline, assuming that startTLI exists.
 *
 * Timeline IDs following startTLI are probed one by one with
 * existsTimeLineHistory(), and the search stops at the first ID that has no
 * history file; gaps in the sequence are not skipped over.
 *
 * Note: while this is somewhat heuristic, it does guarantee that
 * (result + 1) was just probed and found not to exist, so that ID should be
 * safe to assign to a new timeline.
 */
TimeLineID
findNewestTimeLine(TimeLineID startTLI)
{
	TimeLineID	newest = startTLI;

	/* advance for as long as the next ID has a history file */
	while (existsTimeLineHistory(newest + 1))
		newest++;

	return newest;
}
/*
 * Create the history file for a newly started timeline.
 *
 *	newTLI:			ID of the new timeline
 *	parentTLI:		ID of its immediate parent
 *	switchpoint:	WAL location where the system switched to the new timeline
 *	reason:			human-readable explanation of why the timeline was switched
 *
 * The new file consists of a verbatim copy of the parent's history file (if
 * there is one) followed by one line describing this switch.  It is built
 * under a temporary name, fsync'd, and then renamed into place, so that a
 * partially written file never appears under the final name.  If archiving
 * is active the finished file is marked as ready for archiving.
 *
 * Currently this is only used at the end of recovery, and so there are no
 * locking considerations.  Still, as much care is taken as in XLogFileInit
 * to avoid putting a bogus file in place.
 *
 * All failures are reported with ereport(ERROR), except that the level of an
 * fsync failure is chosen by data_sync_elevel().
 */
void
writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
					 XLogRecPtr switchpoint, char *reason)
{
	char		path[MAXPGPATH];
	char		tmppath[MAXPGPATH];
	char		histfname[MAXFNAMELEN];
	char		buf[BLCKSZ];
	int			parentfd;
	int			tmpfd;
	int			len;
	bool		haveParent;

	Assert(newTLI > parentTLI); /* else bad selection of newTLI */

	/*
	 * Build the file under a per-process temporary name, removing any stale
	 * leftover first.
	 */
	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());

	unlink(tmppath);

	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
	tmpfd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
	if (tmpfd < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", tmppath)));

	/*
	 * Locate the parent's history file, asking the archive for it when
	 * archive recovery was requested.
	 */
	if (!ArchiveRecoveryRequested)
	{
		TLHistoryFilePath(path, parentTLI);
	}
	else
	{
		TLHistoryFileName(histfname, parentTLI);
		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
	}

	parentfd = OpenTransientFile(path, O_RDONLY);
	haveParent = (parentfd >= 0);

	if (!haveParent)
	{
		if (errno != ENOENT)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not open file \"%s\": %m", path)));
		/* No such file, so assume the parent itself has no parents */
	}
	else
	{
		/* Copy the parent's history verbatim, one block at a time */
		for (;;)
		{
			errno = 0;
			pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
			len = (int) read(parentfd, buf, sizeof(buf));
			pgstat_report_wait_end();
			if (len < 0 || errno != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not read file \"%s\": %m", path)));
			if (len == 0)
				break;			/* end of parent file */

			errno = 0;
			pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
			if ((int) write(tmpfd, buf, len) != len)
			{
				int			write_errno = errno;

				/* Remove the incomplete temp file to release disk space */
				unlink(tmppath);

				/*
				 * A short write that left errno unset is taken to mean the
				 * disk is full.
				 */
				errno = write_errno ? write_errno : ENOSPC;

				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not write to file \"%s\": %m", tmppath)));
			}
			pgstat_report_wait_end();
		}

		if (CloseTransientFile(parentfd) != 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not close file \"%s\": %m", path)));
	}

	/*
	 * Add the line describing this timeline switch.
	 *
	 * When a parent file was copied, start with an extra newline in case
	 * that file did not end with one.
	 */
	snprintf(buf, sizeof(buf),
			 "%s%u\t%X/%X\t%s\n",
			 haveParent ? "\n" : "",
			 parentTLI,
			 LSN_FORMAT_ARGS(switchpoint),
			 reason);

	len = strlen(buf);
	errno = 0;
	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
	if ((int) write(tmpfd, buf, len) != len)
	{
		int			write_errno = errno;

		/* Remove the incomplete temp file to release disk space */
		unlink(tmppath);
		/* if write didn't set errno, assume problem is no disk space */
		errno = write_errno ? write_errno : ENOSPC;

		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", tmppath)));
	}
	pgstat_report_wait_end();

	/* Flush the finished contents before giving the file its final name */
	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
	if (pg_fsync(tmpfd) != 0)
		ereport(data_sync_elevel(ERROR),
				(errcode_for_file_access(),
				 errmsg("could not fsync file \"%s\": %m", tmppath)));
	pgstat_report_wait_end();

	if (CloseTransientFile(tmpfd) != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\": %m", tmppath)));

	/*
	 * Move the completed file into place under its final name.  No file of
	 * that name is expected to exist yet.
	 */
	TLHistoryFilePath(path, newTLI);
	Assert(access(path, F_OK) != 0 && errno == ENOENT);
	durable_rename(tmppath, path, ERROR);

	/* The history file can be archived immediately. */
	if (XLogArchivingActive())
	{
		TLHistoryFileName(histfname, newTLI);
		XLogArchiveNotify(histfname);
	}
}
/*
 * Store 'content' ('size' bytes) as the history file of timeline 'tli'.
 *
 * The data is written to a temporary file, fsync'd, and then renamed to the
 * final history file name, replacing any file already there.
 *
 * Currently this is only used in the walreceiver process, and so there are
 * no locking considerations.  Still, as much care is taken as in
 * XLogFileInit to avoid putting a bogus file in place.
 *
 * All failures are reported with ereport(ERROR), except that the level of an
 * fsync failure is chosen by data_sync_elevel().
 */
void
writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
{
	char		finalpath[MAXPGPATH];
	char		tmppath[MAXPGPATH];
	int			tmpfd;

	/*
	 * Build the file under a per-process temporary name, removing any stale
	 * leftover first.
	 */
	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());

	unlink(tmppath);

	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
	tmpfd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
	if (tmpfd < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", tmppath)));

	errno = 0;
	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
	if ((int) write(tmpfd, content, size) != size)
	{
		int			write_errno = errno;

		/* Remove the incomplete temp file to release disk space */
		unlink(tmppath);
		/* if write didn't set errno, assume problem is no disk space */
		errno = write_errno ? write_errno : ENOSPC;

		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", tmppath)));
	}
	pgstat_report_wait_end();

	/* Flush the contents before giving the file its final name */
	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
	if (pg_fsync(tmpfd) != 0)
		ereport(data_sync_elevel(ERROR),
				(errcode_for_file_access(),
				 errmsg("could not fsync file \"%s\": %m", tmppath)));
	pgstat_report_wait_end();

	if (CloseTransientFile(tmpfd) != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\": %m", tmppath)));

	/*
	 * Move the completed file into place under its final name, replacing
	 * any existing file with the same name.
	 */
	TLHistoryFilePath(finalpath, tli);
	durable_rename(tmppath, finalpath, ERROR);
}
/*
 * Is timeline 'tli' one of the entries in 'expectedTLEs'?
 *
 * 'expectedTLEs' is a list of TimeLineHistoryEntry pointers.  Returns true
 * if some entry has a matching tli, false otherwise (including for an empty
 * list).
 */
bool
tliInHistory(TimeLineID tli, List *expectedTLEs)
{
	ListCell   *lc;
	bool		found = false;

	foreach(lc, expectedTLEs)
	{
		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(lc);

		if (tle->tli == tli)
		{
			found = true;
			break;
		}
	}

	return found;
}
/*
 * Return the ID of the timeline that WAL location 'ptr' belongs to,
 * according to the given timeline history.
 *
 * An entry covers 'ptr' when begin <= ptr < end, where an invalid 'begin' or
 * 'end' counts as unbounded on that side.  Entries are examined in list
 * order and the first one covering 'ptr' wins.  If no entry covers it, an
 * ERROR is raised.
 */
TimeLineID
tliOfPointInHistory(XLogRecPtr ptr, List *history)
{
	ListCell   *lc;

	foreach(lc, history)
	{
		TimeLineHistoryEntry *entry = (TimeLineHistoryEntry *) lfirst(lc);

		/* this entry starts after ptr */
		if (!XLogRecPtrIsInvalid(entry->begin) && entry->begin > ptr)
			continue;

		/* this entry ended at or before ptr */
		if (!XLogRecPtrIsInvalid(entry->end) && ptr >= entry->end)
			continue;

		/* found it */
		return entry->tli;
	}

	/* shouldn't happen. */
	elog(ERROR, "timeline history was not contiguous");
	return 0;					/* keep compiler quiet */
}
/*
 * Return the WAL location at which the history branched off timeline 'tli',
 * i.e. the 'end' of that timeline's entry in 'history'.  That value is
 * returned as-is, so the result is InvalidXLogRecPtr when the entry's end is
 * invalid (the timeline is current and has not been branched off from).
 *
 * If 'nextTLI' is not NULL, *nextTLI receives the ID of the entry that comes
 * immediately before the matching one in list order (the timeline that was
 * branched to), or 0 when the matching entry is the first in the list.
 *
 * Raises an ERROR if 'tli' does not appear in 'history'.
 */
XLogRecPtr
tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
{
	ListCell   *lc;

	if (nextTLI != NULL)
		*nextTLI = 0;

	foreach(lc, history)
	{
		TimeLineHistoryEntry *entry = (TimeLineHistoryEntry *) lfirst(lc);

		if (entry->tli != tli)
		{
			/* not the one: remember it as the candidate successor */
			if (nextTLI != NULL)
				*nextTLI = entry->tli;
			continue;
		}

		return entry->end;
	}

	ereport(ERROR,
			(errmsg("requested timeline %u is not in this server's history",
					tli)));
	return InvalidXLogRecPtr;	/* keep compiler quiet */
}