From 60a8b63d24321f5162dfcaf1502936c7ed00b63e Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Tue, 17 Jan 2017 17:29:15 +0900 Subject: [PATCH] Fix an assertion failure related to an exclusive backup. Previously multiple sessions could execute pg_start_backup() and pg_stop_backup() to start and stop an exclusive backup at the same time. This could trigger the assertion failure of "FailedAssertion("!(XLogCtl->Insert.exclusiveBackup)". This happend because, even while pg_start_backup() was starting an exclusive backup, other session could run pg_stop_backup() concurrently and mark the backup as not-in-progress unconditionally. This patch introduces ExclusiveBackupState indicating the state of an exclusive backup. This state is used to ensure that there is only one session running pg_start_backup() or pg_stop_backup() at the same time, to avoid the assertion failure. Back-patch to all supported versions. Author: Michael Paquier Reviewed-By: Kyotaro Horiguchi and me Reported-By: Andreas Seltenreich Discussion: <87mvktojme.fsf@credativ.de> --- src/backend/access/transam/xlog.c | 223 ++++++++++++++++++++---------- 1 file changed, 150 insertions(+), 73 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 18bfa2fc2c..ce89efc194 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -459,6 +459,29 @@ typedef union WALInsertLockPadded char pad[PG_CACHE_LINE_SIZE]; } WALInsertLockPadded; +/* + * State of an exclusive backup, necessary to control concurrent activities + * across sessions when working on exclusive backups. + * + * EXCLUSIVE_BACKUP_NONE means that there is no exclusive backup actually + * running, to be more precise pg_start_backup() is not being executed for + * an exclusive backup and there is no exclusive backup in progress. + * EXCLUSIVE_BACKUP_STARTING means that pg_start_backup() is starting an + * exclusive backup. + * EXCLUSIVE_BACKUP_IN_PROGRESS means that pg_start_backup() has finished + * running and an exclusive backup is in progress. pg_stop_backup() is + * needed to finish it. + * EXCLUSIVE_BACKUP_STOPPING means that pg_stop_backup() is stopping an + * exclusive backup. + */ +typedef enum ExclusiveBackupState +{ + EXCLUSIVE_BACKUP_NONE = 0, + EXCLUSIVE_BACKUP_STARTING, + EXCLUSIVE_BACKUP_IN_PROGRESS, + EXCLUSIVE_BACKUP_STOPPING +} ExclusiveBackupState; + /* * Shared state data for WAL insertion. */ @@ -500,13 +523,15 @@ typedef struct XLogCtlInsert bool fullPageWrites; /* - * exclusiveBackup is true if a backup started with pg_start_backup() is - * in progress, and nonExclusiveBackups is a counter indicating the number - * of streaming base backups currently in progress. forcePageWrites is set - * to true when either of these is non-zero. lastBackupStart is the latest - * checkpoint redo location used as a starting point for an online backup. + * exclusiveBackupState indicates the state of an exclusive backup + * (see comments of ExclusiveBackupState for more details). + * nonExclusiveBackups is a counter indicating the number of streaming + * base backups currently in progress. forcePageWrites is set to true + * when either of these is non-zero. lastBackupStart is the latest + * checkpoint redo location used as a starting point for an online + * backup. */ - bool exclusiveBackup; + ExclusiveBackupState exclusiveBackupState; int nonExclusiveBackups; XLogRecPtr lastBackupStart; @@ -845,6 +870,7 @@ static void xlog_outrec(StringInfo buf, XLogReaderState *record); #endif static void xlog_outdesc(StringInfo buf, XLogReaderState *record); static void pg_start_backup_callback(int code, Datum arg); +static void pg_stop_backup_callback(int code, Datum arg); static bool read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired, bool *backupFromStandby); static bool read_tablespace_map(List **tablespaces); @@ -9872,7 +9898,12 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, WALInsertLockAcquireExclusive(); if (exclusive) { - if (XLogCtl->Insert.exclusiveBackup) + /* + * At first, mark that we're now starting an exclusive backup, + * to ensure that there are no other sessions currently running + * pg_start_backup() or pg_stop_backup(). + */ + if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_NONE) { WALInsertLockRelease(); ereport(ERROR, @@ -9880,7 +9911,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, errmsg("a backup is already in progress"), errhint("Run pg_stop_backup() and try again."))); } - XLogCtl->Insert.exclusiveBackup = true; + XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STARTING; } else XLogCtl->Insert.nonExclusiveBackups++; @@ -10135,7 +10166,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, { /* * Check for existing backup label --- implies a backup is already - * running. (XXX given that we checked exclusiveBackup above, + * running. (XXX given that we checked exclusiveBackupState above, * maybe it would be OK to just unlink any such label file?) */ if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0) @@ -10216,6 +10247,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, } PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive)); + /* + * Mark that start phase has correctly finished for an exclusive backup. + */ + if (exclusive) + { + WALInsertLockAcquireExclusive(); + XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS; + WALInsertLockRelease(); + } + /* * We're done. As a convenience, return the starting WAL location. */ @@ -10234,8 +10275,8 @@ pg_start_backup_callback(int code, Datum arg) WALInsertLockAcquireExclusive(); if (exclusive) { - Assert(XLogCtl->Insert.exclusiveBackup); - XLogCtl->Insert.exclusiveBackup = false; + Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STARTING); + XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE; } else { @@ -10243,7 +10284,7 @@ pg_start_backup_callback(int code, Datum arg) XLogCtl->Insert.nonExclusiveBackups--; } - if (!XLogCtl->Insert.exclusiveBackup && + if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE && XLogCtl->Insert.nonExclusiveBackups == 0) { XLogCtl->Insert.forcePageWrites = false; @@ -10251,6 +10292,24 @@ pg_start_backup_callback(int code, Datum arg) WALInsertLockRelease(); } +/* + * Error cleanup callback for pg_stop_backup + */ +static void +pg_stop_backup_callback(int code, Datum arg) +{ + bool exclusive = DatumGetBool(arg); + + /* Update backup status on failure */ + WALInsertLockAcquireExclusive(); + if (exclusive) + { + Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STOPPING); + XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS; + } + WALInsertLockRelease(); +} + /* * do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup() * function. @@ -10313,20 +10372,91 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) errmsg("WAL level not sufficient for making an online backup"), errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); - /* - * OK to update backup counters and forcePageWrites - */ - WALInsertLockAcquireExclusive(); if (exclusive) { - if (!XLogCtl->Insert.exclusiveBackup) + /* + * At first, mark that we're now stopping an exclusive backup, + * to ensure that there are no other sessions currently running + * pg_start_backup() or pg_stop_backup(). + */ + WALInsertLockAcquireExclusive(); + if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_IN_PROGRESS) { WALInsertLockRelease(); ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("exclusive backup not in progress"))); } - XLogCtl->Insert.exclusiveBackup = false; + XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STOPPING; + WALInsertLockRelease(); + + /* + * Remove backup_label. In case of failure, the state for an exclusive + * backup is switched back to in-progress. + */ + PG_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive)); + { + /* + * Read the existing label file into memory. + */ + struct stat statbuf; + int r; + + if (stat(BACKUP_LABEL_FILE, &statbuf)) + { + /* should not happen per the upper checks */ + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + BACKUP_LABEL_FILE))); + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("a backup is not in progress"))); + } + + lfp = AllocateFile(BACKUP_LABEL_FILE, "r"); + if (!lfp) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + BACKUP_LABEL_FILE))); + } + labelfile = palloc(statbuf.st_size + 1); + r = fread(labelfile, statbuf.st_size, 1, lfp); + labelfile[statbuf.st_size] = '\0'; + + /* + * Close and remove the backup label file + */ + if (r != 1 || ferror(lfp) || FreeFile(lfp)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + BACKUP_LABEL_FILE))); + if (unlink(BACKUP_LABEL_FILE) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", + BACKUP_LABEL_FILE))); + + /* + * Remove tablespace_map file if present, it is created only if there + * are tablespaces. + */ + unlink(TABLESPACE_MAP); + } + PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive)); + } + + /* + * OK to update backup counters and forcePageWrites + */ + WALInsertLockAcquireExclusive(); + if (exclusive) + { + XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE; } else { @@ -10340,66 +10470,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) XLogCtl->Insert.nonExclusiveBackups--; } - if (!XLogCtl->Insert.exclusiveBackup && + if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE && XLogCtl->Insert.nonExclusiveBackups == 0) { XLogCtl->Insert.forcePageWrites = false; } WALInsertLockRelease(); - if (exclusive) - { - /* - * Read the existing label file into memory. - */ - struct stat statbuf; - int r; - - if (stat(BACKUP_LABEL_FILE, &statbuf)) - { - if (errno != ENOENT) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - BACKUP_LABEL_FILE))); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("a backup is not in progress"))); - } - - lfp = AllocateFile(BACKUP_LABEL_FILE, "r"); - if (!lfp) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - BACKUP_LABEL_FILE))); - } - labelfile = palloc(statbuf.st_size + 1); - r = fread(labelfile, statbuf.st_size, 1, lfp); - labelfile[statbuf.st_size] = '\0'; - - /* - * Close and remove the backup label file - */ - if (r != 1 || ferror(lfp) || FreeFile(lfp)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - BACKUP_LABEL_FILE))); - if (unlink(BACKUP_LABEL_FILE) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not remove file \"%s\": %m", - BACKUP_LABEL_FILE))); - - /* - * Remove tablespace_map file if present, it is created only if there - * are tablespaces. - */ - unlink(TABLESPACE_MAP); - } - /* * Read and parse the START WAL LOCATION line (this code is pretty crude, * but we are not expecting any variability in the file format). @@ -10636,7 +10713,7 @@ do_pg_abort_backup(void) Assert(XLogCtl->Insert.nonExclusiveBackups > 0); XLogCtl->Insert.nonExclusiveBackups--; - if (!XLogCtl->Insert.exclusiveBackup && + if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE && XLogCtl->Insert.nonExclusiveBackups == 0) { XLogCtl->Insert.forcePageWrites = false;