diff --git a/contrib/pg_standby/pg_standby.c b/contrib/pg_standby/pg_standby.c index 000dac5662..f2a7697178 100644 --- a/contrib/pg_standby/pg_standby.c +++ b/contrib/pg_standby/pg_standby.c @@ -1,5 +1,5 @@ /* - * $PostgreSQL: pgsql/contrib/pg_standby/pg_standby.c,v 1.21 2009/03/26 22:29:13 tgl Exp $ + * $PostgreSQL: pgsql/contrib/pg_standby/pg_standby.c,v 1.22 2009/05/14 20:31:09 heikki Exp $ * * * pg_standby.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #ifdef WIN32 @@ -52,7 +53,6 @@ int maxwaittime = 0; /* how long are we prepared to wait for? */ int keepfiles = 0; /* number of WAL files to keep, 0 keep all */ int maxretries = 3; /* number of retries on restore command */ bool debug = false; /* are we debugging? */ -bool triggered = false; /* have we been triggered? */ bool need_cleanup = false; /* do we need to remove files from * archive? */ @@ -69,6 +69,30 @@ char restoreCommand[MAXPGPATH]; /* run this to restore */ char exclusiveCleanupFileName[MAXPGPATH]; /* the file we need to * get from archive */ +/* + * Two types of failover are supported (smart and fast failover). + * + * The content of the trigger file determines the type of failover. If the + * trigger file contains the word "smart" (or the file is empty), smart + * failover is chosen: pg_standby acts as cp or ln command itself, on + * successful completion all the available WAL records will be applied + * resulting in zero data loss. But, it might take a long time to finish + * recovery if there's a lot of unapplied WAL. + * + * On the other hand, if the trigger file contains the word "fast", the + * recovery is finished immediately even if unapplied WAL files remain. Any + * transactions in the unapplied WAL files are lost. + * + * An empty trigger file performs smart failover. SIGUSR or SIGINT triggers + * fast failover. A timeout causes fast failover (smart failover would have + * the same effect, since if the timeout is reached there is no unapplied WAL). 
+ */ +#define NoFailover 0 +#define SmartFailover 1 +#define FastFailover 2 + +static int Failover = NoFailover; + #define RESTORE_COMMAND_COPY 0 #define RESTORE_COMMAND_LINK 1 int restoreCommandType; @@ -108,7 +132,6 @@ struct stat stat_buf; * * As an example, and probably the common case, we use either * cp/ln commands on *nix, or copy/move command on Windows. - * */ static void CustomizableInitialize(void) @@ -352,12 +375,16 @@ SetWALFileNameForCleanup(void) /* * CheckForExternalTrigger() * - * Is there a trigger file? + * Is there a trigger file? Sets global 'Failover' variable to indicate + * what kind of a trigger file it was. A "fast" trigger file is turned + * into a "smart" file as a side-effect. */ -static bool +static void CheckForExternalTrigger(void) { - int rc; + char buf[32]; + int fd; + int len; /* * Look for a trigger file, if that option has been selected @@ -365,28 +392,79 @@ CheckForExternalTrigger(void) * We use stat() here because triggerPath is always a file rather than * potentially being in an archive */ - if (triggerPath && stat(triggerPath, &stat_buf) == 0) + if (!triggerPath || stat(triggerPath, &stat_buf) != 0) + return; + + /* + * An empty trigger file performs smart failover. There's a little race + * condition here: if the writer of the trigger file has just created + * the file, but not yet written anything to it, we'll treat that as + * smart failover even if the other process was just about to write "fast" + * to it. But that's fine: we'll restore one more WAL file, and when we're + * invoked next time, we'll see the word "fast" and fail over immediately. 
+ */ + if (stat_buf.st_size == 0) { - fprintf(stderr, "trigger file found\n"); + Failover = SmartFailover; + fprintf(stderr, "trigger file found: smart failover\n"); + fflush(stderr); + return; + } + + if ((fd = open(triggerPath, O_RDWR, 0)) < 0) + { + fprintf(stderr, "WARNING: could not open \"%s\": %s\n", + triggerPath, strerror(errno)); + fflush(stderr); + return; + } + + if ((len = read(fd, buf, sizeof(buf) - 1)) < 0) + { + fprintf(stderr, "WARNING: could not read \"%s\": %s\n", + triggerPath, strerror(errno)); + fflush(stderr); + close(fd); + return; + } + buf[len] = '\0'; + + if (strncmp(buf, "smart", 5) == 0) + { + Failover = SmartFailover; + fprintf(stderr, "trigger file found: smart failover\n"); + fflush(stderr); + close(fd); + return; + } + + if (strncmp(buf, "fast", 4) == 0) + { + Failover = FastFailover; + + fprintf(stderr, "trigger file found: fast failover\n"); fflush(stderr); /* - * If trigger file found, we *must* delete it. Here's why: When - * recovery completes, we will be asked again for the same file from - * the archive using pg_standby so must remove trigger file so we can - * reload file again and come up correctly. + * Turn it into a "smart" trigger by truncating the file. Otherwise + * if the server asks us again to restore a segment that was + * restored already, we would return "not found" and upset the server. 
+ */ - rc = unlink(triggerPath); - if (rc != 0) + if (ftruncate(fd, 0) < 0) { - fprintf(stderr, "\n ERROR: could not remove \"%s\": %s", triggerPath, strerror(errno)); + fprintf(stderr, "WARNING: could not truncate \"%s\": %s\n", + triggerPath, strerror(errno)); fflush(stderr); - exit(rc); } - return true; - } + close(fd); - return false; + return; + } + close(fd); + + fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath); + fflush(stderr); + return; } /* @@ -402,7 +480,7 @@ RestoreWALFileForRecovery(void) if (debug) { - fprintf(stderr, "\nrunning restore :"); + fprintf(stderr, "running restore :"); fflush(stderr); } @@ -413,7 +491,7 @@ RestoreWALFileForRecovery(void) { if (debug) { - fprintf(stderr, " OK"); + fprintf(stderr, " OK\n"); fflush(stderr); } return true; @@ -425,7 +503,7 @@ RestoreWALFileForRecovery(void) * Allow caller to add additional info */ if (debug) - fprintf(stderr, "not restored : "); + fprintf(stderr, "not restored\n"); return false; } @@ -552,8 +630,6 @@ main(int argc, char **argv) break; case 't': /* Trigger file */ triggerPath = optarg; - if (CheckForExternalTrigger()) - exit(1); /* Normal exit, with non-zero */ break; case 'w': /* Max wait time */ maxwaittime = atoi(optarg); @@ -633,20 +709,20 @@ main(int argc, char **argv) if (debug) { - fprintf(stderr, "\nTrigger file : %s", triggerPath ? triggerPath : ""); - fprintf(stderr, "\nWaiting for WAL file : %s", nextWALFileName); - fprintf(stderr, "\nWAL file path : %s", WALFilePath); - fprintf(stderr, "\nRestoring to... : %s", xlogFilePath); - fprintf(stderr, "\nSleep interval : %d second%s", + fprintf(stderr, "Trigger file : %s\n", triggerPath ? triggerPath : ""); + fprintf(stderr, "Waiting for WAL file : %s\n", nextWALFileName); + fprintf(stderr, "WAL file path : %s\n", WALFilePath); + fprintf(stderr, "Restoring to : %s\n", xlogFilePath); + fprintf(stderr, "Sleep interval : %d second%s\n", sleeptime, (sleeptime > 1 ? 
"s" : " ")); - fprintf(stderr, "\nMax wait interval : %d %s", + fprintf(stderr, "Max wait interval : %d %s\n", maxwaittime, (maxwaittime > 0 ? "seconds" : "forever")); - fprintf(stderr, "\nCommand for restore : %s", restoreCommand); - fprintf(stderr, "\nKeep archive history : "); + fprintf(stderr, "Command for restore : %s\n", restoreCommand); + fprintf(stderr, "Keep archive history : "); if (need_cleanup) - fprintf(stderr, "%s and later", exclusiveCleanupFileName); + fprintf(stderr, "%s and later\n", exclusiveCleanupFileName); else - fprintf(stderr, "No cleanup required"); + fprintf(stderr, "No cleanup required\n"); fflush(stderr); } @@ -676,56 +752,74 @@ main(int argc, char **argv) /* * Main wait loop */ - while (!CustomizableNextWALFileReady() && !triggered) + for (;;) { + /* Check for trigger file or signal first */ + CheckForExternalTrigger(); + if (signaled) + { + Failover = FastFailover; + if (debug) + { + fprintf(stderr, "signaled to exit: fast failover\n"); + fflush(stderr); + } + } + + /* + * Check for fast failover immediately, before checking if the + * requested WAL file is available + */ + if (Failover == FastFailover) + exit(1); + + if (CustomizableNextWALFileReady()) + { + /* + * Once we have restored this file successfully we can remove some + * prior WAL files. If this restore fails we mustn't remove any file + * because some of them will be requested again immediately after + * the failed restore, or when we restart recovery. 
+ */ + if (RestoreWALFileForRecovery()) + { + if (need_cleanup) + CustomizableCleanupPriorWALFiles(); + + exit(0); + } + else + { + /* Something went wrong in copying the file */ + exit(1); + } + } + + /* Check for smart failover if the next WAL file was not available */ + if (Failover == SmartFailover) + exit(1); + if (sleeptime <= 60) pg_usleep(sleeptime * 1000000L); - if (signaled) + waittime += sleeptime; + if (waittime >= maxwaittime && maxwaittime > 0) { - triggered = true; + Failover = FastFailover; if (debug) { - fprintf(stderr, "\nsignaled to exit\n"); + fprintf(stderr, "Timed out after %d seconds: fast failover\n", + waittime); fflush(stderr); } } - else + if (debug) { - - if (debug) - { - fprintf(stderr, "\nWAL file not present yet."); - if (triggerPath) - fprintf(stderr, " Checking for trigger file..."); - fflush(stderr); - } - - waittime += sleeptime; - - if (!triggered && (CheckForExternalTrigger() || (waittime >= maxwaittime && maxwaittime > 0))) - { - triggered = true; - if (debug && waittime >= maxwaittime && maxwaittime > 0) - fprintf(stderr, "\nTimed out after %d seconds\n", waittime); - } + fprintf(stderr, "WAL file not present yet."); + if (triggerPath) + fprintf(stderr, " Checking for trigger file..."); + fprintf(stderr, "\n"); + fflush(stderr); } } - - /* - * Action on exit - */ - if (triggered) - exit(1); /* Normal exit, with non-zero */ - - /* - * Once we have restored this file successfully we can remove some prior - * WAL files. If this restore fails we musn't remove any file because some - * of them will be requested again immediately after the failed restore, - * or when we restart recovery. 
- */ - if (RestoreWALFileForRecovery() && need_cleanup) - CustomizableCleanupPriorWALFiles(); - - return 0; } diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml index 016050664f..b018e2fdb4 100644 --- a/doc/src/sgml/backup.sgml +++ b/doc/src/sgml/backup.sgml @@ -1,4 +1,4 @@ - + Backup and Restore @@ -1126,6 +1126,29 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + recovery_end_command (string) + + + This parameter specifies a shell command that will be executed once only + at the end of recovery. This parameter is optional. The purpose of the + recovery_end_command is to provide a mechanism for cleanup following + replication or recovery. + Any %r is replaced by the name of the file + containing the last valid restart point. That is the earliest file that + must be kept to allow a restore to be restartable, so this information + can be used to truncate the archive to just the minimum required to + support restart of the current restore. %r would only be + used in a warm-standby configuration (see ). + Write %% to embed an actual % character + in the command. + If the command returns a non-zero exit status then a WARNING log + message will be written, unless signalled in which case we return + a FATAL error. + + + + recovery_target_time (timestamp) diff --git a/doc/src/sgml/pgstandby.sgml b/doc/src/sgml/pgstandby.sgml index 6b381c69ac..81e53b6a63 100644 --- a/doc/src/sgml/pgstandby.sgml +++ b/doc/src/sgml/pgstandby.sgml @@ -1,4 +1,4 @@ - + pg_standby @@ -92,6 +92,37 @@ pg_standby option ... archiv is specified, the archivelocation directory must be writable too. + + There are two ways to fail over a warm standby database server. + You control the type of failover with the contents of the trigger file: + + + + Smart Failover + + + In smart failover, the server is brought up after applying all + WAL files available in the archive. 
This results in zero data loss, + even if the standby server has fallen behind, but if there is a lot of + unapplied WAL the recovery can take a long time. To trigger a smart + failover, create a trigger file containing the word smart, + or just leave it empty. + + + + + Fast Failover + + + In fast failover, the server is brought up immediately. Any WAL files + in the archive that have not yet been applied will be ignored, and + all transactions in those files are lost. To trigger a fast failover, + write the word fast into the trigger file. + + + + + <application>pg_standby</> options @@ -177,8 +208,7 @@ pg_standby option ... archiv -t triggerfile none - Specify a trigger file whose presence should cause recovery to end - whether or not the next WAL file is available. + Specify a trigger file whose presence should cause failover. It is recommended that you use a structured filename to avoid confusion as to which server is being triggered when multiple servers exist on the same system; for example @@ -190,7 +220,7 @@ pg_standby option ... archiv 0 Set the maximum number of seconds to wait for the next WAL file, - after which recovery will end and the standby will come up. + after which a fast failover will be performed. A setting of zero (the default) means wait forever. The default setting is not necessarily recommended; consult for discussion. @@ -210,6 +240,7 @@ pg_standby option ... 
archiv archive_command = 'cp %p .../archive/%f' restore_command = 'pg_standby -l -d -s 2 -t /tmp/pgsql.trigger.5442 .../archive %f %p %r 2>>standby.log' +recovery_end_command = 'rm -f /tmp/pgsql.trigger.5442' where the archive directory is physically located on the standby server, @@ -236,7 +267,13 @@ restore_command = 'pg_standby -l -d -s 2 -t /tmp/pgsql.trigger.5442 .../archive stop waiting only when a trigger file called - /tmp/pgsql.trigger.5442 appears + /tmp/pgsql.trigger.5442 appears, + and perform failover according to its content + + + + + remove the trigger file when recovery ends @@ -277,7 +314,8 @@ restore_command = 'pg_standby -d -s 5 -t C:\pgsql.trigger.5442 ...\archive %f %p stop waiting only when a trigger file called - C:\pgsql.trigger.5442 appears + C:\pgsql.trigger.5442 appears, + and perform failover according to its content diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 9b3fe5eafa..09b507500f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.337 2009/05/07 11:25:25 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.338 2009/05/14 20:31:09 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -147,6 +147,7 @@ static bool restoredFromArchive = false; /* options taken from recovery.conf */ static char *recoveryRestoreCommand = NULL; +static char *recoveryEndCommand = NULL; static bool recoveryTarget = false; static bool recoveryTargetExact = false; static bool recoveryTargetInclusive = true; @@ -463,6 +464,7 @@ static int XLogFileRead(uint32 log, uint32 seg, int emode); static void XLogFileClose(void); static bool RestoreArchivedFile(char *path, const char *xlogfname, const char 
*recovername, off_t expectedSize); +static void ExecuteRecoveryEndCommand(void); static void PreallocXlogFiles(XLogRecPtr endptr); static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr); static void ValidateXLOGDirectoryStructure(void); @@ -2849,6 +2851,114 @@ RestoreArchivedFile(char *path, const char *xlogfname, return false; } +/* + * Attempt to execute the recovery_end_command. + */ +static void +ExecuteRecoveryEndCommand(void) +{ + char xlogRecoveryEndCmd[MAXPGPATH]; + char lastRestartPointFname[MAXPGPATH]; + char *dp; + char *endp; + const char *sp; + int rc; + bool signaled; + uint32 restartLog; + uint32 restartSeg; + + Assert(recoveryEndCommand); + + /* + * Calculate the archive file cutoff point for use during log shipping + * replication. All files earlier than this point can be deleted + * from the archive, though there is no requirement to do so. + * + * We initialise this with the filename of an InvalidXLogRecPtr, which + * will prevent the deletion of any WAL files from the archive + * because of the alphabetic sorting property of WAL filenames. + * + * Once we have successfully located the redo pointer of the checkpoint + * from which we start recovery we never request a file prior to the redo + * pointer of the last restartpoint. When redo begins we know that we + * have successfully located it, so there is no need for additional + * status flags to signify the point when we can begin deleting WAL files + * from the archive. NOTE(review): confirm InRedo is still set at the call site. 
+ */ + if (InRedo) + { + XLByteToSeg(ControlFile->checkPointCopy.redo, + restartLog, restartSeg); + XLogFileName(lastRestartPointFname, + ControlFile->checkPointCopy.ThisTimeLineID, + restartLog, restartSeg); + } + else + XLogFileName(lastRestartPointFname, 0, 0, 0); + + /* + * construct the command to be executed + */ + dp = xlogRecoveryEndCmd; + endp = xlogRecoveryEndCmd + MAXPGPATH - 1; + *endp = '\0'; + + for (sp = recoveryEndCommand; *sp; sp++) + { + if (*sp == '%') + { + switch (sp[1]) + { + case 'r': + /* %r: filename of last restartpoint */ + sp++; + StrNCpy(dp, lastRestartPointFname, endp - dp); + dp += strlen(dp); + break; + case '%': + /* convert %% to a single % */ + sp++; + if (dp < endp) + *dp++ = *sp; + break; + default: + /* otherwise treat the % as not special */ + if (dp < endp) + *dp++ = *sp; + break; + } + } + else + { + if (dp < endp) + *dp++ = *sp; + } + } + *dp = '\0'; + + ereport(DEBUG3, + (errmsg_internal("executing recovery end command \"%s\"", + xlogRecoveryEndCmd))); + + /* + * Run the constructed recovery end command through the shell + */ + rc = system(xlogRecoveryEndCmd); + if (rc != 0) + { + /* + * If the failure was due to any sort of signal, it's best to punt and + * abort recovery. See also detailed comments on signals in + * RestoreArchivedFile(). + */ + signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + + ereport(signaled ? FATAL : WARNING, + (errmsg("recovery_end_command \"%s\": return code %d", + xlogRecoveryEndCmd, rc))); + } +} + /* * Preallocate log files beyond the specified log endpoint. 
* @@ -4664,6 +4774,13 @@ readRecoveryCommandFile(void) (errmsg("restore_command = '%s'", recoveryRestoreCommand))); } + else if (strcmp(tok1, "recovery_end_command") == 0) + { + recoveryEndCommand = pstrdup(tok2); + ereport(LOG, + (errmsg("recovery_end_command = '%s'", + recoveryEndCommand))); + } else if (strcmp(tok1, "recovery_target_timeline") == 0) { rtliGiven = true; @@ -5622,6 +5739,9 @@ StartupXLOG(void) * allows some extra error checking in xlog_redo. */ CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); + + if (recoveryEndCommand) + ExecuteRecoveryEndCommand(); } /*