/*-------------------------------------------------------------------------
 *
 * pg_rewind.c
 *	  Synchronizes a PostgreSQL data directory to a new timeline
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <fcntl.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>

#include "access/timeline.h"
#include "access/xlog_internal.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "common/controldata_utils.h"
#include "common/file_perm.h"
#include "common/restricted_token.h"
#include "common/string.h"
#include "fe_utils/option_utils.h"
#include "fe_utils/recovery_gen.h"
#include "fe_utils/string_utils.h"
#include "file_ops.h"
#include "filemap.h"
#include "getopt_long.h"
#include "pg_rewind.h"
#include "rewind_source.h"
#include "storage/bufpage.h"

static void usage(const char *progname);

static void perform_rewind(filemap_t *filemap, rewind_source *source,
						   XLogRecPtr chkptrec,
						   TimeLineID chkpttli,
						   XLogRecPtr chkptredo);

static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
							  XLogRecPtr checkpointloc);

static void digestControlFile(ControlFileData *ControlFile,
							  const char *content, size_t size);
static void getRestoreCommand(const char *argv0);
static void sanityChecks(void);
static TimeLineHistoryEntry *getTimelineHistory(TimeLineID tli, bool is_source,
												int *nentries);
static void findCommonAncestorTimeline(TimeLineHistoryEntry *a_history,
									   int a_nentries,
									   TimeLineHistoryEntry *b_history,
									   int b_nentries,
									   XLogRecPtr *recptr, int *tliIndex);
static void ensureCleanShutdown(const char *argv0);
static void disconnect_atexit(void);

static ControlFileData ControlFile_target;
static ControlFileData ControlFile_source;
static ControlFileData ControlFile_source_after;

const char *progname;
int			WalSegSz;

/* Configuration options */
char	   *datadir_target = NULL;
char	   *datadir_source = NULL;
char	   *connstr_source = NULL;
char	   *restore_command = NULL;
char	   *config_file = NULL;

static bool debug = false;
bool		showprogress = false;
bool		dry_run = false;
bool		do_sync = true;
bool		restore_wal = false;
DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;

/* Target history */
TimeLineHistoryEntry *targetHistory;
int			targetNentries;

/* Progress counters */
uint64		fetch_size;
uint64		fetch_done;

static PGconn *conn;
static rewind_source *source;

static void
usage(const char *progname)
{
	printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
	printf(_("Usage:\n  %s [OPTION]...\n\n"), progname);
	printf(_("Options:\n"));
	printf(_("  -c, --restore-target-wal       use restore_command in target configuration to\n"
			 "                                 retrieve WAL files from archives\n"));
	printf(_("  -D, --target-pgdata=DIRECTORY  existing data directory to modify\n"));
	printf(_("      --source-pgdata=DIRECTORY  source data directory to synchronize with\n"));
	printf(_("      --source-server=CONNSTR    source server to synchronize with\n"));
	printf(_("  -n, --dry-run                  stop before modifying anything\n"));
	printf(_("  -N, --no-sync                  do not wait for changes to be written\n"
			 "                                 safely to disk\n"));
	printf(_("  -P, --progress                 write progress messages\n"));
	printf(_("  -R, --write-recovery-conf      write configuration for replication\n"
			 "                                 (requires --source-server)\n"));
	printf(_("      --config-file=FILENAME     use specified main server configuration\n"
			 "                                 file when running target cluster\n"));
	printf(_("      --debug                    write a lot of debug messages\n"));
	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
	printf(_("      --sync-method=METHOD       set method for syncing files to disk\n"));
	printf(_("  -V, --version                  output version information, then exit\n"));
	printf(_("  -?, --help                     show this help, then exit\n"));
	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
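/*
 * Typical invocations, for illustration only (the paths and the connection
 * string are examples, not defaults):
 *
 *	  pg_rewind --target-pgdata=/path/to/old_primary \
 *			--source-server="host=new_primary port=5432 user=postgres"
 *
 *	  pg_rewind --target-pgdata=/path/to/old_primary \
 *			--source-pgdata=/path/to/new_primary --dry-run --progress
 */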
int
main(int argc, char **argv)
{
	static struct option long_options[] = {
		{"help", no_argument, NULL, '?'},
		{"target-pgdata", required_argument, NULL, 'D'},
		{"write-recovery-conf", no_argument, NULL, 'R'},
		{"source-pgdata", required_argument, NULL, 1},
		{"source-server", required_argument, NULL, 2},
		{"no-ensure-shutdown", no_argument, NULL, 4},
		{"config-file", required_argument, NULL, 5},
		{"version", no_argument, NULL, 'V'},
		{"restore-target-wal", no_argument, NULL, 'c'},
		{"dry-run", no_argument, NULL, 'n'},
		{"no-sync", no_argument, NULL, 'N'},
		{"progress", no_argument, NULL, 'P'},
		{"debug", no_argument, NULL, 3},
		{"sync-method", required_argument, NULL, 6},
		{NULL, 0, NULL, 0}
	};
	int			option_index;
	int			c;
	XLogRecPtr	divergerec;
	int			lastcommontliIndex;
	XLogRecPtr	chkptrec;
	TimeLineID	chkpttli;
	XLogRecPtr	chkptredo;
	TimeLineID	source_tli;
	TimeLineID	target_tli;
	XLogRecPtr	target_wal_endrec;
	size_t		size;
	char	   *buffer;
	bool		no_ensure_shutdown = false;
	bool		rewind_needed;
	bool		writerecoveryconf = false;
	filemap_t  *filemap;

	pg_logging_init(argv[0]);
	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
	progname = get_progname(argv[0]);

	/* Process command-line arguments */
	if (argc > 1)
	{
		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
		{
			usage(progname);
			exit(0);
		}
		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
		{
			puts("pg_rewind (PostgreSQL) " PG_VERSION);
			exit(0);
		}
	}

	while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
	{
		switch (c)
		{
			case 'c':
				restore_wal = true;
				break;

			case 'P':
				showprogress = true;
				break;

			case 'n':
				dry_run = true;
				break;

			case 'N':
				do_sync = false;
				break;

			case 'R':
				writerecoveryconf = true;
				break;

			case 3:
				debug = true;
				pg_logging_increase_verbosity();
				break;

			case 'D':			/* -D or --target-pgdata */
				datadir_target = pg_strdup(optarg);
				break;

			case 1:				/* --source-pgdata */
				datadir_source = pg_strdup(optarg);
				break;

			case 2:				/* --source-server */
				connstr_source = pg_strdup(optarg);
				break;

			case 4:
				no_ensure_shutdown = true;
				break;

			case 5:
				config_file = pg_strdup(optarg);
				break;

			case 6:
				if (!parse_sync_method(optarg, &sync_method))
					exit(1);
				break;

			default:
				/* getopt_long already emitted a complaint */
				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
				exit(1);
		}
	}

	if (datadir_source == NULL && connstr_source == NULL)
	{
		pg_log_error("no source specified (--source-pgdata or --source-server)");
		pg_log_error_hint("Try \"%s --help\" for more information.", progname);
		exit(1);
	}

	if (datadir_source != NULL && connstr_source != NULL)
	{
		pg_log_error("only one of --source-pgdata or --source-server can be specified");
		pg_log_error_hint("Try \"%s --help\" for more information.", progname);
		exit(1);
	}

	if (datadir_target == NULL)
	{
		pg_log_error("no target data directory specified (--target-pgdata)");
		pg_log_error_hint("Try \"%s --help\" for more information.", progname);
		exit(1);
	}

	if (writerecoveryconf && connstr_source == NULL)
	{
		pg_log_error("no source server information (--source-server) specified for --write-recovery-conf");
		pg_log_error_hint("Try \"%s --help\" for more information.", progname);
		exit(1);
	}

	if (optind < argc)
	{
		pg_log_error("too many command-line arguments (first is \"%s\")",
					 argv[optind]);
		pg_log_error_hint("Try \"%s --help\" for more information.", progname);
		exit(1);
	}
	/*
	 * Don't allow pg_rewind to be run as root, to avoid overwriting the
	 * ownership of files in the data directory. We need only check for root
	 * -- any other user won't have sufficient permissions to modify files in
	 * the data directory.
	 */
#ifndef WIN32
	if (geteuid() == 0)
	{
		pg_log_error("cannot be executed by \"root\"");
		pg_log_error_hint("You must run %s as the PostgreSQL superuser.",
						  progname);
		exit(1);
	}
#endif

	get_restricted_token();

	/* Set mask based on PGDATA permissions */
	if (!GetDataDirectoryCreatePerm(datadir_target))
		pg_fatal("could not read permissions of directory \"%s\": %m",
				 datadir_target);

	umask(pg_mode_mask);

	getRestoreCommand(argv[0]);

	atexit(disconnect_atexit);

	/*
	 * Ok, we have all the options and we're ready to start. First, connect
	 * to the remote server.
	 */
	if (connstr_source)
	{
		conn = PQconnectdb(connstr_source);

		if (PQstatus(conn) == CONNECTION_BAD)
			pg_fatal("%s", PQerrorMessage(conn));

		if (showprogress)
			pg_log_info("connected to server");

		source = init_libpq_source(conn);
	}
	else
		source = init_local_source(datadir_source);

	/*
	 * Check the status of the target instance.
	 *
	 * If the target instance was not cleanly shut down, start and stop the
	 * target cluster once in single-user mode to force recovery to finish,
	 * ensuring that the cluster can be used by pg_rewind.  Note that if
	 * no_ensure_shutdown is specified, pg_rewind skips this step, and users
	 * must ensure by themselves that the target cluster is in a clean state.
	 */
	buffer = slurpFile(datadir_target, "global/pg_control", &size);
	digestControlFile(&ControlFile_target, buffer, size);
	pg_free(buffer);

	if (!no_ensure_shutdown &&
		ControlFile_target.state != DB_SHUTDOWNED &&
		ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
	{
		ensureCleanShutdown(argv[0]);

		buffer = slurpFile(datadir_target, "global/pg_control", &size);
		digestControlFile(&ControlFile_target, buffer, size);
		pg_free(buffer);
	}

	buffer = source->fetch_file(source, "global/pg_control", &size);
	digestControlFile(&ControlFile_source, buffer, size);
	pg_free(buffer);

	sanityChecks();

	/*
	 * Usually, the TLI can be found in the latest checkpoint record. But if
	 * the source server is just being promoted (or it's a standby that's
	 * following a primary that's just being promoted), and the checkpoint
	 * requested by the promotion hasn't completed yet, the latest timeline
	 * is in minRecoveryPoint. So we check which is later, the TLI of the
	 * minRecoveryPoint or the latest checkpoint.
	 */
	source_tli = Max(ControlFile_source.minRecoveryPointTLI,
					 ControlFile_source.checkPointCopy.ThisTimeLineID);

	/* Similarly for the target. */
	target_tli = Max(ControlFile_target.minRecoveryPointTLI,
					 ControlFile_target.checkPointCopy.ThisTimeLineID);

	/*
	 * Find the common ancestor timeline between the clusters.
	 *
	 * If both clusters are already on the same timeline, there's nothing to
	 * do.
	 */
	if (target_tli == source_tli)
	{
		pg_log_info("source and target cluster are on the same timeline");
		rewind_needed = false;
		target_wal_endrec = 0;
	}
	else
	{
		XLogRecPtr	chkptendrec;
		TimeLineHistoryEntry *sourceHistory;
		int			sourceNentries;

		/*
		 * Retrieve timelines for both source and target, and find the point
		 * where they diverged.
		 */
		sourceHistory = getTimelineHistory(source_tli, true, &sourceNentries);
		targetHistory = getTimelineHistory(target_tli, false, &targetNentries);
		findCommonAncestorTimeline(sourceHistory, sourceNentries,
								   targetHistory, targetNentries,
								   &divergerec, &lastcommontliIndex);

		pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
					LSN_FORMAT_ARGS(divergerec),
					targetHistory[lastcommontliIndex].tli);

		/*
		 * Don't need the source history anymore. The target history is still
		 * needed by the routines in parsexlog.c, when we read the target
		 * WAL.
		 */
		pfree(sourceHistory);

		/*
		 * Determine the end-of-WAL on the target.
		 *
		 * The WAL ends at the last shutdown checkpoint, or at
		 * minRecoveryPoint if it was a standby. (If we supported rewinding a
		 * server that was not shut down cleanly, we would need to replay
		 * until we reach the first invalid record, like crash recovery
		 * does.)
		 */

		/* read the checkpoint record on the target to see where it ends. */
		chkptendrec = readOneRecord(datadir_target,
									ControlFile_target.checkPoint,
									targetNentries - 1,
									restore_command);

		if (ControlFile_target.minRecoveryPoint > chkptendrec)
		{
			target_wal_endrec = ControlFile_target.minRecoveryPoint;
		}
		else
		{
			target_wal_endrec = chkptendrec;
		}

		/*
		 * Check for the possibility that the target is in fact a direct
		 * ancestor of the source. In that case, there is no divergent
		 * history in the target that needs rewinding.
		 */
		if (target_wal_endrec > divergerec)
		{
			rewind_needed = true;
		}
		else
		{
			/* the last common checkpoint record must be part of target WAL */
			Assert(target_wal_endrec == divergerec);

			rewind_needed = false;
		}
	}

	if (!rewind_needed)
	{
		pg_log_info("no rewind required");
		if (writerecoveryconf && !dry_run)
			WriteRecoveryConfig(conn, datadir_target,
								GenerateRecoveryConfig(conn, NULL, NULL));
		exit(0);
	}

	findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
					   &chkptrec, &chkpttli, &chkptredo, restore_command);
	pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
				LSN_FORMAT_ARGS(chkptrec), chkpttli);

	/* Initialize the hash table to track the status of each file */
	filehash_init();

	/*
	 * Collect information about all files in both data directories.
	 */
	if (showprogress)
		pg_log_info("reading source file list");
	source->traverse_files(source, &process_source_file);

	if (showprogress)
		pg_log_info("reading target file list");
	traverse_datadir(datadir_target, &process_target_file);

	/*
	 * Read the target WAL from the last checkpoint before the point of fork,
	 * to extract all the pages that were modified on the target cluster
	 * after the fork.
	 */
	if (showprogress)
		pg_log_info("reading WAL in target");
	extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
				   target_wal_endrec, restore_command);

	/*
	 * We have collected all information we need from both systems. Decide
	 * what to do with each file.
	 */
	filemap = decide_file_actions();

	if (showprogress)
		calculate_totals(filemap);

	/* this is too verbose even for verbose mode */
	if (debug)
		print_filemap(filemap);

	/*
	 * Ok, we're ready to start copying things over.
	 */
	if (showprogress)
	{
		pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
					(unsigned long) (filemap->fetch_size / (1024 * 1024)),
					(unsigned long) (filemap->total_size / (1024 * 1024)));

		fetch_size = filemap->fetch_size;
		fetch_done = 0;
	}

	/*
	 * We have now collected all the information we need from both systems,
	 * and we are ready to start modifying the target directory.
	 *
	 * This is the point of no return. Once we start copying things, there is
	 * no turning back!
	 */
	perform_rewind(filemap, source, chkptrec, chkpttli, chkptredo);

	if (showprogress)
		pg_log_info("syncing target data directory");
	sync_target_dir();

	/* Also update the standby configuration, if requested. */
	if (writerecoveryconf && !dry_run)
		WriteRecoveryConfig(conn, datadir_target,
							GenerateRecoveryConfig(conn, NULL, NULL));

	/* don't need the source connection anymore */
	source->destroy(source);
	if (conn)
	{
		PQfinish(conn);
		conn = NULL;
	}

	pg_log_info("Done!");

	return 0;
}

/*
 * Perform the rewind.
 *
 * We have already collected all the information we need from the target and
 * the source.
 */
static void
perform_rewind(filemap_t *filemap, rewind_source *source,
			   XLogRecPtr chkptrec,
			   TimeLineID chkpttli,
			   XLogRecPtr chkptredo)
{
	XLogRecPtr	endrec;
	TimeLineID	endtli;
	ControlFileData ControlFile_new;
	size_t		size;
	char	   *buffer;

	/*
	 * Execute the actions in the file map, fetching data from the source
	 * system as needed.
	 */
	for (int i = 0; i < filemap->nentries; i++)
	{
		file_entry_t *entry = filemap->entries[i];

		/*
		 * If this is a relation file, copy the modified blocks.
		 *
		 * This is in addition to any other changes.
		 */
		if (entry->target_pages_to_overwrite.bitmapsize > 0)
		{
			datapagemap_iterator_t *iter;
			BlockNumber blkno;
			off_t		offset;

			iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
			while (datapagemap_next(iter, &blkno))
			{
				offset = blkno * BLCKSZ;
				source->queue_fetch_range(source, entry->path, offset, BLCKSZ);
			}
			pg_free(iter);
		}

		switch (entry->action)
		{
			case FILE_ACTION_NONE:
				/* nothing else to do */
				break;

			case FILE_ACTION_COPY:
				source->queue_fetch_file(source, entry->path, entry->source_size);
				break;

			case FILE_ACTION_TRUNCATE:
				truncate_target_file(entry->path, entry->source_size);
				break;

			case FILE_ACTION_COPY_TAIL:
				source->queue_fetch_range(source, entry->path,
										  entry->target_size,
										  entry->source_size - entry->target_size);
				break;

			case FILE_ACTION_REMOVE:
				remove_target(entry);
				break;

			case FILE_ACTION_CREATE:
				create_target(entry);
				break;

			case FILE_ACTION_UNDECIDED:
				pg_fatal("no action decided for file \"%s\"", entry->path);
				break;
		}
	}

	/* Complete any remaining range-fetches that we queued up above. */
	source->finish_fetch(source);

	close_target_file();

	progress_report(true);

	/*
	 * Fetch the control file from the source last. This ensures that the
	 * minRecoveryPoint is up-to-date.
	 */
	buffer = source->fetch_file(source, "global/pg_control", &size);
	digestControlFile(&ControlFile_source_after, buffer, size);
	pg_free(buffer);

	/*
	 * Sanity check: If the source is a local system, the control file should
	 * not have changed since we started.
	 *
	 * XXX: We assume it hasn't been modified, but actually, what could go
	 * wrong? The logic handles a libpq source that's modified concurrently,
	 * why not a local datadir?
	 */
	if (datadir_source &&
		memcmp(&ControlFile_source, &ControlFile_source_after,
			   sizeof(ControlFileData)) != 0)
	{
		pg_fatal("source system was modified while pg_rewind was running");
	}

	if (showprogress)
		pg_log_info("creating backup label and updating control file");

	/*
	 * Create a backup label file, to tell the target where to begin the WAL
	 * replay. Normally, that is the last common checkpoint between the
	 * source and the target. But if the source is a standby server, it's
	 * possible that the last common checkpoint is *after* the standby's
	 * restartpoint. That implies that the source server has applied the
	 * checkpoint record, but hasn't performed a corresponding restartpoint
	 * yet. Make sure we start at the restartpoint's redo point in that case.
	 *
	 * Use the old version of the source's control file for this. The server
	 * might have finished the restartpoint after we started copying files,
	 * but we must begin from the redo point at the time we started copying.
	 */
	if (ControlFile_source.checkPointCopy.redo < chkptredo)
	{
		chkptredo = ControlFile_source.checkPointCopy.redo;
		chkpttli = ControlFile_source.checkPointCopy.ThisTimeLineID;
		chkptrec = ControlFile_source.checkPoint;
	}
	createBackupLabel(chkptredo, chkpttli, chkptrec);

	/*
	 * Update control file of target, to tell the target how far it must
	 * replay the WAL (minRecoveryPoint).
	 */
	if (connstr_source)
	{
		/*
		 * The source is a live server. Like in an online backup, it's
		 * important that we recover all the WAL that was generated while we
		 * were copying files.
		 */
		if (ControlFile_source_after.state == DB_IN_ARCHIVE_RECOVERY)
		{
			/*
			 * Source is a standby server. We must replay to its
			 * minRecoveryPoint.
			 */
			endrec = ControlFile_source_after.minRecoveryPoint;
			endtli = ControlFile_source_after.minRecoveryPointTLI;
		}
		else
		{
			/*
			 * Source is a production, non-standby, server. We must replay to
			 * the last WAL insert location.
			 */
			if (ControlFile_source_after.state != DB_IN_PRODUCTION)
				pg_fatal("source system was in unexpected state at end of rewind");

			endrec = source->get_current_wal_insert_lsn(source);
			endtli = Max(ControlFile_source_after.checkPointCopy.ThisTimeLineID,
						 ControlFile_source_after.minRecoveryPointTLI);
		}
	}
	else
	{
		/*
		 * Source is a local data directory. It should've shut down cleanly,
		 * and we must replay to the latest shutdown checkpoint.
		 */
		endrec = ControlFile_source_after.checkPoint;
		endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID;
	}

	memcpy(&ControlFile_new, &ControlFile_source_after, sizeof(ControlFileData));
	ControlFile_new.minRecoveryPoint = endrec;
	ControlFile_new.minRecoveryPointTLI = endtli;
	ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;

	if (!dry_run)
		update_controlfile(datadir_target, &ControlFile_new, do_sync);
}

static void
sanityChecks(void)
{
	/* TODO Check that there's no backup_label in either cluster */

	/* Check system_identifier match */
	if (ControlFile_target.system_identifier != ControlFile_source.system_identifier)
		pg_fatal("source and target clusters are from different systems");

	/* check version */
	if (ControlFile_target.pg_control_version != PG_CONTROL_VERSION ||
		ControlFile_source.pg_control_version != PG_CONTROL_VERSION ||
		ControlFile_target.catalog_version_no != CATALOG_VERSION_NO ||
		ControlFile_source.catalog_version_no != CATALOG_VERSION_NO)
	{
		pg_fatal("clusters are not compatible with this version of pg_rewind");
	}

	/*
	 * The target cluster needs to use either data checksums or hint-bit
	 * WAL-logging (wal_log_hints), to prevent data corruption that could
	 * otherwise be caused by hint-bit updates that are not WAL-logged.
	 */
	if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION &&
		!ControlFile_target.wal_log_hints)
	{
		pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
	}

	/*
	 * Target cluster better not be running. This doesn't guard against
	 * someone starting the cluster concurrently. Also, this is probably more
	 * strict than necessary; it's OK if the target node was not shut down
	 * cleanly, as long as it isn't running at the moment.
	 */
	if (ControlFile_target.state != DB_SHUTDOWNED &&
		ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
		pg_fatal("target server must be shut down cleanly");

	/*
	 * When the source is a data directory, also require that the source
	 * server is shut down. There isn't any very strong reason for this
	 * limitation, but better safe than sorry.
	 */
	if (datadir_source &&
		ControlFile_source.state != DB_SHUTDOWNED &&
		ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY)
		pg_fatal("source data directory must be shut down cleanly");
}
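/*
 * For illustration, a progress line written by progress_report() below looks
 * roughly like this (the numbers are made up):
 *
 *	  32768/1048576 kB (3%) copied
 */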
/*
 * Print a progress report based on the fetch_size and fetch_done variables.
 *
 * The progress report is written at most once per second, except that the
 * final progress report is always printed.
 *
 * If finished is set to true, this is the last progress report. The cursor
 * is moved to the next line.
 */
void
progress_report(bool finished)
{
	static pg_time_t last_progress_report = 0;
	int			percent;
	char		fetch_done_str[32];
	char		fetch_size_str[32];
	pg_time_t	now;

	if (!showprogress)
		return;

	now = time(NULL);
	if (now == last_progress_report && !finished)
		return;					/* Max once per second */

	last_progress_report = now;
	percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;

	/*
	 * Avoid overflowing past 100% or the full size. This may make the total
	 * size number change as we approach the end of the backup (the estimate
	 * will always be wrong if WAL is included), but that's better than
	 * having the done column be bigger than the total.
	 */
	if (percent > 100)
		percent = 100;
	if (fetch_done > fetch_size)
		fetch_size = fetch_done;

	snprintf(fetch_done_str, sizeof(fetch_done_str), UINT64_FORMAT,
			 fetch_done / 1024);
	snprintf(fetch_size_str, sizeof(fetch_size_str), UINT64_FORMAT,
			 fetch_size / 1024);

	fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
			(int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,
			percent);

	/*
	 * Stay on the same line if reporting to a terminal and we're not done
	 * yet.
	 */
	fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
}

/*
 * Find the minimum of two WAL locations, treating InvalidXLogRecPtr as
 * infinity, as src/include/access/timeline.h states. This routine should be
 * used only when comparing WAL locations related to history files.
 */
static XLogRecPtr
MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
{
	if (XLogRecPtrIsInvalid(a))
		return b;
	else if (XLogRecPtrIsInvalid(b))
		return a;
	else
		return Min(a, b);
}

/*
 * Retrieve timeline history for the source or target system.
 */
static TimeLineHistoryEntry *
getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
{
	TimeLineHistoryEntry *history;

	/*
	 * Timeline 1 does not have a history file, so there is no need to check
	 * for one; just fake an entry with infinite start and end positions.
	 */
	if (tli == 1)
	{
		history = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
		history->tli = tli;
		history->begin = history->end = InvalidXLogRecPtr;
		*nentries = 1;
	}
	else
	{
		char		path[MAXPGPATH];
		char	   *histfile;

		TLHistoryFilePath(path, tli);

		/* Get history file from appropriate source */
		if (is_source)
			histfile = source->fetch_file(source, path, NULL);
		else
			histfile = slurpFile(datadir_target, path, NULL);

		history = rewind_parseTimeLineHistory(histfile, tli, nentries);
		pg_free(histfile);
	}

	if (debug)
	{
		int			i;

		if (is_source)
			pg_log_debug("Source timeline history:");
		else
			pg_log_debug("Target timeline history:");

		/*
		 * Print the timeline history that was just retrieved.
		 */
		for (i = 0; i < *nentries; i++)
		{
			TimeLineHistoryEntry *entry;

			entry = &history[i];
			pg_log_debug("%u: %X/%X - %X/%X", entry->tli,
						 LSN_FORMAT_ARGS(entry->begin),
						 LSN_FORMAT_ARGS(entry->end));
		}
	}

	return history;
}
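/*
 * Illustrative example (not taken from a real server): if the source's
 * history lists timelines {1, 2, 4} and the target's history lists
 * {1, 2, 3}, the last common entry is timeline 2, and the divergence point
 * is the earlier of the two "end" positions recorded for timeline 2 in the
 * respective history files.
 */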
/*
 * Determine the TLI of the last common timeline in the timeline histories of
 * two clusters. *tliIndex is set to the index of the last common timeline in
 * the arrays, and *recptr is set to the position where the timeline
 * histories diverged (i.e. the first WAL record that's not the same in both
 * clusters).
 */
static void
findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, int a_nentries,
						   TimeLineHistoryEntry *b_history, int b_nentries,
						   XLogRecPtr *recptr, int *tliIndex)
{
	int			i,
				n;

	/*
	 * Trace the histories forward until they diverge. The source and target
	 * nodes may have used the same timeline number but with a different
	 * start position, depending on which history files each node fetched
	 * during previous recoveries. Hence also check the start position of
	 * each timeline, and step back by one extra entry if they do not match.
	 */
	n = Min(a_nentries, b_nentries);
	for (i = 0; i < n; i++)
	{
		if (a_history[i].tli != b_history[i].tli ||
			a_history[i].begin != b_history[i].begin)
			break;
	}

	if (i > 0)
	{
		i--;
		*recptr = MinXLogRecPtr(a_history[i].end, b_history[i].end);
		*tliIndex = i;
		return;
	}
	else
	{
		pg_fatal("could not find common ancestor of the source and target cluster's timelines");
	}
}

/*
 * Create a backup_label file that forces recovery to begin at the last
 * common checkpoint.
 */
static void
createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
{
	XLogSegNo	startsegno;
	time_t		stamp_time;
	char		strfbuf[128];
	char		xlogfilename[MAXFNAMELEN];
	struct tm  *tmp;
	char		buf[1000];
	int			len;

	XLByteToSeg(startpoint, startsegno, WalSegSz);
	XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);

	/*
	 * Construct backup label file
	 */
	stamp_time = time(NULL);
	tmp = localtime(&stamp_time);
	strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);

	len = snprintf(buf, sizeof(buf),
				   "START WAL LOCATION: %X/%X (file %s)\n"
				   "CHECKPOINT LOCATION: %X/%X\n"
				   "BACKUP METHOD: pg_rewind\n"
				   "BACKUP FROM: standby\n"
				   "START TIME: %s\n",
	/* omit LABEL: line */
				   LSN_FORMAT_ARGS(startpoint), xlogfilename,
				   LSN_FORMAT_ARGS(checkpointloc),
				   strfbuf);
	if (len >= sizeof(buf))
		pg_fatal("backup label buffer too small");	/* shouldn't happen */

	/* TODO: move old file out of the way, if any. */
	open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
	write_target_range(buf, 0, len);
	close_target_file();
}

/*
 * Check CRC of control file
 */
static void
checkControlFile(ControlFileData *ControlFile)
{
	pg_crc32c	crc;

	/* Calculate CRC */
	INIT_CRC32C(crc);
	COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc));
	FIN_CRC32C(crc);

	/* And simply compare it */
	if (!EQ_CRC32C(crc, ControlFile->crc))
		pg_fatal("unexpected control file CRC");
}

/*
 * Verify control file contents in the buffer 'content', and copy it to
 * *ControlFile.
 */
static void
digestControlFile(ControlFileData *ControlFile, const char *content,
				  size_t size)
{
	if (size != PG_CONTROL_FILE_SIZE)
		pg_fatal("unexpected control file size %d, expected %d",
				 (int) size, PG_CONTROL_FILE_SIZE);

	memcpy(ControlFile, content, sizeof(ControlFileData));

	/* set and validate WalSegSz */
	WalSegSz = ControlFile->xlog_seg_size;

	if (!IsValidWalSegSize(WalSegSz))
	{
		pg_log_error(ngettext("invalid WAL segment size in control file (%d byte)",
							  "invalid WAL segment size in control file (%d bytes)",
							  WalSegSz),
					 WalSegSz);
		pg_log_error_detail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
		exit(1);
	}

	/* Additional checks on control file */
	checkControlFile(ControlFile);
}
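/*
 * For illustration, the command constructed by getRestoreCommand() below
 * looks like this (the paths are examples only):
 *
 *	  "/usr/pgsql/bin/postgres" -D "/path/to/target" -C restore_command
 *
 * with an additional -c config_file=... switch when --config-file is given.
 */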
/*
 * Get the value of the GUC parameter restore_command from the target
 * cluster.
 *
 * This uses logic based on "postgres -C" to get the value from the cluster.
 */
static void
getRestoreCommand(const char *argv0)
{
	int			rc;
	char		postgres_exec_path[MAXPGPATH];
	PQExpBuffer postgres_cmd;

	if (!restore_wal)
		return;

	/* find postgres executable */
	rc = find_other_exec(argv0, "postgres",
						 PG_BACKEND_VERSIONSTR,
						 postgres_exec_path);

	if (rc < 0)
	{
		char		full_path[MAXPGPATH];

		if (find_my_exec(argv0, full_path) < 0)
			strlcpy(full_path, progname, sizeof(full_path));

		if (rc == -1)
			pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
					 "postgres", progname, full_path);
		else
			pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
					 "postgres", full_path, progname);
	}

	/*
	 * Build a command able to retrieve the value of GUC parameter
	 * restore_command, if set.
	 */
	postgres_cmd = createPQExpBuffer();

	/* path to postgres, properly quoted */
	appendShellString(postgres_cmd, postgres_exec_path);

	/* add -D switch, with properly quoted data directory */
	appendPQExpBufferStr(postgres_cmd, " -D ");
	appendShellString(postgres_cmd, datadir_target);

	/* add custom configuration file only if requested */
	if (config_file != NULL)
	{
		appendPQExpBufferStr(postgres_cmd, " -c config_file=");
		appendShellString(postgres_cmd, config_file);
	}

	/* add -C switch, for restore_command */
	appendPQExpBufferStr(postgres_cmd, " -C restore_command");

	restore_command = pipe_read_line(postgres_cmd->data);
	if (restore_command == NULL)
		pg_fatal("unable to read restore_command from target cluster");

	(void) pg_strip_crlf(restore_command);

	if (strcmp(restore_command, "") == 0)
		pg_fatal("restore_command is not set in the target cluster");

	pg_log_debug("using for rewind restore_command = \'%s\'",
				 restore_command);

	destroyPQExpBuffer(postgres_cmd);
}

/*
 * Ensure clean shutdown of the target instance by launching single-user mode
 * postgres to do crash recovery.
 */
static void
ensureCleanShutdown(const char *argv0)
{
	int			ret;
	char		exec_path[MAXPGPATH];
	PQExpBuffer postgres_cmd;

	/* locate postgres binary */
	if ((ret = find_other_exec(argv0, "postgres",
							   PG_BACKEND_VERSIONSTR,
							   exec_path)) < 0)
	{
		char		full_path[MAXPGPATH];

		if (find_my_exec(argv0, full_path) < 0)
			strlcpy(full_path, progname, sizeof(full_path));

		if (ret == -1)
			pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
					 "postgres", progname, full_path);
		else
			pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
					 "postgres", full_path, progname);
	}

	pg_log_info("executing \"%s\" for target server to complete crash recovery",
				exec_path);

	/*
	 * Skip processing if requested, but only after ensuring presence of
	 * postgres.
	 */
	if (dry_run)
		return;

	/*
	 * Finally run postgres in single-user mode. There is no need to use
	 * fsync here. This makes the recovery faster, and the target data folder
	 * is synced at the end anyway.
	 */
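	/*
	 * For illustration, the command assembled below looks like this (the
	 * paths are examples only):
	 *
	 *	  "/usr/pgsql/bin/postgres" --single -F -D "/path/to/target" \
	 *		  template1 < /dev/null
	 *
	 * with an additional -c config_file=... switch when --config-file is
	 * given.
	 */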
	postgres_cmd = createPQExpBuffer();

	/* path to postgres, properly quoted */
	appendShellString(postgres_cmd, exec_path);

	/* add set of options with properly quoted data directory */
	appendPQExpBufferStr(postgres_cmd, " --single -F -D ");
	appendShellString(postgres_cmd, datadir_target);

	/* add custom configuration file only if requested */
	if (config_file != NULL)
	{
		appendPQExpBufferStr(postgres_cmd, " -c config_file=");
		appendShellString(postgres_cmd, config_file);
	}

	/* finish with the database name, and a properly quoted redirection */
	appendPQExpBufferStr(postgres_cmd, " template1 < ");
	appendShellString(postgres_cmd, DEVNULL);

	fflush(NULL);
	if (system(postgres_cmd->data) != 0)
	{
		pg_log_error("postgres single-user mode in target cluster failed");
		pg_log_error_detail("Command was: %s", postgres_cmd->data);
		exit(1);
	}

	destroyPQExpBuffer(postgres_cmd);
}

static void
disconnect_atexit(void)
{
	if (conn != NULL)
		PQfinish(conn);
}