From 8c16ad3b43299695f203f9157a2b27c22b9ed634 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 6 Sep 2023 16:27:16 -0700 Subject: [PATCH] Allow using syncfs() in frontend utilities. This commit allows specifying a --sync-method in several frontend utilities that must synchronize many files to disk (initdb, pg_basebackup, pg_checksums, pg_dump, pg_rewind, and pg_upgrade). On Linux, users can specify "syncfs" to synchronize the relevant file systems instead of calling fsync() for every single file. In many cases, using syncfs() is much faster. As with recovery_init_sync_method, this new option comes with some caveats. The descriptions of these caveats have been moved to a new appendix section in the documentation. Co-authored-by: Justin Pryzby Reviewed-by: Michael Paquier, Thomas Munro, Robert Haas, Justin Pryzby Discussion: https://postgr.es/m/20210930004340.GM831%40telsasoft.com --- doc/src/sgml/config.sgml | 12 +++------ doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/postgres.sgml | 1 + doc/src/sgml/ref/initdb.sgml | 22 ++++++++++++++++ doc/src/sgml/ref/pg_basebackup.sgml | 25 +++++++++++++++++++ doc/src/sgml/ref/pg_checksums.sgml | 22 ++++++++++++++++ doc/src/sgml/ref/pg_dump.sgml | 21 ++++++++++++++++ doc/src/sgml/ref/pg_rewind.sgml | 22 ++++++++++++++++ doc/src/sgml/ref/pgupgrade.sgml | 23 +++++++++++++++++ doc/src/sgml/syncfs.sgml | 36 +++++++++++++++++++++++++++ src/bin/initdb/initdb.c | 6 +++++ src/bin/initdb/t/001_initdb.pl | 12 +++++++++ src/bin/pg_basebackup/pg_basebackup.c | 7 ++++++ src/bin/pg_checksums/pg_checksums.c | 6 +++++ src/bin/pg_dump/pg_dump.c | 7 ++++++ src/bin/pg_rewind/pg_rewind.c | 8 ++++++ src/bin/pg_upgrade/option.c | 13 ++++++++++ src/bin/pg_upgrade/pg_upgrade.c | 6 +++-- src/bin/pg_upgrade/pg_upgrade.h | 1 + src/fe_utils/option_utils.c | 27 ++++++++++++++++++++ src/include/fe_utils/option_utils.h | 4 +++ 21 files changed, 271 insertions(+), 11 deletions(-) create mode 100644 doc/src/sgml/syncfs.sgml diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f0a50a5f9a..6bc1b215db 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -10511,15 +10511,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' On Linux, syncfs may be used instead, to ask the operating system to synchronize the whole file systems that contain the data directory, the WAL files and each tablespace (but not any other - file systems that may be reachable through symbolic links). This may - be a lot faster than the fsync setting, because it - doesn't need to open each file one by one. On the other hand, it may - be slower if a file system is shared by other applications that - modify a lot of files, since those files will also be written to disk. - Furthermore, on versions of Linux before 5.8, I/O errors encountered - while writing data to disk may not be reported to - PostgreSQL, and relevant error messages may - appear only in kernel logs. + file systems that may be reachable through symbolic links). See + for more information about using + syncfs(). This parameter can only be set in the diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index e3d94a62b3..4c63a7e768 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -183,6 +183,7 @@ + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 2e271862fc..f629524be0 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -294,6 +294,7 @@ break is not needed in a wider output rendering. &acronyms; &glossary; &color; + &syncfs; &obsolete; diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 22f1011781..8a09c5c438 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -365,6 +365,28 @@ PostgreSQL documentation + + + + + When set to fsync, which is the default, + initdb will recursively open and synchronize all + files in the data directory. The search for files will follow symbolic + links for the WAL directory and each configured tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace. See + for more information about using + syncfs(). + + + This option has no effect when is used. + + + + diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 79d3e657c3..d2b8ddd200 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -594,6 +594,31 @@ PostgreSQL documentation + + + + + When set to fsync, which is the default, + pg_basebackup will recursively open and synchronize + all files in the backup directory. When the plain format is used, the + search for files will follow symbolic links for the WAL directory and + each configured tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file system that contains the + backup directory. When the plain format is used, + pg_basebackup will also synchronize the file systems + that contain the WAL files and each tablespace. See + for more information about using + syncfs(). + + + This option has no effect when is used. + + + + diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml index a3d0b0f0a3..7b44ba71cf 100644 --- a/doc/src/sgml/ref/pg_checksums.sgml +++ b/doc/src/sgml/ref/pg_checksums.sgml @@ -139,6 +139,28 @@ PostgreSQL documentation + + + + + When set to fsync, which is the default, + pg_checksums will recursively open and synchronize + all files in the data directory. The search for files will follow + symbolic links for the WAL directory and each configured tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace. See + for more information about using + syncfs(). + + + This option has no effect when is used. + + + + diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index a3cf0608f5..c1e2220b3c 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1179,6 +1179,27 @@ PostgreSQL documentation + + + + + When set to fsync, which is the default, + pg_dump --format=directory will recursively open and + synchronize all files in the archive directory. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file system that contains the + archive directory. See for more information + about using syncfs(). + + + This option has no effect when is used or + is not set to directory. + + + + diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml index 15cddd086b..80dff16168 100644 --- a/doc/src/sgml/ref/pg_rewind.sgml +++ b/doc/src/sgml/ref/pg_rewind.sgml @@ -284,6 +284,28 @@ PostgreSQL documentation + + + + + When set to fsync, which is the default, + pg_rewind will recursively open and synchronize all + files in the data directory. The search for files will follow symbolic + links for the WAL directory and each configured tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace. See + for more information about using + syncfs(). + + + This option has no effect when is used. + + + + diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 7816b4c685..bea0d1b93f 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -190,6 +190,29 @@ PostgreSQL documentation variable PGSOCKETDIR + + + + + When set to fsync, which is the default, + pg_upgrade will recursively open and synchronize all + files in the upgraded cluster's data directory. The search for files + will follow symbolic links for the WAL directory and each configured + tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file systems that contain the + upgraded cluster's data directory, its WAL files, and each tablespace. + See for more information about using + syncfs(). + + + This option has no effect when is used. + + + + username username diff --git a/doc/src/sgml/syncfs.sgml b/doc/src/sgml/syncfs.sgml new file mode 100644 index 0000000000..00457d2457 --- /dev/null +++ b/doc/src/sgml/syncfs.sgml @@ -0,0 +1,36 @@ + + + + <function>syncfs()</function> Caveats + + + syncfs + + + + On Linux syncfs() may be specified for some + configuration parameters (e.g., + ), server applications (e.g., + pg_upgrade), and client applications (e.g., + pg_basebackup) that involve synchronizing many + files to disk. syncfs() is advantageous in many cases, + but there are some trade-offs to keep in mind. + + + + Since syncfs() instructs the operating system to + synchronize a whole file system, it typically requires many fewer system + calls than using fsync() to synchronize each file one by + one. Therefore, using syncfs() may be a lot faster than + using fsync(). However, it may be slower if a file + system is shared by other applications that modify a lot of files, since + those files will also be written to disk. + + + + Furthermore, on versions of Linux before 5.8, I/O errors encountered while + writing data to disk may not be reported to the calling program, and relevant + error messages may appear only in kernel logs. + + + diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 51198e6665..bddb30d766 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2467,6 +2467,7 @@ usage(const char *progname) printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" --no-instructions do not print instructions for next steps\n")); printf(_(" -s, --show show internal settings\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -S, --sync-only only sync database files to disk, then exit\n")); printf(_("\nOther options:\n")); printf(_(" -V, --version output version information, then exit\n")); @@ -3107,6 +3108,7 @@ main(int argc, char *argv[]) {"locale-provider", required_argument, NULL, 15}, {"icu-locale", required_argument, NULL, 16}, {"icu-rules", required_argument, NULL, 17}, + {"sync-method", required_argument, NULL, 18}, {NULL, 0, NULL, 0} }; @@ -3287,6 +3289,10 @@ main(int argc, char *argv[]) case 17: icu_rules = pg_strdup(optarg); break; + case 18: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 2d7469d2fc..45f96cd8bb 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -16,6 +16,7 @@ use Test::More; my $tempdir = PostgreSQL::Test::Utils::tempdir; my $xlogdir = "$tempdir/pgxlog"; my $datadir = "$tempdir/data"; +my $supports_syncfs = check_pg_config("#define HAVE_SYNCFS 1"); program_help_ok('initdb'); program_version_ok('initdb'); @@ -82,6 +83,17 @@ command_fails([ 'pg_checksums', '-D', $datadir ], command_ok([ 'initdb', '-S', $datadir ], 'sync only'); command_fails([ 'initdb', $datadir ], 'existing data directory'); +if ($supports_syncfs) +{ + command_ok([ 'initdb', '-S', $datadir, '--sync-method', 'syncfs' ], + 'sync method syncfs'); +} +else +{ + command_fails([ 'initdb', '-S', $datadir, '--sync-method', 'syncfs' ], + 'sync method syncfs'); +} + # Check group access on PGDATA SKIP: { diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index e9033af5c0..74f5332e95 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -425,6 +425,8 @@ usage(void) printf(_(" --no-slot prevent creation of temporary replication slot\n")); printf(_(" --no-verify-checksums\n" " do not verify checksums\n")); + printf(_(" --sync-method=METHOD\n" + " set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=CONNSTR connection string\n")); @@ -2282,6 +2284,7 @@ main(int argc, char **argv) {"no-manifest", no_argument, NULL, 5}, {"manifest-force-encode", no_argument, NULL, 6}, {"manifest-checksums", required_argument, NULL, 7}, + {"sync-method", required_argument, NULL, 8}, {NULL, 0, NULL, 0} }; int c; @@ -2453,6 +2456,10 @@ main(int argc, char **argv) case 7: manifest_checksums = pg_strdup(optarg); break; + case 8: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 836ee65405..e009ba5e0b 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -78,6 +78,7 @@ usage(void) printf(_(" -f, --filenode=FILENODE check only relation with specified filenode\n")); printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" -P, --progress show progress information\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -v, --verbose output verbose messages\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); @@ -436,6 +437,7 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 'N'}, {"progress", no_argument, NULL, 'P'}, {"verbose", no_argument, NULL, 'v'}, + {"sync-method", required_argument, NULL, 1}, {NULL, 0, NULL, 0} }; @@ -494,6 +496,10 @@ main(int argc, char *argv[]) case 'v': verbose = true; break; + case 1: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 280662bc33..f7b6176692 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -432,6 +432,7 @@ main(int argc, char **argv) {"table-and-children", required_argument, NULL, 12}, {"exclude-table-and-children", required_argument, NULL, 13}, {"exclude-table-data-and-children", required_argument, NULL, 14}, + {"sync-method", required_argument, NULL, 15}, {NULL, 0, NULL, 0} }; @@ -658,6 +659,11 @@ main(int argc, char **argv) optarg); break; + case 15: + if (!parse_sync_method(optarg, &sync_method)) + exit_nicely(1); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -1069,6 +1075,7 @@ help(const char *progname) " compress as specified\n")); printf(_(" --lock-wait-timeout=TIMEOUT fail after waiting TIMEOUT for a table lock\n")); printf(_(" --no-sync do not wait for changes to be written safely to disk\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nOptions controlling the output content:\n")); diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index bdfacf3263..bfd44a284e 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -22,6 +22,7 @@ #include "common/file_perm.h" #include "common/restricted_token.h" #include "common/string.h" +#include "fe_utils/option_utils.h" #include "fe_utils/recovery_gen.h" #include "fe_utils/string_utils.h" #include "file_ops.h" @@ -108,6 +109,7 @@ usage(const char *progname) " file when running target cluster\n")); printf(_(" --debug write a lot of debug messages\n")); printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); @@ -132,6 +134,7 @@ main(int argc, char **argv) {"no-sync", no_argument, NULL, 'N'}, {"progress", no_argument, NULL, 'P'}, {"debug", no_argument, NULL, 3}, + {"sync-method", required_argument, NULL, 6}, {NULL, 0, NULL, 0} }; int option_index; @@ -219,6 +222,11 @@ main(int argc, char **argv) config_file = pg_strdup(optarg); break; + case 6: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c index 640361009e..b9d900d0db 100644 --- a/src/bin/pg_upgrade/option.c +++ b/src/bin/pg_upgrade/option.c @@ -14,6 +14,7 @@ #endif #include "common/string.h" +#include "fe_utils/option_utils.h" #include "getopt_long.h" #include "pg_upgrade.h" #include "utils/pidfile.h" @@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[]) {"verbose", no_argument, NULL, 'v'}, {"clone", no_argument, NULL, 1}, {"copy", no_argument, NULL, 2}, + {"sync-method", required_argument, NULL, 3}, {NULL, 0, NULL, 0} }; int option; /* Command line option */ int optindex = 0; /* used by getopt_long */ int os_user_effective_id; + DataDirSyncMethod unused; user_opts.do_sync = true; user_opts.transfer_mode = TRANSFER_MODE_COPY; @@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[]) user_opts.transfer_mode = TRANSFER_MODE_COPY; break; + case 3: + if (!parse_sync_method(optarg, &unused)) + exit(1); + user_opts.sync_method = pg_strdup(optarg); + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), os_info.progname); @@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[]) if (optind < argc) pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]); + if (!user_opts.sync_method) + user_opts.sync_method = pg_strdup("fsync"); + if (log_opts.verbose) pg_log(PG_REPORT, "Running in verbose mode"); @@ -289,6 +301,7 @@ usage(void) printf(_(" -V, --version display version information, then exit\n")); printf(_(" --clone clone instead of copying files to new cluster\n")); printf(_(" --copy copy files to new cluster (default)\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\n" "Before running pg_upgrade you must:\n" diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 4562dafcff..96bfb67167 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -192,8 +192,10 @@ main(int argc, char **argv) { prep_status("Sync data directory to disk"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir, - new_cluster.pgdata); + "\"%s/initdb\" --sync-only \"%s\" --sync-method %s", + new_cluster.bindir, + new_cluster.pgdata, + user_opts.sync_method); check_ok(); } diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 7afa96716e..842f3b6cd3 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -304,6 +304,7 @@ typedef struct transferMode transfer_mode; /* copy files or link them? */ int jobs; /* number of processes/threads to use */ char *socketdir; /* directory to use for Unix sockets */ + char *sync_method; } UserOpts; typedef struct diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c index 763c991015..d2a3adeb4b 100644 --- a/src/fe_utils/option_utils.c +++ b/src/fe_utils/option_utils.c @@ -82,3 +82,30 @@ option_parse_int(const char *optarg, const char *optname, *result = val; return true; } + +/* + * Provide strictly harmonized handling of the --sync-method option. + */ +bool +parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method) +{ + if (strcmp(optarg, "fsync") == 0) + *sync_method = DATA_DIR_SYNC_METHOD_FSYNC; + else if (strcmp(optarg, "syncfs") == 0) + { +#ifdef HAVE_SYNCFS + *sync_method = DATA_DIR_SYNC_METHOD_SYNCFS; +#else + pg_log_error("this build does not support sync method \"%s\"", + "syncfs"); + return false; +#endif + } + else + { + pg_log_error("unrecognized sync method: %s", optarg); + return false; + } + + return true; +} diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h index b7b0654cee..6f3a965916 100644 --- a/src/include/fe_utils/option_utils.h +++ b/src/include/fe_utils/option_utils.h @@ -14,6 +14,8 @@ #include "postgres_fe.h" +#include "common/file_utils.h" + typedef void (*help_handler) (const char *progname); extern void handle_help_version_opts(int argc, char *argv[], @@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[], extern bool option_parse_int(const char *optarg, const char *optname, int min_range, int max_range, int *result); +extern bool parse_sync_method(const char *optarg, + DataDirSyncMethod *sync_method); #endif /* OPTION_UTILS_H */