Add support for syncfs() in frontend support functions.

This commit adds support for using syncfs() in fsync_pgdata() and
fsync_dir_recurse() (which have been renamed to sync_pgdata() and
sync_dir_recurse()).  Like recovery_init_sync_method,
sync_pgdata() calls syncfs() for the data directory, each
tablespace, and pg_wal (if it is a symlink).  For now, all of the
frontend utilities that use these support functions are hard-coded
to use fsync(), but a follow-up commit will allow specifying
syncfs().

Co-authored-by: Justin Pryzby
Reviewed-by: Michael Paquier
Discussion: https://postgr.es/m/20210930004340.GM831%40telsasoft.com
This commit is contained in:
Nathan Bossart 2023-09-06 16:27:00 -07:00
parent 3ed1956719
commit cccc6cdeb3
13 changed files with 190 additions and 56 deletions

View File

@ -165,6 +165,7 @@ static bool show_setting = false;
static bool data_checksums = false; static bool data_checksums = false;
static char *xlog_dir = NULL; static char *xlog_dir = NULL;
static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024); static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
/* internal vars */ /* internal vars */
@ -3322,7 +3323,7 @@ main(int argc, char *argv[])
atexit(cleanup_directories_atexit); atexit(cleanup_directories_atexit);
/* If we only need to fsync, just do it and exit */ /* If we only need to sync, just do it and exit */
if (sync_only) if (sync_only)
{ {
setup_pgdata(); setup_pgdata();
@ -3333,7 +3334,7 @@ main(int argc, char *argv[])
fputs(_("syncing data to disk ... "), stdout); fputs(_("syncing data to disk ... "), stdout);
fflush(stdout); fflush(stdout);
fsync_pgdata(pg_data, PG_VERSION_NUM); sync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
check_ok(); check_ok();
return 0; return 0;
} }
@ -3396,7 +3397,7 @@ main(int argc, char *argv[])
{ {
fputs(_("syncing data to disk ... "), stdout); fputs(_("syncing data to disk ... "), stdout);
fflush(stdout); fflush(stdout);
fsync_pgdata(pg_data, PG_VERSION_NUM); sync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
check_ok(); check_ok();
} }
else else

View File

@ -148,6 +148,7 @@ static bool verify_checksums = true;
static bool manifest = true; static bool manifest = true;
static bool manifest_force_encode = false; static bool manifest_force_encode = false;
static char *manifest_checksums = NULL; static char *manifest_checksums = NULL;
static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
static bool success = false; static bool success = false;
static bool made_new_pgdata = false; static bool made_new_pgdata = false;
@ -2199,11 +2200,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
if (format == 't') if (format == 't')
{ {
if (strcmp(basedir, "-") != 0) if (strcmp(basedir, "-") != 0)
(void) fsync_dir_recurse(basedir); (void) sync_dir_recurse(basedir, sync_method);
} }
else else
{ {
(void) fsync_pgdata(basedir, serverVersion); (void) sync_pgdata(basedir, serverVersion, sync_method);
} }
} }

View File

@ -44,6 +44,7 @@ static char *only_filenode = NULL;
static bool do_sync = true; static bool do_sync = true;
static bool verbose = false; static bool verbose = false;
static bool showprogress = false; static bool showprogress = false;
static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
typedef enum typedef enum
{ {
@ -623,7 +624,7 @@ main(int argc, char *argv[])
if (do_sync) if (do_sync)
{ {
pg_log_info("syncing data directory"); pg_log_info("syncing data directory");
fsync_pgdata(DataDir, PG_VERSION_NUM); sync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
} }
pg_log_info("updating control file"); pg_log_info("updating control file");

View File

@ -24,6 +24,7 @@
#define PG_BACKUP_H #define PG_BACKUP_H
#include "common/compression.h" #include "common/compression.h"
#include "common/file_utils.h"
#include "fe_utils/simple_list.h" #include "fe_utils/simple_list.h"
#include "libpq-fe.h" #include "libpq-fe.h"
@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt, extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
const pg_compress_specification compression_spec, const pg_compress_specification compression_spec,
bool dosync, ArchiveMode mode, bool dosync, ArchiveMode mode,
SetupWorkerPtrType setupDumpWorker); SetupWorkerPtrType setupDumpWorker,
DataDirSyncMethod sync_method);
/* The --list option */ /* The --list option */
extern void PrintTOCSummary(Archive *AHX); extern void PrintTOCSummary(Archive *AHX);

View File

@ -66,7 +66,8 @@ typedef struct _parallelReadyList
static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt, static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
const pg_compress_specification compression_spec, const pg_compress_specification compression_spec,
bool dosync, ArchiveMode mode, bool dosync, ArchiveMode mode,
SetupWorkerPtrType setupWorkerPtr); SetupWorkerPtrType setupWorkerPtr,
DataDirSyncMethod sync_method);
static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te); static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData); static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
static char *sanitize_line(const char *str, bool want_hyphen); static char *sanitize_line(const char *str, bool want_hyphen);
@ -238,11 +239,12 @@ Archive *
CreateArchive(const char *FileSpec, const ArchiveFormat fmt, CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
const pg_compress_specification compression_spec, const pg_compress_specification compression_spec,
bool dosync, ArchiveMode mode, bool dosync, ArchiveMode mode,
SetupWorkerPtrType setupDumpWorker) SetupWorkerPtrType setupDumpWorker,
DataDirSyncMethod sync_method)
{ {
ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec, ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
dosync, mode, setupDumpWorker); dosync, mode, setupDumpWorker, sync_method);
return (Archive *) AH; return (Archive *) AH;
} }
@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
compression_spec.algorithm = PG_COMPRESSION_NONE; compression_spec.algorithm = PG_COMPRESSION_NONE;
AH = _allocAH(FileSpec, fmt, compression_spec, true, AH = _allocAH(FileSpec, fmt, compression_spec, true,
archModeRead, setupRestoreWorker); archModeRead, setupRestoreWorker,
DATA_DIR_SYNC_METHOD_FSYNC);
return (Archive *) AH; return (Archive *) AH;
} }
@ -2233,7 +2236,7 @@ static ArchiveHandle *
_allocAH(const char *FileSpec, const ArchiveFormat fmt, _allocAH(const char *FileSpec, const ArchiveFormat fmt,
const pg_compress_specification compression_spec, const pg_compress_specification compression_spec,
bool dosync, ArchiveMode mode, bool dosync, ArchiveMode mode,
SetupWorkerPtrType setupWorkerPtr) SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method)
{ {
ArchiveHandle *AH; ArchiveHandle *AH;
CompressFileHandle *CFH; CompressFileHandle *CFH;
@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
AH->mode = mode; AH->mode = mode;
AH->compression_spec = compression_spec; AH->compression_spec = compression_spec;
AH->dosync = dosync; AH->dosync = dosync;
AH->sync_method = sync_method;
memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse)); memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));

View File

@ -312,6 +312,7 @@ struct _archiveHandle
pg_compress_specification compression_spec; /* Requested specification for pg_compress_specification compression_spec; /* Requested specification for
* compression */ * compression */
bool dosync; /* data requested to be synced on sight */ bool dosync; /* data requested to be synced on sight */
DataDirSyncMethod sync_method;
ArchiveMode mode; /* File mode - r or w */ ArchiveMode mode; /* File mode - r or w */
void *formatData; /* Header data specific to file format */ void *formatData; /* Header data specific to file format */

View File

@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
* individually. Just recurse once through all the files generated. * individually. Just recurse once through all the files generated.
*/ */
if (AH->dosync) if (AH->dosync)
fsync_dir_recurse(ctx->directory); sync_dir_recurse(ctx->directory, AH->sync_method);
} }
AH->FH = NULL; AH->FH = NULL;
} }

View File

@ -357,6 +357,7 @@ main(int argc, char **argv)
char *compression_algorithm_str = "none"; char *compression_algorithm_str = "none";
char *error_detail = NULL; char *error_detail = NULL;
bool user_compression_defined = false; bool user_compression_defined = false;
DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
static DumpOptions dopt; static DumpOptions dopt;
@ -777,7 +778,7 @@ main(int argc, char **argv)
/* Open the output file */ /* Open the output file */
fout = CreateArchive(filename, archiveFormat, compression_spec, fout = CreateArchive(filename, archiveFormat, compression_spec,
dosync, archiveMode, setupDumpWorker); dosync, archiveMode, setupDumpWorker, sync_method);
/* Make dump options accessible right away */ /* Make dump options accessible right away */
SetArchiveOptions(fout, &dopt, NULL); SetArchiveOptions(fout, &dopt, NULL);

View File

@ -286,9 +286,9 @@ remove_target_symlink(const char *path)
* *
* We do this once, for the whole data directory, for performance reasons. At * We do this once, for the whole data directory, for performance reasons. At
* the end of pg_rewind's run, the kernel is likely to already have flushed * the end of pg_rewind's run, the kernel is likely to already have flushed
* most dirty buffers to disk. Additionally fsync_pgdata uses a two-pass * most dirty buffers to disk. Additionally sync_pgdata uses a two-pass
* approach (only initiating writeback in the first pass), which often reduces * approach when fsync is specified (only initiating writeback in the first
* the overall amount of IO noticeably. * pass), which often reduces the overall amount of IO noticeably.
*/ */
void void
sync_target_dir(void) sync_target_dir(void)
@ -296,7 +296,7 @@ sync_target_dir(void)
if (!do_sync || dry_run) if (!do_sync || dry_run)
return; return;
fsync_pgdata(datadir_target, PG_VERSION_NUM); sync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
} }

View File

@ -74,6 +74,7 @@ bool showprogress = false;
bool dry_run = false; bool dry_run = false;
bool do_sync = true; bool do_sync = true;
bool restore_wal = false; bool restore_wal = false;
DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
/* Target history */ /* Target history */
TimeLineHistoryEntry *targetHistory; TimeLineHistoryEntry *targetHistory;

View File

@ -13,6 +13,7 @@
#include "access/timeline.h" #include "access/timeline.h"
#include "common/logging.h" #include "common/logging.h"
#include "common/file_utils.h"
#include "datapagemap.h" #include "datapagemap.h"
#include "libpq-fe.h" #include "libpq-fe.h"
#include "storage/block.h" #include "storage/block.h"
@ -24,6 +25,7 @@ extern bool showprogress;
extern bool dry_run; extern bool dry_run;
extern bool do_sync; extern bool do_sync;
extern int WalSegSz; extern int WalSegSz;
extern DataDirSyncMethod sync_method;
/* Target history */ /* Target history */
extern TimeLineHistoryEntry *targetHistory; extern TimeLineHistoryEntry *targetHistory;

View File

@ -51,19 +51,52 @@ static void walkdir(const char *path,
int (*action) (const char *fname, bool isdir), int (*action) (const char *fname, bool isdir),
bool process_symlinks); bool process_symlinks);
#ifdef HAVE_SYNCFS
/* /*
* Issue fsync recursively on PGDATA and all its contents. * do_syncfs -- Try to syncfs a file system
* *
* We fsync regular files and directories wherever they are, but we follow * Reports errors trying to open the path. syncfs() errors are fatal.
*/
static void
do_syncfs(const char *path)
{
int fd;
fd = open(path, O_RDONLY, 0);
if (fd < 0)
{
pg_log_error("could not open file \"%s\": %m", path);
return;
}
if (syncfs(fd) < 0)
{
pg_log_error("could not synchronize file system for file \"%s\": %m", path);
(void) close(fd);
exit(EXIT_FAILURE);
}
(void) close(fd);
}
#endif /* HAVE_SYNCFS */
/*
* Synchronize PGDATA and all its contents.
*
* We sync regular files and directories wherever they are, but we follow
* symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc. * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
* Other symlinks are presumed to point at files we're not responsible for * Other symlinks are presumed to point at files we're not responsible for
* fsyncing, and might not have privileges to write at all. * syncing, and might not have privileges to write at all.
* *
* serverVersion indicates the version of the server to be fsync'd. * serverVersion indicates the version of the server to be sync'd.
*/ */
void void
fsync_pgdata(const char *pg_data, sync_pgdata(const char *pg_data,
int serverVersion) int serverVersion,
DataDirSyncMethod sync_method)
{ {
bool xlog_is_symlink; bool xlog_is_symlink;
char pg_wal[MAXPGPATH]; char pg_wal[MAXPGPATH];
@ -89,9 +122,68 @@ fsync_pgdata(const char *pg_data,
xlog_is_symlink = true; xlog_is_symlink = true;
} }
switch (sync_method)
{
case DATA_DIR_SYNC_METHOD_SYNCFS:
{
#ifndef HAVE_SYNCFS
pg_log_error("this build does not support sync method \"%s\"",
"syncfs");
exit(EXIT_FAILURE);
#else
DIR *dir;
struct dirent *de;
/* /*
* If possible, hint to the kernel that we're soon going to fsync the data * On Linux, we don't have to open every single file one by
* directory and its contents. * one. We can use syncfs() to sync whole filesystems. We
* only expect filesystem boundaries to exist where we
* tolerate symlinks, namely pg_wal and the tablespaces, so we
* call syncfs() for each of those directories.
*/
/* Sync the top level pgdata directory. */
do_syncfs(pg_data);
/* If any tablespaces are configured, sync each of those. */
dir = opendir(pg_tblspc);
if (dir == NULL)
pg_log_error("could not open directory \"%s\": %m",
pg_tblspc);
else
{
while (errno = 0, (de = readdir(dir)) != NULL)
{
char subpath[MAXPGPATH * 2];
if (strcmp(de->d_name, ".") == 0 ||
strcmp(de->d_name, "..") == 0)
continue;
snprintf(subpath, sizeof(subpath), "%s/%s",
pg_tblspc, de->d_name);
do_syncfs(subpath);
}
if (errno)
pg_log_error("could not read directory \"%s\": %m",
pg_tblspc);
(void) closedir(dir);
}
/* If pg_wal is a symlink, process that too. */
if (xlog_is_symlink)
do_syncfs(pg_wal);
#endif /* HAVE_SYNCFS */
}
break;
case DATA_DIR_SYNC_METHOD_FSYNC:
{
/*
* If possible, hint to the kernel that we're soon going to
* fsync the data directory and its contents.
*/ */
#ifdef PG_FLUSH_DATA_WORKS #ifdef PG_FLUSH_DATA_WORKS
walkdir(pg_data, pre_sync_fname, false); walkdir(pg_data, pre_sync_fname, false);
@ -103,35 +195,62 @@ fsync_pgdata(const char *pg_data,
/* /*
* Now we do the fsync()s in the same order. * Now we do the fsync()s in the same order.
* *
* The main call ignores symlinks, so in addition to specially processing * The main call ignores symlinks, so in addition to specially
* pg_wal if it's a symlink, pg_tblspc has to be visited separately with * processing pg_wal if it's a symlink, pg_tblspc has to be
* process_symlinks = true. Note that if there are any plain directories * visited separately with process_symlinks = true. Note that
* in pg_tblspc, they'll get fsync'd twice. That's not an expected case * if there are any plain directories in pg_tblspc, they'll
* so we don't worry about optimizing it. * get fsync'd twice. That's not an expected case so we don't
* worry about optimizing it.
*/ */
walkdir(pg_data, fsync_fname, false); walkdir(pg_data, fsync_fname, false);
if (xlog_is_symlink) if (xlog_is_symlink)
walkdir(pg_wal, fsync_fname, false); walkdir(pg_wal, fsync_fname, false);
walkdir(pg_tblspc, fsync_fname, true); walkdir(pg_tblspc, fsync_fname, true);
}
break;
}
} }
/* /*
* Issue fsync recursively on the given directory and all its contents. * Synchronize the given directory and all its contents.
* *
* This is a convenient wrapper on top of walkdir(). * This is a convenient wrapper on top of walkdir() and do_syncfs().
*/ */
void void
fsync_dir_recurse(const char *dir) sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
{ {
switch (sync_method)
{
case DATA_DIR_SYNC_METHOD_SYNCFS:
{
#ifndef HAVE_SYNCFS
pg_log_error("this build does not support sync method \"%s\"",
"syncfs");
exit(EXIT_FAILURE);
#else
/* /*
* If possible, hint to the kernel that we're soon going to fsync the data * On Linux, we don't have to open every single file one by
* directory and its contents. * one. We can use syncfs() to sync the whole filesystem.
*/
do_syncfs(dir);
#endif /* HAVE_SYNCFS */
}
break;
case DATA_DIR_SYNC_METHOD_FSYNC:
{
/*
* If possible, hint to the kernel that we're soon going to
* fsync the data directory and its contents.
*/ */
#ifdef PG_FLUSH_DATA_WORKS #ifdef PG_FLUSH_DATA_WORKS
walkdir(dir, pre_sync_fname, false); walkdir(dir, pre_sync_fname, false);
#endif #endif
walkdir(dir, fsync_fname, false); walkdir(dir, fsync_fname, false);
}
break;
}
} }
/* /*

View File

@ -34,8 +34,9 @@ struct iovec; /* avoid including port/pg_iovec.h here */
#ifdef FRONTEND #ifdef FRONTEND
extern int fsync_fname(const char *fname, bool isdir); extern int fsync_fname(const char *fname, bool isdir);
extern void fsync_pgdata(const char *pg_data, int serverVersion); extern void sync_pgdata(const char *pg_data, int serverVersion,
extern void fsync_dir_recurse(const char *dir); DataDirSyncMethod sync_method);
extern void sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
extern int durable_rename(const char *oldfile, const char *newfile); extern int durable_rename(const char *oldfile, const char *newfile);
extern int fsync_parent_path(const char *fname); extern int fsync_parent_path(const char *fname);
#endif #endif