diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a340a5f6af..0792c08ce1 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -1057,10 +1057,49 @@ BasicOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
 	int			fd;
 
 tryAgain:
+#ifdef PG_O_DIRECT_USE_F_NOCACHE
+
+	/*
+	 * The value we defined to stand in for O_DIRECT when simulating it with
+	 * F_NOCACHE had better not collide with any of the standard flags.
+	 */
+	StaticAssertStmt((PG_O_DIRECT &
+					  (O_APPEND |
+					   O_CLOEXEC |
+					   O_CREAT |
+					   O_DSYNC |
+					   O_EXCL |
+					   O_RDWR |
+					   O_RDONLY |
+					   O_SYNC |
+					   O_TRUNC |
+					   O_WRONLY)) == 0,
+					 "PG_O_DIRECT value collides with standard flag");
+	fd = open(fileName, fileFlags & ~PG_O_DIRECT, fileMode);
+#else
 	fd = open(fileName, fileFlags, fileMode);
+#endif
 
 	if (fd >= 0)
+	{
+#ifdef PG_O_DIRECT_USE_F_NOCACHE
+		/* Apply the simulated O_DIRECT now that we have a descriptor. */
+		if (fileFlags & PG_O_DIRECT)
+		{
+			if (fcntl(fd, F_NOCACHE, 1) < 0)
+			{
+				int			save_errno = errno;
+
+				/* close() may clobber errno; report fcntl()'s failure */
+				close(fd);
+				errno = save_errno;
+				return -1;
+			}
+		}
+#endif
+
 		return fd;				/* success! */
+	}
 
 	if (errno == EMFILE || errno == ENFILE)
 	{
diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c
index 78dab5096c..fef31844fa 100644
--- a/src/bin/pg_test_fsync/pg_test_fsync.c
+++ b/src/bin/pg_test_fsync/pg_test_fsync.c
@@ -217,8 +217,10 @@ handle_args(int argc, char *argv[])
 					"%u seconds per test\n",
 					secs_per_test),
 		   secs_per_test);
-#if PG_O_DIRECT != 0
+#if defined(O_DIRECT)
 	printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
+#elif defined(F_NOCACHE)
+	printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
 #else
 	printf(_("Direct I/O is not supported on this platform.\n"));
 #endif
@@ -258,6 +260,39 @@ test_open(void)
 	close(tmpfile);
 }
 
+/*
+ * Open a file with direct I/O requested, by whichever means the platform
+ * provides: the O_DIRECT flag to open() if it is defined, otherwise
+ * fcntl(F_NOCACHE) on the freshly opened descriptor if that is defined.
+ * With neither available this is a plain open().
+ *
+ * Returns the file descriptor, or -1 with errno set on failure.
+ */
+static int
+open_direct(const char *path, int flags, mode_t mode)
+{
+	int			fd;
+
+#ifdef O_DIRECT
+	flags |= O_DIRECT;
+#endif
+
+	fd = open(path, flags, mode);
+
+#if !defined(O_DIRECT) && defined(F_NOCACHE)
+	if (fd >= 0 && fcntl(fd, F_NOCACHE, 1) < 0)
+	{
+		int			save_errno = errno;
+
+		close(fd);
+		errno = save_errno;
+		return -1;
+	}
+#endif
+
+	return fd;
+}
+
 static void
 test_sync(int writes_per_op)
 {
@@ -279,7 +314,7 @@ test_sync(int writes_per_op)
 	fflush(stdout);
 
 #ifdef OPEN_DATASYNC_FLAG
-	if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1)
+	if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
 		fs_warning = true;
@@ -386,7 +421,7 @@ test_sync(int writes_per_op)
 	fflush(stdout);
 
 #ifdef OPEN_SYNC_FLAG
-	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
+	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
 		fs_warning = true;
@@ -454,7 +489,7 @@ test_open_sync(const char *msg, int writes_size)
 	fflush(stdout);
 
 #ifdef OPEN_SYNC_FLAG
-	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
+	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
 		printf(NA_FORMAT, _("n/a*"));
 	else
 	{
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index 0940b64ca6..60348d1850 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -64,21 +64,6 @@ typedef uint32 TimeLineID;
  */
 typedef uint16 RepOriginId;
 
-/*
- * Because O_DIRECT bypasses the kernel buffers, and because we never
- * read those buffers except during crash recovery or if wal_level != minimal,
- * it is a win to use it in all cases where we sync on each write().  We could
- * allow O_DIRECT with fsync(), but it is unclear if fsync() could process
- * writes not buffered in the kernel.  Also, O_DIRECT is never enough to force
- * data to the drives, it merely tries to bypass the kernel cache, so we still
- * need O_SYNC/O_DSYNC.
- */
-#ifdef O_DIRECT
-#define PG_O_DIRECT				O_DIRECT
-#else
-#define PG_O_DIRECT				0
-#endif
-
 /*
  * This chunk of hackery attempts to determine which file sync methods
  * are available on the current platform, and to choose an appropriate
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 5b3c280dd7..2d843eb992 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -79,6 +79,28 @@ extern int max_safe_fds;
 #define FILE_POSSIBLY_DELETED(err)	((err) == ENOENT || (err) == EACCES)
 #endif
 
+/*
+ * PG_O_DIRECT is the flag to pass when requesting direct I/O.  O_DIRECT is
+ * not standard, but almost every Unix has it, and there PG_O_DIRECT is
+ * simply O_DIRECT.  We translate it to the appropriate Windows flag in
+ * src/port/open.c.  On macOS we simulate it with fcntl(F_NOCACHE): there
+ * PG_O_DIRECT is a made-up bit that BasicOpenFilePerm() in fd.c removes
+ * from the flags before calling open() and acts on afterwards.  We use the
+ * name PG_O_DIRECT rather than defining O_DIRECT ourselves in that case,
+ * because inventing a system flag on a Unix is probably not a good idea.
+ *
+ * Direct I/O merely tries to bypass the kernel cache; it is never enough to
+ * force data to the drives, so we still need O_SYNC/O_DSYNC.
+ */
+#if defined(O_DIRECT)
+#define PG_O_DIRECT O_DIRECT
+#elif defined(F_NOCACHE)
+#define PG_O_DIRECT 0x80000000
+#define PG_O_DIRECT_USE_F_NOCACHE
+#else
+#define PG_O_DIRECT 0
+#endif
+
 /*
  * prototypes for functions in fd.c
  */