Support direct I/O on macOS.

Macs don't understand O_DIRECT, but they can disable caching with a
separate fcntl() call.  Extend the file opening functions in fd.c to
handle this for us if the caller passes in PG_O_DIRECT.

For now, this affects only WAL data and even then only if you set:

  max_wal_senders=0
  wal_level=minimal

This is not expected to be very useful on its own, but later proposed
patches will make greater use of direct I/O, and it'll be useful for
testing if developers on Macs can see the effects.

Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKG%2BADiyyHe0cun2wfT%2BSVnFVqNYPxoO6J9zcZkVO7%2BNGig%40mail.gmail.com
This commit is contained in:
Thomas Munro 2021-07-19 08:52:00 +12:00
parent f157db8622
commit 2dbe890571
4 changed files with 83 additions and 19 deletions

View File

@ -1057,10 +1057,46 @@ BasicOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
int fd;
tryAgain:
#ifdef PG_O_DIRECT_USE_F_NOCACHE
/*
* The value we defined to stand in for O_DIRECT when simulating it with
* F_NOCACHE had better not collide with any of the standard flags.
*/
StaticAssertStmt((PG_O_DIRECT &
(O_APPEND |
O_CLOEXEC |
O_CREAT |
O_DSYNC |
O_RDWR |
O_RDONLY |
O_SYNC |
O_TRUNC |
O_WRONLY)) == 0,
"PG_O_DIRECT value collides with standard flag");
fd = open(fileName, fileFlags & ~PG_O_DIRECT, fileMode);
#else
fd = open(fileName, fileFlags, fileMode);
#endif
if (fd >= 0)
{
#ifdef PG_O_DIRECT_USE_F_NOCACHE
if (fileFlags & PG_O_DIRECT)
{
if (fcntl(fd, F_NOCACHE, 1) < 0)
{
int save_errno = errno;
close(fd);
errno = save_errno;
return -1;
}
}
#endif
return fd; /* success! */
}
if (errno == EMFILE || errno == ENFILE)
{

View File

@ -217,8 +217,10 @@ handle_args(int argc, char *argv[])
"%u seconds per test\n",
secs_per_test),
secs_per_test);
#if PG_O_DIRECT != 0
#if defined(O_DIRECT)
printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
#elif defined(F_NOCACHE)
printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
#else
printf(_("Direct I/O is not supported on this platform.\n"));
#endif
@ -258,6 +260,31 @@ test_open(void)
close(tmpfile);
}
/*
 * Open "path" like open(2), additionally requesting uncached (direct) I/O
 * by whatever means the platform provides:
 *
 *  - if O_DIRECT is available, it is added to the open flags;
 *  - otherwise, if F_NOCACHE is available, it is applied to the new
 *    descriptor with fcntl() after a plain open();
 *  - otherwise the file is opened with the caller's flags unchanged.
 *
 * Returns the descriptor on success, or -1 on failure with errno describing
 * the call that failed.  If the fcntl() step fails, the descriptor is closed
 * before returning and errno still reflects the fcntl() failure.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
	int			handle;

#if defined(O_DIRECT)
	/* Native support: simply ask open() for it. */
	handle = open(path, flags | O_DIRECT, mode);
#else
	handle = open(path, flags, mode);
#if defined(F_NOCACHE)
	if (handle >= 0)
	{
		if (fcntl(handle, F_NOCACHE, 1) < 0)
		{
			/* close() may clobber errno, so remember why fcntl() failed */
			int			fcntl_errno = errno;

			close(handle);
			errno = fcntl_errno;
			return -1;
		}
	}
#endif
#endif

	return handle;
}
static void
test_sync(int writes_per_op)
{
@ -279,7 +306,7 @@ test_sync(int writes_per_op)
fflush(stdout);
#ifdef OPEN_DATASYNC_FLAG
if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1)
if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
{
printf(NA_FORMAT, _("n/a*"));
fs_warning = true;
@ -386,7 +413,7 @@ test_sync(int writes_per_op)
fflush(stdout);
#ifdef OPEN_SYNC_FLAG
if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
{
printf(NA_FORMAT, _("n/a*"));
fs_warning = true;
@ -454,7 +481,7 @@ test_open_sync(const char *msg, int writes_size)
fflush(stdout);
#ifdef OPEN_SYNC_FLAG
if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
printf(NA_FORMAT, _("n/a*"));
else
{

View File

@ -64,21 +64,6 @@ typedef uint32 TimeLineID;
*/
typedef uint16 RepOriginId;
/*
* Because O_DIRECT bypasses the kernel buffers, and because we never
* read those buffers except during crash recovery or if wal_level != minimal,
* it is a win to use it in all cases where we sync on each write(). We could
* allow O_DIRECT with fsync(), but it is unclear if fsync() could process
* writes not buffered in the kernel. Also, O_DIRECT is never enough to force
* data to the drives, it merely tries to bypass the kernel cache, so we still
* need O_SYNC/O_DSYNC.
*/
#ifdef O_DIRECT
#define PG_O_DIRECT O_DIRECT
#else
#define PG_O_DIRECT 0
#endif
/*
* This chunk of hackery attempts to determine which file sync methods
* are available on the current platform, and to choose an appropriate

View File

@ -79,6 +79,22 @@ extern int max_safe_fds;
#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == EACCES)
#endif
/*
 * O_DIRECT is not standard, but almost every Unix has it.  We translate it
 * to the appropriate Windows flag in src/port/open.c.  We simulate it with
 * fcntl(F_NOCACHE) on macOS inside fd.c's open() wrapper.  We use the name
 * PG_O_DIRECT rather than defining O_DIRECT ourselves in that case, since
 * defining a system-style flag name on a Unix is probably not a good idea.
 *
 * Three cases are distinguished below:
 *
 *  - O_DIRECT exists: PG_O_DIRECT is simply an alias for it.
 *  - Only F_NOCACHE exists: PG_O_DIRECT is a private stand-in bit, and
 *    PG_O_DIRECT_USE_F_NOCACHE is defined so that the open() wrapper masks
 *    the bit out of the flags it passes to open() and then calls
 *    fcntl(fd, F_NOCACHE, 1) on the resulting descriptor.
 *  - Neither exists: PG_O_DIRECT is 0, so OR-ing it into a set of open
 *    flags changes nothing.
 */
#if defined(O_DIRECT)
#define PG_O_DIRECT O_DIRECT
#elif defined(F_NOCACHE)
/*
 * Stand-in value only; it must not overlap any real open() flag.  The
 * open() wrapper contains a static assertion checking it against the
 * standard flags.
 */
#define PG_O_DIRECT 0x80000000
#define PG_O_DIRECT_USE_F_NOCACHE
#else
#define PG_O_DIRECT 0
#endif
/*
* prototypes for functions in fd.c
*/