From ce6a71fa5300cf00adf32c9daee302c523609709 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Mon, 11 Jan 2021 14:41:39 +1300 Subject: [PATCH] Use vectored I/O to fill new WAL segments. Instead of making many block-sized write() calls to fill a new WAL file with zeroes, make a smaller number of pwritev() calls (or various emulations). The actual number depends on the OS's IOV_MAX, which PG_IOV_MAX currently caps at 32. That means we'll write 256kB per call on typical systems. We may want to tune the number later with more experience. Reviewed-by: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/CA%2BhUKGJA%2Bu-220VONeoREBXJ9P3S94Y7J%2BkqCnTYmahvZJwM%3Dg%40mail.gmail.com --- src/backend/access/transam/xlog.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ede93ad7fd..b18257c198 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -48,6 +48,7 @@ #include "pg_trace.h" #include "pgstat.h" #include "port/atomics.h" +#include "port/pg_iovec.h" #include "postmaster/bgwriter.h" #include "postmaster/startup.h" #include "postmaster/walwriter.h" @@ -3270,7 +3271,6 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) XLogSegNo installed_segno; XLogSegNo max_segno; int fd; - int nbytes; int save_errno; XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size); @@ -3317,6 +3317,9 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) save_errno = 0; if (wal_init_zero) { + struct iovec iov[PG_IOV_MAX]; + int blocks; + /* * Zero-fill the file. With this setting, we do this the hard way to * ensure that all the file space has really been allocated. On @@ -3326,15 +3329,28 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) * indirect blocks are down on disk. Therefore, fdatasync(2) or * O_DSYNC will be sufficient to sync future writes to the log file. */ - for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) + + /* Prepare to write out a lot of copies of our zero buffer at once. */ + for (int i = 0; i < lengthof(iov); ++i) { - errno = 0; - if (write(fd, zbuffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ) + iov[i].iov_base = zbuffer.data; + iov[i].iov_len = XLOG_BLCKSZ; + } + + /* Loop, writing as many blocks as we can for each system call. */ + blocks = wal_segment_size / XLOG_BLCKSZ; + for (int i = 0; i < blocks;) + { + int iovcnt = Min(blocks - i, lengthof(iov)); + off_t offset = i * XLOG_BLCKSZ; + + if (pg_pwritev_with_retry(fd, iov, iovcnt, offset) < 0) { - /* if write didn't set errno, assume no disk space */ - save_errno = errno ? errno : ENOSPC; + save_errno = errno; break; } + + i += iovcnt; } } else