From 475861b2615dd63ae8431d811749a6f9a15bbfd6 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Tue, 2 Apr 2019 14:37:14 +1300 Subject: [PATCH] Add wal_recycle and wal_init_zero GUCs. On at least ZFS, it can be beneficial to create new WAL files every time and not to bother zero-filling them. Since it's not clear which other filesystems might benefit from one or both of those things, add individual GUCs to control those two behaviors independently and make only very general statements in the docs. Author: Jerry Jelinek, with some adjustments by Thomas Munro Reviewed-by: Alvaro Herrera, Andres Freund, Tomas Vondra, Robert Haas and others Discussion: https://postgr.es/m/CACPQ5Fo00QR7LNAcd1ZjgoBi4y97%2BK760YABs0vQHH5dLdkkMA%40mail.gmail.com --- doc/src/sgml/config.sgml | 35 ++++++ src/backend/access/transam/xlog.c | 105 ++++++++++++------ src/backend/utils/misc/guc.c | 20 ++++ src/backend/utils/misc/postgresql.conf.sample | 2 + src/include/access/xlog.h | 2 + 5 files changed, 127 insertions(+), 37 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d383de2512..2166b99fc4 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3590,6 +3590,41 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + wal_init_zero (boolean) + + wal_init_zero configuration parameter + + + + + If set to on (the default), this option causes new + WAL files to be filled with zeroes. On some filesystems, this ensures + that space is allocated before we need to write WAL records. However, + Copy-On-Write (COW) filesystems may not benefit + from this technique, so the option is given to skip the unnecessary + work. If set to off, only the final byte is written + when the file is created so that it has the expected size. + + + + + + wal_recycle (boolean) + + wal_recycle configuration parameter + + + + + If set to on (the default), this option causes WAL + files to be recycled by renaming them, avoiding the need to create new + ones. On COW filesystems, it may be faster to create new ones, so the + option is given to disable this behavior. + + + + wal_sender_timeout (integer) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a181e33dd4..c6ca96079c 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -95,6 +95,8 @@ bool wal_log_hints = false; bool wal_compression = false; char *wal_consistency_checking_string = NULL; bool *wal_consistency_checking = NULL; +bool wal_init_zero = true; +bool wal_recycle = true; bool log_checkpoints = false; int sync_method = DEFAULT_SYNC_METHOD; int wal_level = WAL_LEVEL_MINIMAL; @@ -3209,6 +3211,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) XLogSegNo max_segno; int fd; int nbytes; + int save_errno; XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size); @@ -3248,39 +3251,61 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", tmppath))); - /* - * Zero-fill the file. We have to do this the hard way to ensure that all - * the file space has really been allocated --- on platforms that allow - * "holes" in files, just seeking to the end doesn't allocate intermediate - * space. This way, we know that we have all the space and (after the - * fsync below) that all the indirect blocks are down on disk. Therefore, - * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the - * log file. - */ memset(zbuffer.data, 0, XLOG_BLCKSZ); - for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) + + pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); + save_errno = 0; + if (wal_init_zero) { - errno = 0; - pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); - if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) + /* + * Zero-fill the file. With this setting, we do this the hard way to + * ensure that all the file space has really been allocated. On + * platforms that allow "holes" in files, just seeking to the end + * doesn't allocate intermediate space. This way, we know that we + * have all the space and (after the fsync below) that all the + * indirect blocks are down on disk. Therefore, fdatasync(2) or + * O_DSYNC will be sufficient to sync future writes to the log file. + */ + for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) { - int save_errno = errno; - - /* - * If we fail to make the file, delete it to release disk space - */ - unlink(tmppath); - - close(fd); - - /* if write didn't set errno, assume problem is no disk space */ - errno = save_errno ? save_errno : ENOSPC; - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); + errno = 0; + if (write(fd, zbuffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ) + { + /* if write didn't set errno, assume no disk space */ + save_errno = errno ? errno : ENOSPC; + break; + } } - pgstat_report_wait_end(); + } + else + { + /* + * Otherwise, seeking to the end and writing a solitary byte is + * enough. + */ + errno = 0; + if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1) + { + /* if write didn't set errno, assume no disk space */ + save_errno = errno ? errno : ENOSPC; + } + } + pgstat_report_wait_end(); + + if (save_errno) + { + /* + * If we fail to make the file, delete it to release disk space + */ + unlink(tmppath); + + close(fd); + + errno = save_errno; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); } pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC); @@ -4049,14 +4074,19 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr) XLogSegNo endlogSegNo; XLogSegNo recycleSegNo; - /* - * Initialize info about where to try to recycle to. - */ - XLByteToSeg(endptr, endlogSegNo, wal_segment_size); - if (RedoRecPtr == InvalidXLogRecPtr) - recycleSegNo = endlogSegNo + 10; + if (wal_recycle) + { + /* + * Initialize info about where to try to recycle to. + */ + XLByteToSeg(endptr, endlogSegNo, wal_segment_size); + if (RedoRecPtr == InvalidXLogRecPtr) + recycleSegNo = endlogSegNo + 10; + else + recycleSegNo = XLOGfileslop(RedoRecPtr); + } else - recycleSegNo = XLOGfileslop(RedoRecPtr); + recycleSegNo = 0; /* keep compiler quiet */ snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname); @@ -4065,7 +4095,8 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr) * segment. Only recycle normal files, pg_standby for example can create * symbolic links pointing to a separate archive directory. */ - if (endlogSegNo <= recycleSegNo && + if (wal_recycle && + endlogSegNo <= recycleSegNo && lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) && InstallXLogFileSegment(&endlogSegNo, path, true, recycleSegNo, true)) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index aa564d153a..cd5a65be75 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1174,6 +1174,26 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"wal_init_zero", PGC_SUSET, WAL_SETTINGS, + gettext_noop("Writes zeroes to new WAL files before first use."), + NULL + }, + &wal_init_zero, + true, + NULL, NULL, NULL + }, + + { + {"wal_recycle", PGC_SUSET, WAL_SETTINGS, + gettext_noop("Recycles WAL files by renaming them."), + NULL + }, + &wal_recycle, + true, + NULL, NULL, NULL + }, + { {"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs each checkpoint."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index cccb5f145a..9b15361403 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -206,6 +206,8 @@ #wal_compression = off # enable compression of full-page writes #wal_log_hints = off # also do full page writes of non-critical updates # (change requires restart) +#wal_init_zero = on # zero-fill new WAL files +#wal_recycle = on # recycle WAL files #wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers # (change requires restart) #wal_writer_delay = 200ms # 1-10000 milliseconds diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index eb6c44649d..2af938bfdc 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -116,6 +116,8 @@ extern bool EnableHotStandby; extern bool fullPageWrites; extern bool wal_log_hints; extern bool wal_compression; +extern bool wal_init_zero; +extern bool wal_recycle; extern bool *wal_consistency_checking; extern char *wal_consistency_checking_string; extern bool log_checkpoints;