diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d383de2512..2166b99fc4 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3590,6 +3590,41 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + wal_init_zero (boolean) + + wal_init_zero configuration parameter + + + + + If set to on (the default), this option causes new + WAL files to be filled with zeroes. On some filesystems, this ensures + that space is allocated before we need to write WAL records. However, + Copy-On-Write (COW) filesystems may not benefit + from this technique, so the option is given to skip the unnecessary + work. If set to off, only the final byte is written + when the file is created so that it has the expected size. + + + + + + wal_recycle (boolean) + + wal_recycle configuration parameter + + + + + If set to on (the default), this option causes WAL + files to be recycled by renaming them, avoiding the need to create new + ones. On COW filesystems, it may be faster to create new ones, so the + option is given to disable this behavior. + + + + wal_sender_timeout (integer) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a181e33dd4..c6ca96079c 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -95,6 +95,8 @@ bool wal_log_hints = false; bool wal_compression = false; char *wal_consistency_checking_string = NULL; bool *wal_consistency_checking = NULL; +bool wal_init_zero = true; +bool wal_recycle = true; bool log_checkpoints = false; int sync_method = DEFAULT_SYNC_METHOD; int wal_level = WAL_LEVEL_MINIMAL; @@ -3209,6 +3211,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) XLogSegNo max_segno; int fd; int nbytes; + int save_errno; XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size); @@ -3248,39 +3251,61 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", tmppath))); - /* - * Zero-fill the file. We have to do this the hard way to ensure that all - * the file space has really been allocated --- on platforms that allow - * "holes" in files, just seeking to the end doesn't allocate intermediate - * space. This way, we know that we have all the space and (after the - * fsync below) that all the indirect blocks are down on disk. Therefore, - * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the - * log file. - */ memset(zbuffer.data, 0, XLOG_BLCKSZ); - for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) + + pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); + save_errno = 0; + if (wal_init_zero) { - errno = 0; - pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); - if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) + /* + * Zero-fill the file. With this setting, we do this the hard way to + * ensure that all the file space has really been allocated. On + * platforms that allow "holes" in files, just seeking to the end + * doesn't allocate intermediate space. This way, we know that we + * have all the space and (after the fsync below) that all the + * indirect blocks are down on disk. Therefore, fdatasync(2) or + * O_DSYNC will be sufficient to sync future writes to the log file. + */ + for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) { - int save_errno = errno; - - /* - * If we fail to make the file, delete it to release disk space - */ - unlink(tmppath); - - close(fd); - - /* if write didn't set errno, assume problem is no disk space */ - errno = save_errno ? save_errno : ENOSPC; - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); + errno = 0; + if (write(fd, zbuffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ) + { + /* if write didn't set errno, assume no disk space */ + save_errno = errno ? errno : ENOSPC; + break; + } } - pgstat_report_wait_end(); + } + else + { + /* + * Otherwise, seeking to the end and writing a solitary byte is + * enough. + */ + errno = 0; + if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1) + { + /* if write didn't set errno, assume no disk space */ + save_errno = errno ? errno : ENOSPC; + } + } + pgstat_report_wait_end(); + + if (save_errno) + { + /* + * If we fail to make the file, delete it to release disk space + */ + unlink(tmppath); + + close(fd); + + errno = save_errno; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); } pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC); @@ -4049,14 +4074,19 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr) XLogSegNo endlogSegNo; XLogSegNo recycleSegNo; - /* - * Initialize info about where to try to recycle to. - */ - XLByteToSeg(endptr, endlogSegNo, wal_segment_size); - if (RedoRecPtr == InvalidXLogRecPtr) - recycleSegNo = endlogSegNo + 10; + if (wal_recycle) + { + /* + * Initialize info about where to try to recycle to. + */ + XLByteToSeg(endptr, endlogSegNo, wal_segment_size); + if (RedoRecPtr == InvalidXLogRecPtr) + recycleSegNo = endlogSegNo + 10; + else + recycleSegNo = XLOGfileslop(RedoRecPtr); + } else - recycleSegNo = XLOGfileslop(RedoRecPtr); + recycleSegNo = 0; /* keep compiler quiet */ snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname); @@ -4065,7 +4095,8 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr) * segment. Only recycle normal files, pg_standby for example can create * symbolic links pointing to a separate archive directory. */ - if (endlogSegNo <= recycleSegNo && + if (wal_recycle && + endlogSegNo <= recycleSegNo && lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) && InstallXLogFileSegment(&endlogSegNo, path, true, recycleSegNo, true)) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index aa564d153a..cd5a65be75 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1174,6 +1174,26 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"wal_init_zero", PGC_SUSET, WAL_SETTINGS, + gettext_noop("Writes zeroes to new WAL files before first use."), + NULL + }, + &wal_init_zero, + true, + NULL, NULL, NULL + }, + + { + {"wal_recycle", PGC_SUSET, WAL_SETTINGS, + gettext_noop("Recycles WAL files by renaming them."), + NULL + }, + &wal_recycle, + true, + NULL, NULL, NULL + }, + { {"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs each checkpoint."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index cccb5f145a..9b15361403 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -206,6 +206,8 @@ #wal_compression = off # enable compression of full-page writes #wal_log_hints = off # also do full page writes of non-critical updates # (change requires restart) +#wal_init_zero = on # zero-fill new WAL files +#wal_recycle = on # recycle WAL files #wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers # (change requires restart) #wal_writer_delay = 200ms # 1-10000 milliseconds diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index eb6c44649d..2af938bfdc 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -116,6 +116,8 @@ extern bool EnableHotStandby; extern bool fullPageWrites; extern bool wal_log_hints; extern bool wal_compression; +extern bool wal_init_zero; +extern bool wal_recycle; extern bool *wal_consistency_checking; extern char *wal_consistency_checking_string; extern bool log_checkpoints;