Add wal_recycle and wal_init_zero GUCs.

On at least ZFS, it can be beneficial to create new WAL files every
time and not to bother zero-filling them.  Since it's not clear which
other filesystems might benefit from one or both of those things,
add individual GUCs to control those two behaviors independently and
make only very general statements in the docs.

Author: Jerry Jelinek, with some adjustments by Thomas Munro
Reviewed-by: Alvaro Herrera, Andres Freund, Tomas Vondra, Robert Haas and others
Discussion: https://postgr.es/m/CACPQ5Fo00QR7LNAcd1ZjgoBi4y97%2BK760YABs0vQHH5dLdkkMA%40mail.gmail.com
This commit is contained in:
Thomas Munro 2019-04-02 14:37:14 +13:00
parent 4b82664156
commit 475861b261
5 changed files with 127 additions and 37 deletions

View File

@ -3590,6 +3590,41 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
</listitem>
</varlistentry>
<varlistentry id="guc-wal-init-zero" xreflabel="wal_init_zero">
<term><varname>wal_init_zero</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>wal_init_zero</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
If set to <literal>on</literal> (the default), this option causes new
WAL files to be filled with zeroes. On some filesystems, this ensures
that space is allocated before we need to write WAL records. However,
<firstterm>Copy-On-Write</firstterm> (COW) filesystems may not benefit
from this technique, so the option is given to skip the unnecessary
work. If set to <literal>off</literal>, only the final byte is written
when the file is created so that it has the expected size.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-wal-recycle" xreflabel="wal_recycle">
<term><varname>wal_recycle</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>wal_recycle</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
If set to <literal>on</literal> (the default), this option causes WAL
files to be recycled by renaming them, avoiding the need to create new
ones. On COW filesystems, it may be faster to create new ones, so the
option is given to disable this behavior.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-wal-sender-timeout" xreflabel="wal_sender_timeout">
<term><varname>wal_sender_timeout</varname> (<type>integer</type>)
<indexterm>

View File

@ -95,6 +95,8 @@ bool wal_log_hints = false;
bool wal_compression = false;
char *wal_consistency_checking_string = NULL;
bool *wal_consistency_checking = NULL;
bool wal_init_zero = true;
bool wal_recycle = true;
bool log_checkpoints = false;
int sync_method = DEFAULT_SYNC_METHOD;
int wal_level = WAL_LEVEL_MINIMAL;
@ -3209,6 +3211,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
XLogSegNo max_segno;
int fd;
int nbytes;
int save_errno;
XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size);
@ -3248,39 +3251,61 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
(errcode_for_file_access(),
errmsg("could not create file \"%s\": %m", tmppath)));
/*
* Zero-fill the file. We have to do this the hard way to ensure that all
* the file space has really been allocated --- on platforms that allow
* "holes" in files, just seeking to the end doesn't allocate intermediate
* space. This way, we know that we have all the space and (after the
* fsync below) that all the indirect blocks are down on disk. Therefore,
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
* log file.
*/
memset(zbuffer.data, 0, XLOG_BLCKSZ);
for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
save_errno = 0;
if (wal_init_zero)
{
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
/*
* Zero-fill the file. With this setting, we do this the hard way to
* ensure that all the file space has really been allocated. On
* platforms that allow "holes" in files, just seeking to the end
* doesn't allocate intermediate space. This way, we know that we
* have all the space and (after the fsync below) that all the
* indirect blocks are down on disk. Therefore, fdatasync(2) or
* O_DSYNC will be sufficient to sync future writes to the log file.
*/
for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
{
int save_errno = errno;
/*
* If we fail to make the file, delete it to release disk space
*/
unlink(tmppath);
close(fd);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
errno = 0;
if (write(fd, zbuffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
/* if write didn't set errno, assume no disk space */
save_errno = errno ? errno : ENOSPC;
break;
}
}
pgstat_report_wait_end();
}
else
{
/*
* Otherwise, seeking to the end and writing a solitary byte is
* enough.
*/
errno = 0;
if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1)
{
/* if write didn't set errno, assume no disk space */
save_errno = errno ? errno : ENOSPC;
}
}
pgstat_report_wait_end();
if (save_errno)
{
/*
* If we fail to make the file, delete it to release disk space
*/
unlink(tmppath);
close(fd);
errno = save_errno;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
@ -4049,14 +4074,19 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
XLogSegNo endlogSegNo;
XLogSegNo recycleSegNo;
/*
* Initialize info about where to try to recycle to.
*/
XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
if (RedoRecPtr == InvalidXLogRecPtr)
recycleSegNo = endlogSegNo + 10;
if (wal_recycle)
{
/*
* Initialize info about where to try to recycle to.
*/
XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
if (RedoRecPtr == InvalidXLogRecPtr)
recycleSegNo = endlogSegNo + 10;
else
recycleSegNo = XLOGfileslop(RedoRecPtr);
}
else
recycleSegNo = XLOGfileslop(RedoRecPtr);
recycleSegNo = 0; /* keep compiler quiet */
snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
@ -4065,7 +4095,8 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
* segment. Only recycle normal files, pg_standby for example can create
* symbolic links pointing to a separate archive directory.
*/
if (endlogSegNo <= recycleSegNo &&
if (wal_recycle &&
endlogSegNo <= recycleSegNo &&
lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
InstallXLogFileSegment(&endlogSegNo, path,
true, recycleSegNo, true))

View File

@ -1174,6 +1174,26 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
{
{"wal_init_zero", PGC_SUSET, WAL_SETTINGS,
gettext_noop("Writes zeroes to new WAL files before first use."),
NULL
},
&wal_init_zero,
true,
NULL, NULL, NULL
},
{
{"wal_recycle", PGC_SUSET, WAL_SETTINGS,
gettext_noop("Recycles WAL files by renaming them."),
NULL
},
&wal_recycle,
true,
NULL, NULL, NULL
},
{
{"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
gettext_noop("Logs each checkpoint."),

View File

@ -206,6 +206,8 @@
#wal_compression = off # enable compression of full-page writes
#wal_log_hints = off # also do full page writes of non-critical updates
# (change requires restart)
#wal_init_zero = on # zero-fill new WAL files
#wal_recycle = on # recycle WAL files
#wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers
# (change requires restart)
#wal_writer_delay = 200ms # 1-10000 milliseconds

View File

@ -116,6 +116,8 @@ extern bool EnableHotStandby;
extern bool fullPageWrites;
extern bool wal_log_hints;
extern bool wal_compression;
extern bool wal_init_zero;
extern bool wal_recycle;
extern bool *wal_consistency_checking;
extern char *wal_consistency_checking_string;
extern bool log_checkpoints;