From dbdfd114f34443f1e4ad16ce2721f9817d3b3d80 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Nov 2016 18:36:10 -0500 Subject: [PATCH] Bring some clarity to the defaults for the xxx_flush_after parameters. Instead of confusingly stating platform-dependent defaults for these parameters in the comments in postgresql.conf.sample (with the main entry being a lie on Linux), teach initdb to install the correct platform-dependent value in postgresql.conf, similarly to the way we handle other platform-dependent defaults. This won't do anything for existing 9.6 installations, but since it's effectively only a documentation improvement, that seems OK. Since this requires initdb to have access to the default values, move the #define's for those to pg_config_manual.h; the original placement in bufmgr.h is unworkable because that file can't be included by frontend programs. Adjust the default value for wal_writer_flush_after so that it is 1MB regardless of XLOG_BLCKSZ, conforming to what is stated in both the SGML docs and postgresql.conf. (We could alternatively make it scale with XLOG_BLCKSZ, but I'm not sure I see the point.) Copy-edit related SGML documentation. Fabien Coelho and Tom Lane, per a gripe from Tomas Vondra. Discussion: <30ebc6e3-8358-09cf-44a8-578252938424@2ndquadrant.com> --- doc/src/sgml/config.sgml | 33 ++++++++++--------- src/backend/access/transam/xlog.c | 2 +- src/backend/utils/misc/guc.c | 8 ++--- src/backend/utils/misc/postgresql.conf.sample | 10 +++--- src/bin/initdb/initdb.c | 25 ++++++++++++-- src/include/pg_config_manual.h | 18 ++++++++++ src/include/storage/bufmgr.h | 11 ------- 7 files changed, 66 insertions(+), 41 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index adab2f8378..dcd06634fe 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1903,10 +1903,10 @@ include_dir 'conf.d' , but smaller than the OS's page cache, where performance might degrade. This setting may have no effect on some platforms. The valid range is between - 0, which disables controlled writeback, and + 0, which disables forced writeback, and 2MB. The default is 512kB on Linux, - 0 elsewhere. (Non-default values of - BLCKSZ change the default and maximum.) + 0 elsewhere. (If BLCKSZ is not 8kB, + the default and maximum values scale proportionally to it.) This parameter can only be set in the postgresql.conf file or on the server command line. @@ -2055,10 +2055,10 @@ include_dir 'conf.d' that are bigger than , but smaller than the OS's page cache, where performance might degrade. This setting may have no effect on some platforms. The valid range is - between 0, which disables controlled writeback, - and 2MB. The default is 0 (i.e. no - flush control). (Non-default values of BLCKSZ - change the maximum.) + between 0, which disables forced writeback, + and 2MB. The default is 0, i.e., no + forced writeback. (If BLCKSZ is not 8kB, + the maximum value scales proportionally to it.) @@ -2518,10 +2518,11 @@ include_dir 'conf.d' Specifies how often the WAL writer flushes WAL. After flushing WAL it sleeps for wal_writer_delay milliseconds, unless woken up - by an asynchronously committing transaction. In case the last flush + by an asynchronously committing transaction. If the last flush happened less than wal_writer_delay milliseconds ago and less than wal_writer_flush_after bytes of WAL have been - produced since, WAL is only written to the OS, not flushed to disk. + produced since, then WAL is only written to the operating system, not + flushed to disk. The default value is 200 milliseconds (200ms). Note that on many systems, the effective resolution of sleep delays is 10 milliseconds; setting wal_writer_delay to a value that is @@ -2540,12 +2541,12 @@ include_dir 'conf.d' - Specifies how often the WAL writer flushes WAL. In case the last flush + Specifies how often the WAL writer flushes WAL. If the last flush happened less than wal_writer_delay milliseconds ago and less than wal_writer_flush_after bytes of WAL have been - produced since, WAL is only written to the OS, not flushed to disk. - If wal_writer_flush_after is set to 0 WAL is - flushed every time the WAL writer has written WAL. The default is + produced since, then WAL is only written to the operating system, not + flushed to disk. If wal_writer_flush_after is set + to 0 then WAL data is flushed immediately. The default is 1MB. This parameter can only be set in the postgresql.conf file or on the server command line. @@ -2665,10 +2666,10 @@ include_dir 'conf.d' that are bigger than , but smaller than the OS's page cache, where performance might degrade. This setting may have no effect on some platforms. The valid range is - between 0, which disables controlled writeback, + between 0, which disables forced writeback, and 2MB. The default is 256kB on - Linux, 0 elsewhere. (Non-default values of - BLCKSZ change the default and maximum.) + Linux, 0 elsewhere. (If BLCKSZ is not + 8kB, the default and maximum values scale proportionally to it.) This parameter can only be set in the postgresql.conf file or on the server command line. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ce4f1fc298..084401d2f2 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2759,7 +2759,7 @@ XLogFlush(XLogRecPtr record) * This routine is invoked periodically by the background walwriter process. * * Returns TRUE if there was any work to do, even if we skipped flushing due - * to wal_writer_delay/wal_flush_after. + * to wal_writer_delay/wal_writer_flush_after. */ bool XLogBackgroundFlush(void) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index da74f00ab2..28ebcb6f3f 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2281,7 +2281,6 @@ static struct config_int ConfigureNamesInt[] = GUC_UNIT_BLOCKS }, &checkpoint_flush_after, - /* see bufmgr.h: OS dependent default */ DEFAULT_CHECKPOINT_FLUSH_AFTER, 0, WRITEBACK_MAX_PENDING_FLUSHES, NULL, NULL, NULL }, @@ -2310,12 +2309,12 @@ static struct config_int ConfigureNamesInt[] = { {"wal_writer_flush_after", PGC_SIGHUP, WAL_SETTINGS, - gettext_noop("Amount of WAL written out by WAL writer triggering a flush."), + gettext_noop("Amount of WAL written out by WAL writer that triggers a flush."), NULL, GUC_UNIT_XBLOCKS }, &WalWriterFlushAfter, - 128, 0, INT_MAX, + (1024*1024) / XLOG_BLCKSZ, 0, INT_MAX, NULL, NULL, NULL }, @@ -2439,7 +2438,6 @@ static struct config_int ConfigureNamesInt[] = GUC_UNIT_BLOCKS }, &bgwriter_flush_after, - /* see bufmgr.h: OS dependent default */ DEFAULT_BGWRITER_FLUSH_AFTER, 0, WRITEBACK_MAX_PENDING_FLUSHES, NULL, NULL, NULL }, @@ -2467,7 +2465,7 @@ static struct config_int ConfigureNamesInt[] = GUC_UNIT_BLOCKS }, &backend_flush_after, - 0, 0, WRITEBACK_MAX_PENDING_FLUSHES, + DEFAULT_BACKEND_FLUSH_AFTER, 0, WRITEBACK_MAX_PENDING_FLUSHES, NULL, NULL, NULL }, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 7c2daa54bc..0df15380a9 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -156,8 +156,7 @@ #bgwriter_delay = 200ms # 10-10000ms between rounds #bgwriter_lru_maxpages = 100 # 0-1000 max buffers written/round #bgwriter_lru_multiplier = 2.0 # 0-10.0 multiplier on buffers scanned/round -#bgwriter_flush_after = 0 # 0 disables, - # default is 512kB on linux, 0 otherwise +#bgwriter_flush_after = 0 # measured in pages, 0 disables # - Asynchronous Behavior - @@ -166,7 +165,7 @@ #max_parallel_workers_per_gather = 2 # taken from max_worker_processes #old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate # (change requires restart) -#backend_flush_after = 0 # 0 disables, default is 0 +#backend_flush_after = 0 # measured in pages, 0 disables #------------------------------------------------------------------------------ @@ -196,7 +195,7 @@ #wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers # (change requires restart) #wal_writer_delay = 200ms # 1-10000 milliseconds -#wal_writer_flush_after = 1MB # 0 disables +#wal_writer_flush_after = 1MB # measured in pages, 0 disables #commit_delay = 0 # range 0-100000, in microseconds #commit_siblings = 5 # range 1-1000 @@ -207,8 +206,7 @@ #max_wal_size = 1GB #min_wal_size = 80MB #checkpoint_completion_target = 0.5 # checkpoint target duration, 0.0 - 1.0 -#checkpoint_flush_after = 0 # 0 disables, - # default is 256kB on linux, 0 otherwise +#checkpoint_flush_after = 0 # measured in pages, 0 disables #checkpoint_warning = 30s # 0 disables # - Archiving - diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index c8a8c52c3d..24f9cc8eae 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -64,11 +64,11 @@ #include "common/file_utils.h" #include "common/restricted_token.h" #include "common/username.h" -#include "mb/pg_wchar.h" +#include "fe_utils/string_utils.h" #include "getaddrinfo.h" #include "getopt_long.h" +#include "mb/pg_wchar.h" #include "miscadmin.h" -#include "fe_utils/string_utils.h" /* Ideally this would be in a .h file, but it hardly seems worth the trouble */ @@ -1095,6 +1095,27 @@ setup_config(void) conflines = replace_token(conflines, "#dynamic_shared_memory_type = posix", repltok); +#if DEFAULT_BACKEND_FLUSH_AFTER > 0 + snprintf(repltok, sizeof(repltok), "#backend_flush_after = %dkB", + DEFAULT_BACKEND_FLUSH_AFTER * (BLCKSZ / 1024)); + conflines = replace_token(conflines, "#backend_flush_after = 0", + repltok); +#endif + +#if DEFAULT_BGWRITER_FLUSH_AFTER > 0 + snprintf(repltok, sizeof(repltok), "#bgwriter_flush_after = %dkB", + DEFAULT_BGWRITER_FLUSH_AFTER * (BLCKSZ / 1024)); + conflines = replace_token(conflines, "#bgwriter_flush_after = 0", + repltok); +#endif + +#if DEFAULT_CHECKPOINT_FLUSH_AFTER > 0 + snprintf(repltok, sizeof(repltok), "#checkpoint_flush_after = %dkB", + DEFAULT_CHECKPOINT_FLUSH_AFTER * (BLCKSZ / 1024)); + conflines = replace_token(conflines, "#checkpoint_flush_after = 0", + repltok); +#endif + #ifndef USE_PREFETCH conflines = replace_token(conflines, "#effective_io_concurrency = 1", diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index a2b2b614be..96885bb990 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -147,6 +147,24 @@ #define USE_PREFETCH #endif +/* + * Default and maximum values for backend_flush_after, bgwriter_flush_after + * and checkpoint_flush_after; measured in blocks. Currently, these are + * enabled by default if sync_file_range() exists, ie, only on Linux. Perhaps + * we could also enable by default if we have mmap and msync(MS_ASYNC)? + */ +#ifdef HAVE_SYNC_FILE_RANGE +#define DEFAULT_BACKEND_FLUSH_AFTER 0 /* never enabled by default */ +#define DEFAULT_BGWRITER_FLUSH_AFTER 64 +#define DEFAULT_CHECKPOINT_FLUSH_AFTER 32 +#else +#define DEFAULT_BACKEND_FLUSH_AFTER 0 +#define DEFAULT_BGWRITER_FLUSH_AFTER 0 +#define DEFAULT_CHECKPOINT_FLUSH_AFTER 0 +#endif +/* upper limit for all three variables */ +#define WRITEBACK_MAX_PENDING_FLUSHES 256 + /* * USE_SSL code should be compiled only when compiling with an SSL * implementation. (Currently, only OpenSSL is supported, but we might add diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 821bee5ece..c543ad6fde 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -54,17 +54,6 @@ struct WritebackContext; extern PGDLLIMPORT int NBuffers; /* in bufmgr.c */ -#define WRITEBACK_MAX_PENDING_FLUSHES 256 - -/* FIXME: Also default to on for mmap && msync(MS_ASYNC)? */ -#ifdef HAVE_SYNC_FILE_RANGE -#define DEFAULT_CHECKPOINT_FLUSH_AFTER 32 -#define DEFAULT_BGWRITER_FLUSH_AFTER 64 -#else -#define DEFAULT_CHECKPOINT_FLUSH_AFTER 0 -#define DEFAULT_BGWRITER_FLUSH_AFTER 0 -#endif /* HAVE_SYNC_FILE_RANGE */ - extern bool zero_damaged_pages; extern int bgwriter_lru_maxpages; extern double bgwriter_lru_multiplier;