diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 0bcc6fd322..4c73fe6ce8 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6549,6 +6549,30 @@ local0.* /var/log/postgresql + + log_startup_progress_interval (integer) + + log_startup_progress_interval configuration parameter + + + + + Sets the amount of time after which the startup process will log + a message about a long-running operation that is still in progress, + as well as the interval between further progress messages for that + operation. This setting is applied separately to each operation. + For example, if syncing the data directory takes 25 seconds and + thereafter resetting unlogged relations takes 8 seconds, and if this + setting has the default value of 10 seconds, then a messages will be + logged for syncing the data directory after it has been in progress + for 10 seconds and again after it has been in progress for 20 seconds, + but nothing will be logged for resetting unlogged operations. + A setting of 0 disables the feature. If this value + is specified without units, it is taken as milliseconds. + + + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 142506834c..cd553d6e12 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -79,6 +79,7 @@ #include "utils/relmapper.h" #include "utils/pg_rusage.h" #include "utils/snapmgr.h" +#include "utils/timeout.h" #include "utils/timestamp.h" extern uint32 bootstrap_data_checksum_version; @@ -6718,6 +6719,11 @@ StartupXLOG(void) */ ValidateXLOGDirectoryStructure(); + /* Set up timeout handler needed to report startup progress. */ + if (!IsBootstrapProcessingMode()) + RegisterTimeout(STARTUP_PROGRESS_TIMEOUT, + startup_progress_timeout_handler); + /*---------- * If we previously crashed, perform a couple of actions: * @@ -7491,6 +7497,10 @@ StartupXLOG(void) (errmsg("redo starts at %X/%X", LSN_FORMAT_ARGS(ReadRecPtr)))); + /* Prepare to report progress of the redo phase. */ + if (!StandbyMode) + begin_startup_progress_phase(); + /* * main redo apply loop */ @@ -7498,6 +7508,10 @@ StartupXLOG(void) { bool switchedTLI = false; + if (!StandbyMode) + ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X", + LSN_FORMAT_ARGS(ReadRecPtr)); + #ifdef WAL_DEBUG if (XLOG_DEBUG || (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) || diff --git a/src/backend/nls.mk b/src/backend/nls.mk index 771b58d4f4..355299804f 100644 --- a/src/backend/nls.mk +++ b/src/backend/nls.mk @@ -10,13 +10,15 @@ GETTEXT_TRIGGERS = $(BACKEND_COMMON_GETTEXT_TRIGGERS) \ yyerror \ parser_yyerror \ scanner_yyerror \ - report_invalid_record:2 + report_invalid_record:2 \ + ereport_startup_progress GETTEXT_FLAGS = $(BACKEND_COMMON_GETTEXT_FLAGS) \ GUC_check_errmsg:1:c-format \ GUC_check_errdetail:1:c-format \ GUC_check_errhint:1:c-format \ write_stderr:1:c-format \ - report_invalid_record:2:c-format + report_invalid_record:2:c-format \ + ereport_startup_progress:1:c-format gettext-files: distprep find $(srcdir) $(srcdir)/../common $(srcdir)/../port -name '*.c' -print | LC_ALL=C sort >$@ diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 0f4f00d689..28e68dd871 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -59,6 +59,22 @@ static volatile sig_atomic_t promote_signaled = false; */ static volatile sig_atomic_t in_restore_command = false; +/* + * Time at which the most recent startup operation started. + */ +static TimestampTz startup_progress_phase_start_time; + +/* + * Indicates whether the startup progress interval mentioned by the user is + * elapsed or not. TRUE if timeout occurred, FALSE otherwise. + */ +static volatile sig_atomic_t startup_progress_timer_expired = false; + +/* + * Time between progress updates for long-running startup operations. + */ +int log_startup_progress_interval = 10000; /* 10 sec */ + /* Signal handlers */ static void StartupProcTriggerHandler(SIGNAL_ARGS); static void StartupProcSigHupHandler(SIGNAL_ARGS); @@ -282,3 +298,58 @@ ResetPromoteSignaled(void) { promote_signaled = false; } + +/* + * Set a flag indicating that it's time to log a progress report. + */ +void +startup_progress_timeout_handler(void) +{ + startup_progress_timer_expired = true; +} + +/* + * Set the start timestamp of the current operation and enable the timeout. + */ +void +begin_startup_progress_phase(void) +{ + TimestampTz fin_time; + + /* Feature is disabled. */ + if (log_startup_progress_interval == 0) + return; + + startup_progress_phase_start_time = GetCurrentTimestamp(); + fin_time = TimestampTzPlusMilliseconds(startup_progress_phase_start_time, + log_startup_progress_interval); + enable_timeout_every(STARTUP_PROGRESS_TIMEOUT, fin_time, + log_startup_progress_interval); +} + +/* + * Report whether startup progress timeout has occurred. Reset the timer flag + * if it did, set the elapsed time to the out parameters and return true, + * otherwise return false. + */ +bool +has_startup_progress_timeout_expired(long *secs, int *usecs) +{ + long seconds; + int useconds; + TimestampTz now; + + /* No timeout has occurred. */ + if (!startup_progress_timer_expired) + return false; + + /* Calculate the elapsed time. */ + now = GetCurrentTimestamp(); + TimestampDifference(startup_progress_phase_start_time, now, &seconds, &useconds); + + *secs = seconds; + *usecs = useconds; + startup_progress_timer_expired = false; + + return true; +} diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index f9cda6906d..cb1a8dd34f 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -96,6 +96,7 @@ #include "pgstat.h" #include "port/pg_iovec.h" #include "portability/mem.h" +#include "postmaster/startup.h" #include "storage/fd.h" #include "storage/ipc.h" #include "utils/guc.h" @@ -3381,6 +3382,9 @@ do_syncfs(const char *path) { int fd; + ereport_startup_progress("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s", + path); + fd = OpenTransientFile(path, O_RDONLY); if (fd < 0) { @@ -3465,6 +3469,9 @@ SyncDataDirectory(void) * directories. */ + /* Prepare to report progress syncing the data directory via syncfs. */ + begin_startup_progress_phase(); + /* Sync the top level pgdata directory. */ do_syncfs("."); /* If any tablespaces are configured, sync each of those. */ @@ -3487,18 +3494,24 @@ SyncDataDirectory(void) } #endif /* !HAVE_SYNCFS */ +#ifdef PG_FLUSH_DATA_WORKS + /* Prepare to report progress of the pre-fsync phase. */ + begin_startup_progress_phase(); + /* * If possible, hint to the kernel that we're soon going to fsync the data * directory and its contents. Errors in this step are even less * interesting than normal, so log them only at DEBUG1. */ -#ifdef PG_FLUSH_DATA_WORKS walkdir(".", pre_sync_fname, false, DEBUG1); if (xlog_is_symlink) walkdir("pg_wal", pre_sync_fname, false, DEBUG1); walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1); #endif + /* Prepare to report progress syncing the data directory via fsync. */ + begin_startup_progress_phase(); + /* * Now we do the fsync()s in the same order. * @@ -3601,6 +3614,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel) if (isdir) return; + ereport_startup_progress("syncing data directory (pre-fsync), elapsed time: %ld.%02d s, current path: %s", + fname); + fd = OpenTransientFile(fname, O_RDONLY | PG_BINARY); if (fd < 0) @@ -3630,6 +3646,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel) static void datadir_fsync_fname(const char *fname, bool isdir, int elevel) { + ereport_startup_progress("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s", + fname); + /* * We want to silently ignoring errors about unreadable files. Pass that * desire on to fsync_fname_ext(). diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index 40c758d789..0ae3fb6902 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -17,6 +17,7 @@ #include #include "common/relpath.h" +#include "postmaster/startup.h" #include "storage/copydir.h" #include "storage/fd.h" #include "storage/reinit.h" @@ -65,6 +66,9 @@ ResetUnloggedRelations(int op) ALLOCSET_DEFAULT_SIZES); oldctx = MemoryContextSwitchTo(tmpctx); + /* Prepare to report progress resetting unlogged relations. */ + begin_startup_progress_phase(); + /* * First process unlogged files in pg_default ($PGDATA/base) */ @@ -136,6 +140,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", tsdirname, de->d_name); + + if (op & UNLOGGED_RELATION_INIT) + ereport_startup_progress("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s", + dbspace_path); + else if (op & UNLOGGED_RELATION_CLEANUP) + ereport_startup_progress("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s", + dbspace_path); + ResetUnloggedRelationsInDbspaceDir(dbspace_path, op); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index d2ce4a8450..e91d5a3cfd 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -71,6 +71,7 @@ #include "postmaster/bgworker_internals.h" #include "postmaster/bgwriter.h" #include "postmaster/postmaster.h" +#include "postmaster/startup.h" #include "postmaster/syslogger.h" #include "postmaster/walwriter.h" #include "replication/logicallauncher.h" @@ -3570,6 +3571,18 @@ static struct config_int ConfigureNamesInt[] = check_client_connection_check_interval, NULL, NULL }, + { + {"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN, + gettext_noop("Time between progress updates for " + "long-running startup operations."), + gettext_noop("0 turns this feature off."), + GUC_UNIT_MS, + }, + &log_startup_progress_interval, + 10000, 0, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 3fe9a53cb3..1cbc9feeb6 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -523,6 +523,11 @@ # are logged regardless of their duration; 1.0 logs all # statements from all transactions, 0.0 never logs +#log_startup_progress_interval = 10s # Time between progress updates for + # long-running startup operations. + # 0 disables the feature, > 0 indicates + # the interval in milliseconds. + # - What to Log - #debug_print_parse = off diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h index bf6adf1d36..2fb208bdb5 100644 --- a/src/include/postmaster/startup.h +++ b/src/include/postmaster/startup.h @@ -12,6 +12,19 @@ #ifndef _STARTUP_H #define _STARTUP_H +/* + * Log the startup progress message if a timer has expired. + */ +#define ereport_startup_progress(msg, ...) \ + do { \ + long secs; \ + int usecs; \ + if (has_startup_progress_timeout_expired(&secs, &usecs)) \ + ereport(LOG, errmsg(msg, secs, (usecs / 10000), __VA_ARGS__ )); \ + } while(0) + +extern int log_startup_progress_interval; + extern void HandleStartupProcInterrupts(void); extern void StartupProcessMain(void) pg_attribute_noreturn(); extern void PreRestoreCommand(void); @@ -19,4 +32,8 @@ extern void PostRestoreCommand(void); extern bool IsPromoteSignaled(void); extern void ResetPromoteSignaled(void); +extern void begin_startup_progress_phase(void); +extern void startup_progress_timeout_handler(void); +extern bool has_startup_progress_timeout_expired(long *secs, int *usecs); + #endif /* _STARTUP_H */ diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index 1b13ac96e0..2cbc5de4d9 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -33,6 +33,7 @@ typedef enum TimeoutId IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IDLE_SESSION_TIMEOUT, CLIENT_CONNECTION_CHECK_TIMEOUT, + STARTUP_PROGRESS_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, /* Maximum number of timeout reasons */