Report progress of startup operations that take a long time.

Users sometimes get concerned whe they start the server and it
emits a few messages and then doesn't emit any more messages for
a long time. Generally, what's happening is either that the
system is taking a long time to apply WAL, or it's taking a
long time to reset unlogged relations, or it's taking a long
time to fsync the data directory, but it's not easy to tell
which is the case.

To fix that, add a new 'log_startup_progress_interval' setting,
by default 10s. When an operation that is known to be potentially
long-running takes more than this amount of time, we'll log a
status update each time this interval elapses.

To avoid undesirable log chatter, don't log anything about WAL
replay when in standby mode.

Nitin Jadhav and Robert Haas, reviewed by Amul Sul, Bharath
Rupireddy, Justin Pryzby, Michael Paquier, and Álvaro Herrera.

Discussion: https://postgr.es/m/CA+TgmoaHQrgDFOBwgY16XCoMtXxsrVGFB2jNCvb7-ubuEe1MGg@mail.gmail.com
Discussion: https://postgr.es/m/CAMm1aWaHF7VE69572_OLQ+MgpT5RUiUDgF1x5RrtkJBLdpRj3Q@mail.gmail.com
This commit is contained in:
Robert Haas 2021-10-25 11:51:57 -04:00
parent 732e6677a6
commit 9ce346eabf
10 changed files with 181 additions and 3 deletions

View File

@ -6549,6 +6549,30 @@ local0.* /var/log/postgresql
</listitem>
</varlistentry>
<varlistentry id="guc-log-startup-progress-interval" xreflabel="log_startup_progress_interval">
<term><varname>log_startup_progress_interval</varname> (<type>integer</type>)
<indexterm>
<primary><varname>log_startup_progress_interval</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Sets the amount of time after which the startup process will log
a message about a long-running operation that is still in progress,
as well as the interval between further progress messages for that
operation. This setting is applied separately to each operation.
For example, if syncing the data directory takes 25 seconds and
thereafter resetting unlogged relations takes 8 seconds, and if this
setting has the default value of 10 seconds, then a messages will be
logged for syncing the data directory after it has been in progress
for 10 seconds and again after it has been in progress for 20 seconds,
but nothing will be logged for resetting unlogged operations.
A setting of <literal>0</literal> disables the feature. If this value
is specified without units, it is taken as milliseconds.
</para>
</listitem>
</varlistentry>
</variablelist>
<para>

View File

@ -79,6 +79,7 @@
#include "utils/relmapper.h"
#include "utils/pg_rusage.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
extern uint32 bootstrap_data_checksum_version;
@ -6718,6 +6719,11 @@ StartupXLOG(void)
*/
ValidateXLOGDirectoryStructure();
/* Set up timeout handler needed to report startup progress. */
if (!IsBootstrapProcessingMode())
RegisterTimeout(STARTUP_PROGRESS_TIMEOUT,
startup_progress_timeout_handler);
/*----------
* If we previously crashed, perform a couple of actions:
*
@ -7491,6 +7497,10 @@ StartupXLOG(void)
(errmsg("redo starts at %X/%X",
LSN_FORMAT_ARGS(ReadRecPtr))));
/* Prepare to report progress of the redo phase. */
if (!StandbyMode)
begin_startup_progress_phase();
/*
* main redo apply loop
*/
@ -7498,6 +7508,10 @@ StartupXLOG(void)
{
bool switchedTLI = false;
if (!StandbyMode)
ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
LSN_FORMAT_ARGS(ReadRecPtr));
#ifdef WAL_DEBUG
if (XLOG_DEBUG ||
(rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||

View File

@ -10,13 +10,15 @@ GETTEXT_TRIGGERS = $(BACKEND_COMMON_GETTEXT_TRIGGERS) \
yyerror \
parser_yyerror \
scanner_yyerror \
report_invalid_record:2
report_invalid_record:2 \
ereport_startup_progress
GETTEXT_FLAGS = $(BACKEND_COMMON_GETTEXT_FLAGS) \
GUC_check_errmsg:1:c-format \
GUC_check_errdetail:1:c-format \
GUC_check_errhint:1:c-format \
write_stderr:1:c-format \
report_invalid_record:2:c-format
report_invalid_record:2:c-format \
ereport_startup_progress:1:c-format
gettext-files: distprep
find $(srcdir) $(srcdir)/../common $(srcdir)/../port -name '*.c' -print | LC_ALL=C sort >$@

View File

@ -59,6 +59,22 @@ static volatile sig_atomic_t promote_signaled = false;
*/
static volatile sig_atomic_t in_restore_command = false;
/*
* Time at which the most recent startup operation started.
*/
static TimestampTz startup_progress_phase_start_time;
/*
* Indicates whether the startup progress interval mentioned by the user is
* elapsed or not. TRUE if timeout occurred, FALSE otherwise.
*/
static volatile sig_atomic_t startup_progress_timer_expired = false;
/*
* Time between progress updates for long-running startup operations.
*/
int log_startup_progress_interval = 10000; /* 10 sec */
/* Signal handlers */
static void StartupProcTriggerHandler(SIGNAL_ARGS);
static void StartupProcSigHupHandler(SIGNAL_ARGS);
@ -282,3 +298,58 @@ ResetPromoteSignaled(void)
{
promote_signaled = false;
}
/*
* Set a flag indicating that it's time to log a progress report.
*/
void
startup_progress_timeout_handler(void)
{
startup_progress_timer_expired = true;
}
/*
* Set the start timestamp of the current operation and enable the timeout.
*/
void
begin_startup_progress_phase(void)
{
TimestampTz fin_time;
/* Feature is disabled. */
if (log_startup_progress_interval == 0)
return;
startup_progress_phase_start_time = GetCurrentTimestamp();
fin_time = TimestampTzPlusMilliseconds(startup_progress_phase_start_time,
log_startup_progress_interval);
enable_timeout_every(STARTUP_PROGRESS_TIMEOUT, fin_time,
log_startup_progress_interval);
}
/*
* Report whether startup progress timeout has occurred. Reset the timer flag
* if it did, set the elapsed time to the out parameters and return true,
* otherwise return false.
*/
bool
has_startup_progress_timeout_expired(long *secs, int *usecs)
{
long seconds;
int useconds;
TimestampTz now;
/* No timeout has occurred. */
if (!startup_progress_timer_expired)
return false;
/* Calculate the elapsed time. */
now = GetCurrentTimestamp();
TimestampDifference(startup_progress_phase_start_time, now, &seconds, &useconds);
*secs = seconds;
*usecs = useconds;
startup_progress_timer_expired = false;
return true;
}

View File

@ -96,6 +96,7 @@
#include "pgstat.h"
#include "port/pg_iovec.h"
#include "portability/mem.h"
#include "postmaster/startup.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "utils/guc.h"
@ -3381,6 +3382,9 @@ do_syncfs(const char *path)
{
int fd;
ereport_startup_progress("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s",
path);
fd = OpenTransientFile(path, O_RDONLY);
if (fd < 0)
{
@ -3465,6 +3469,9 @@ SyncDataDirectory(void)
* directories.
*/
/* Prepare to report progress syncing the data directory via syncfs. */
begin_startup_progress_phase();
/* Sync the top level pgdata directory. */
do_syncfs(".");
/* If any tablespaces are configured, sync each of those. */
@ -3487,18 +3494,24 @@ SyncDataDirectory(void)
}
#endif /* !HAVE_SYNCFS */
#ifdef PG_FLUSH_DATA_WORKS
/* Prepare to report progress of the pre-fsync phase. */
begin_startup_progress_phase();
/*
* If possible, hint to the kernel that we're soon going to fsync the data
* directory and its contents. Errors in this step are even less
* interesting than normal, so log them only at DEBUG1.
*/
#ifdef PG_FLUSH_DATA_WORKS
walkdir(".", pre_sync_fname, false, DEBUG1);
if (xlog_is_symlink)
walkdir("pg_wal", pre_sync_fname, false, DEBUG1);
walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1);
#endif
/* Prepare to report progress syncing the data directory via fsync. */
begin_startup_progress_phase();
/*
* Now we do the fsync()s in the same order.
*
@ -3601,6 +3614,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel)
if (isdir)
return;
ereport_startup_progress("syncing data directory (pre-fsync), elapsed time: %ld.%02d s, current path: %s",
fname);
fd = OpenTransientFile(fname, O_RDONLY | PG_BINARY);
if (fd < 0)
@ -3630,6 +3646,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel)
static void
datadir_fsync_fname(const char *fname, bool isdir, int elevel)
{
ereport_startup_progress("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s",
fname);
/*
* We want to silently ignoring errors about unreadable files. Pass that
* desire on to fsync_fname_ext().

View File

@ -17,6 +17,7 @@
#include <unistd.h>
#include "common/relpath.h"
#include "postmaster/startup.h"
#include "storage/copydir.h"
#include "storage/fd.h"
#include "storage/reinit.h"
@ -65,6 +66,9 @@ ResetUnloggedRelations(int op)
ALLOCSET_DEFAULT_SIZES);
oldctx = MemoryContextSwitchTo(tmpctx);
/* Prepare to report progress resetting unlogged relations. */
begin_startup_progress_phase();
/*
* First process unlogged files in pg_default ($PGDATA/base)
*/
@ -136,6 +140,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
tsdirname, de->d_name);
if (op & UNLOGGED_RELATION_INIT)
ereport_startup_progress("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
dbspace_path);
else if (op & UNLOGGED_RELATION_CLEANUP)
ereport_startup_progress("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
dbspace_path);
ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
}

View File

@ -71,6 +71,7 @@
#include "postmaster/bgworker_internals.h"
#include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h"
#include "postmaster/startup.h"
#include "postmaster/syslogger.h"
#include "postmaster/walwriter.h"
#include "replication/logicallauncher.h"
@ -3570,6 +3571,18 @@ static struct config_int ConfigureNamesInt[] =
check_client_connection_check_interval, NULL, NULL
},
{
{"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN,
gettext_noop("Time between progress updates for "
"long-running startup operations."),
gettext_noop("0 turns this feature off."),
GUC_UNIT_MS,
},
&log_startup_progress_interval,
10000, 0, INT_MAX,
NULL, NULL, NULL
},
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL

View File

@ -523,6 +523,11 @@
# are logged regardless of their duration; 1.0 logs all
# statements from all transactions, 0.0 never logs
#log_startup_progress_interval = 10s # Time between progress updates for
# long-running startup operations.
# 0 disables the feature, > 0 indicates
# the interval in milliseconds.
# - What to Log -
#debug_print_parse = off

View File

@ -12,6 +12,19 @@
#ifndef _STARTUP_H
#define _STARTUP_H
/*
* Log the startup progress message if a timer has expired.
*/
#define ereport_startup_progress(msg, ...) \
do { \
long secs; \
int usecs; \
if (has_startup_progress_timeout_expired(&secs, &usecs)) \
ereport(LOG, errmsg(msg, secs, (usecs / 10000), __VA_ARGS__ )); \
} while(0)
extern int log_startup_progress_interval;
extern void HandleStartupProcInterrupts(void);
extern void StartupProcessMain(void) pg_attribute_noreturn();
extern void PreRestoreCommand(void);
@ -19,4 +32,8 @@ extern void PostRestoreCommand(void);
extern bool IsPromoteSignaled(void);
extern void ResetPromoteSignaled(void);
extern void begin_startup_progress_phase(void);
extern void startup_progress_timeout_handler(void);
extern bool has_startup_progress_timeout_expired(long *secs, int *usecs);
#endif /* _STARTUP_H */

View File

@ -33,6 +33,7 @@ typedef enum TimeoutId
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
IDLE_SESSION_TIMEOUT,
CLIENT_CONNECTION_CHECK_TIMEOUT,
STARTUP_PROGRESS_TIMEOUT,
/* First user-definable timeout reason */
USER_TIMEOUT,
/* Maximum number of timeout reasons */