Temporarily make pg_ctl and server shutdown a whole lot chattier.

This is a quick hack, due to be reverted when its purpose has been served,
to try to gather information about why some of the buildfarm critters
regularly fail with "postmaster does not shut down" complaints.  Maybe they
are just really overloaded, but maybe something else is going on.  Hence,
instrument pg_ctl to print the current time when it starts waiting for
postmaster shutdown and when it gives up, and add a lot of logging of the
current time in the server's checkpoint and shutdown code paths.

No attempt has been made to make this pretty.  I'm not even totally sure
if it will build on Windows, but we'll soon find out.
This commit is contained in:
Tom Lane 2016-02-08 18:43:11 -05:00
parent 0231f83856
commit 3971f64843
7 changed files with 125 additions and 8 deletions

View File

@ -7927,7 +7927,7 @@ ShutdownXLOG(int code, Datum arg)
{
/* Don't be chatty in standalone mode */
ereport(IsPostmasterEnvironment ? LOG : NOTICE,
(errmsg("shutting down")));
(errmsg("shutting down at %s", current_time_as_str())));
if (RecoveryInProgress())
CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
@ -7943,15 +7943,28 @@ ShutdownXLOG(int code, Datum arg)
RequestXLogSwitch();
CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"shutdown checkpoint complete at %s",
current_time_as_str());
}
ShutdownCLOG();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"ShutdownCLOG() complete at %s",
current_time_as_str());
ShutdownCommitTs();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"ShutdownCommitTs() complete at %s",
current_time_as_str());
ShutdownSUBTRANS();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"ShutdownSUBTRANS() complete at %s",
current_time_as_str());
ShutdownMultiXact();
/* Don't be chatty in standalone mode */
ereport(IsPostmasterEnvironment ? LOG : NOTICE,
(errmsg("database system is shut down")));
(errmsg("database system is shut down at %s", current_time_as_str())));
}
/*
@ -8602,19 +8615,45 @@ CreateEndOfRecoveryRecord(void)
static void
CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
{
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointGuts starting at %s", current_time_as_str());
CheckPointCLOG();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointCLOG() done at %s", current_time_as_str());
CheckPointCommitTs();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointCommitTs() done at %s", current_time_as_str());
CheckPointSUBTRANS();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointSUBTRANS() done at %s", current_time_as_str());
CheckPointMultiXact();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointMultiXact() done at %s", current_time_as_str());
CheckPointPredicate();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointPredicate() done at %s", current_time_as_str());
CheckPointRelationMap();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointRelationMap() done at %s", current_time_as_str());
CheckPointReplicationSlots();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointReplicationSlots() done at %s", current_time_as_str());
CheckPointSnapBuild();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointSnapBuild() done at %s", current_time_as_str());
CheckPointLogicalRewriteHeap();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointLogicalRewriteHeap() done at %s", current_time_as_str());
CheckPointBuffers(flags); /* performs all required fsyncs */
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointBuffers() done at %s", current_time_as_str());
CheckPointReplicationOrigin();
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointReplicationOrigin() done at %s", current_time_as_str());
/* We deliberately delay 2PC checkpointing as long as possible */
CheckPointTwoPhase(checkPointRedo);
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"CheckPointGuts done at %s", current_time_as_str());
}
/*

View File

@ -772,7 +772,8 @@ AutoVacLauncherMain(int argc, char *argv[])
/* Normal exit from the autovac launcher is here */
shutdown:
ereport(LOG,
(errmsg("autovacuum launcher shutting down")));
(errmsg("autovacuum launcher shutting down at %s",
current_time_as_str())));
AutoVacuumShmem->av_launcherpid = 0;
proc_exit(0); /* done */

View File

@ -2538,7 +2538,8 @@ pmdie(SIGNAL_ARGS)
break;
Shutdown = SmartShutdown;
ereport(LOG,
(errmsg("received smart shutdown request")));
(errmsg("received smart shutdown request at %s",
current_time_as_str())));
#ifdef USE_SYSTEMD
sd_notify(0, "STOPPING=1");
#endif
@ -2593,7 +2594,8 @@ pmdie(SIGNAL_ARGS)
break;
Shutdown = FastShutdown;
ereport(LOG,
(errmsg("received fast shutdown request")));
(errmsg("received fast shutdown request at %s",
current_time_as_str())));
#ifdef USE_SYSTEMD
sd_notify(0, "STOPPING=1");
#endif
@ -2656,7 +2658,8 @@ pmdie(SIGNAL_ARGS)
break;
Shutdown = ImmediateShutdown;
ereport(LOG,
(errmsg("received immediate shutdown request")));
(errmsg("received immediate shutdown request at %s",
current_time_as_str())));
#ifdef USE_SYSTEMD
sd_notify(0, "STOPPING=1");
#endif

View File

@ -1691,6 +1691,10 @@ BufferSync(int flags)
UnlockBufHdr(bufHdr);
}
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"BufferSync(%x) beginning to write %d buffers at %s",
flags, num_to_write, current_time_as_str());
if (num_to_write == 0)
return; /* nothing to do */
@ -1745,6 +1749,11 @@ BufferSync(int flags)
if (num_written >= num_to_write)
break;
if ((num_written % 1024) == 0)
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"BufferSync(%x) wrote %d/%d buffers at %s",
flags, num_written, num_to_write, current_time_as_str());
/*
* Sleep to throttle our I/O rate.
*/
@ -1756,6 +1765,10 @@ BufferSync(int flags)
buf_id = 0;
}
elog(IsPostmasterEnvironment ? LOG : NOTICE,
"BufferSync(%x) done, wrote %d/%d buffers at %s",
flags, num_written, num_to_write, current_time_as_str());
/*
* Update checkpoint statistics. As noted above, this doesn't include
* buffers written by other backends or bgwriter scan.

View File

@ -691,6 +691,31 @@ GetUserNameFromId(Oid roleid, bool noerr)
return result;
}
/*
* Quick hack.
*/
const char *
current_time_as_str(void)
{
static char buf[128];
struct timeval now_timeval;
pg_time_t now;
char msbuf[8];
gettimeofday(&now_timeval, NULL);
now = (pg_time_t) now_timeval.tv_sec;
pg_strftime(buf, sizeof(buf),
/* leave room for milliseconds... */
"%Y-%m-%d %H:%M:%S %Z",
pg_localtime(&now, log_timezone));
/* 'paste' milliseconds into place... */
sprintf(msbuf, ".%03d", (int) (now_timeval.tv_usec / 1000));
memcpy(buf + 19, msbuf, 4);
return buf;
}
/*-------------------------------------------------------------------------
* Interlock-file support
@ -724,6 +749,9 @@ UnlinkLockFiles(int status, Datum arg)
}
/* Since we're about to exit, no need to reclaim storage */
lock_files = NIL;
if (IsPostmasterEnvironment)
elog(LOG, "lock files all released at %s", current_time_as_str());
}
/*

View File

@ -946,6 +946,32 @@ do_start(void)
}
/*
* Quick hack.
*/
const char *
current_time_as_str(void)
{
static char buf[128];
struct timeval now_timeval;
time_t now;
char msbuf[8];
gettimeofday(&now_timeval, NULL);
now = (time_t) now_timeval.tv_sec;
strftime(buf, sizeof(buf),
/* leave room for milliseconds... */
"%Y-%m-%d %H:%M:%S %Z",
localtime(&now));
/* 'paste' milliseconds into place... */
sprintf(msbuf, ".%03d", (int) (now_timeval.tv_usec / 1000));
memcpy(buf + 19, msbuf, 4);
return buf;
}
static void
do_stop(void)
{
@ -998,7 +1024,12 @@ do_stop(void)
"Shutdown will not complete until pg_stop_backup() is called.\n\n"));
}
print_msg(_("waiting for server to shut down..."));
if (!silent_mode)
{
fprintf(stdout, _("waiting for server to shut down at %s..."),
current_time_as_str());
fflush(stdout);
}
for (cnt = 0; cnt < wait_seconds; cnt++)
{
@ -1015,7 +1046,8 @@ do_stop(void)
{
print_msg(_(" failed\n"));
write_stderr(_("%s: server does not shut down\n"), progname);
write_stderr(_("%s: server does not shut down at %s\n"),
progname, current_time_as_str());
if (shutdown_mode == SMART_MODE)
write_stderr(_("HINT: The \"-m fast\" option immediately disconnects sessions rather than\n"
"waiting for session-initiated disconnection.\n"));

View File

@ -448,6 +448,7 @@ extern char *local_preload_libraries_string;
#define LOCK_FILE_LINE_LISTEN_ADDR 6
#define LOCK_FILE_LINE_SHMEM_KEY 7
extern const char *current_time_as_str(void);
extern void CreateDataDirLockFile(bool amPostmaster);
extern void CreateSocketLockFile(const char *socketfile, bool amPostmaster,
const char *socketDir);