Arrange to explicitly stop the pgstat processes at the same time we

begin the shutdown checkpoint; there isn't anything left for them to do,
so we may as well ensure that they shut down sooner rather than later.
Per discussion.
This commit is contained in:
Tom Lane 2004-06-14 18:08:19 +00:00
parent f4b0d28aa0
commit bbe42a1514
3 changed files with 93 additions and 91 deletions

View File

@ -13,7 +13,7 @@
*
* Copyright (c) 2001-2003, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.74 2004/06/03 02:08:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.75 2004/06/14 18:08:18 tgl Exp $
* ----------
*/
#include "postgres.h"
@ -65,12 +65,6 @@ bool pgstat_collect_querystring = false;
bool pgstat_collect_tuplelevel = false;
bool pgstat_collect_blocklevel = false;
/* ----------
* Other global variables
* ----------
*/
bool pgstat_is_running = false;
/* ----------
* Local data
* ----------
@ -79,7 +73,6 @@ NON_EXEC_STATIC int pgStatSock = -1;
static int pgStatPipe[2];
static struct sockaddr_storage pgStatAddr;
static int pgStatPid;
static time_t last_pgstat_start_time;
static long pgStatNumMessages = 0;
@ -125,6 +118,7 @@ static void pgstat_parseArgs(int argc, char *argv[]);
NON_EXEC_STATIC void PgstatBufferMain(int argc, char *argv[]);
NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
static void pgstat_recvbuffer(void);
static void pgstat_exit(SIGNAL_ARGS);
static void pgstat_die(SIGNAL_ARGS);
static int pgstat_add_backend(PgStat_MsgHdr *msg);
@ -496,19 +490,22 @@ pgstat_parseArgs(int argc, char *argv[])
* Called from postmaster at startup or after an existing collector
* died. Attempt to fire up a fresh statistics collector.
*
* Returns PID of child process, or 0 if fail.
*
* Note: if fail, we will be called again from the postmaster main loop.
* ----------
*/
void
int
pgstat_start(void)
{
time_t curtime;
pid_t pgStatPid;
/*
* Do nothing if no collector needed
*/
if (pgstat_is_running || !pgstat_collect_startcollector)
return;
if (!pgstat_collect_startcollector)
return 0;
/*
* Do nothing if too soon since last collector start. This is a
@ -520,7 +517,7 @@ pgstat_start(void)
curtime = time(NULL);
if ((unsigned int) (curtime - last_pgstat_start_time) <
(unsigned int) PGSTAT_RESTART_INTERVAL)
return;
return 0;
last_pgstat_start_time = curtime;
/*
@ -536,12 +533,11 @@ pgstat_start(void)
* pgstat_collect_startcollector on after it had been off.
*/
pgstat_collect_startcollector = false;
return;
return 0;
}
/*
* Okay, fork off the collector. Remember its PID for
* pgstat_ispgstat.
* Okay, fork off the collector.
*/
fflush(stdout);
@ -553,9 +549,9 @@ pgstat_start(void)
#endif
#ifdef EXEC_BACKEND
switch ((pgStatPid = (int) pgstat_forkexec(STAT_PROC_BUFFER)))
switch ((pgStatPid = pgstat_forkexec(STAT_PROC_BUFFER)))
#else
switch ((pgStatPid = (int) fork()))
switch ((pgStatPid = fork()))
#endif
{
case -1:
@ -565,7 +561,7 @@ pgstat_start(void)
#endif
ereport(LOG,
(errmsg("could not fork statistics buffer: %m")));
return;
return 0;
#ifndef EXEC_BACKEND
case 0:
@ -585,32 +581,11 @@ pgstat_start(void)
#endif
default:
pgstat_is_running = true;
return;
return (int) pgStatPid;
}
}
/* ----------
* pgstat_ispgstat() -
*
* Called from postmaster to check if a terminated child process
* was the statistics collector.
* ----------
*/
bool
pgstat_ispgstat(int pid)
{
if (!pgstat_is_running)
return false;
if (pgStatPid != pid)
return false;
/* Oh dear ... */
pgstat_is_running = false;
return true;
/* shouldn't get here */
return 0;
}
@ -1381,12 +1356,12 @@ PgstatBufferMain(int argc, char *argv[])
/*
* Ignore all signals usually bound to some action in the postmaster,
* except for SIGCHLD --- see pgstat_recvbuffer.
* except for SIGCHLD and SIGQUIT --- see pgstat_recvbuffer.
*/
pqsignal(SIGHUP, SIG_IGN);
pqsignal(SIGINT, SIG_IGN);
pqsignal(SIGTERM, SIG_IGN);
pqsignal(SIGQUIT, SIG_IGN);
pqsignal(SIGQUIT, pgstat_exit);
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, SIG_IGN);
@ -1476,9 +1451,9 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Reset signal handling. With the exception of restoring default
* SIGCHLD handling, this is a no-op in the non-EXEC_BACKEND case
* because we'll have inherited these settings from the buffer process;
* but it's not a no-op for EXEC_BACKEND.
* SIGCHLD and SIGQUIT handling, this is a no-op in the non-EXEC_BACKEND
* case because we'll have inherited these settings from the buffer
* process; but it's not a no-op for EXEC_BACKEND.
*/
pqsignal(SIGHUP, SIG_IGN);
pqsignal(SIGINT, SIG_IGN);
@ -1885,9 +1860,9 @@ pgstat_recvbuffer(void)
}
/*
* Wait for some work to do; but not for more than 10 seconds
* (this determines how quickly we will shut down after postmaster
* termination).
* Wait for some work to do; but not for more than 10 seconds.
* (This determines how quickly we will shut down after an
* ungraceful postmaster termination; so it needn't be very fast.)
*/
timeout.tv_sec = 10;
timeout.tv_usec = 0;
@ -1992,19 +1967,33 @@ pgstat_recvbuffer(void)
/*
* Make sure we forwarded all messages before we check for
* Postmaster termination.
* postmaster termination.
*/
if (msg_have != 0 || FD_ISSET(pgStatSock, &rfds))
continue;
/*
* If the postmaster has terminated, we've done our job.
* If the postmaster has terminated, we die too. (This is no longer
* the normal exit path, however.)
*/
if (!PostmasterIsAlive(true))
exit(0);
}
}
/* SIGQUIT signal handler for buffer process */
static void
pgstat_exit(SIGNAL_ARGS)
{
/*
* For now, we just nail the doors shut and get out of town. It might
* be cleaner to allow any pending messages to be sent, but that creates
* a tradeoff against speed of exit.
*/
exit(0);
}
/* SIGCHLD signal handler for buffer process */
static void
pgstat_die(SIGNAL_ARGS)
{

View File

@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.403 2004/06/11 03:54:43 momjian Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.404 2004/06/14 18:08:19 tgl Exp $
*
* NOTES
*
@ -191,7 +191,8 @@ char *preload_libraries_string = NULL;
/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0,
BgWriterPID = 0;
BgWriterPID = 0,
PgStatPID = 0;
/* Startup/shutdown state */
#define NoShutdown 0
@ -812,10 +813,9 @@ PostmasterMain(int argc, char *argv[])
whereToSendOutput = None;
/*
* Initialize and try to startup the statistics collector process
* Initialize the statistics collector stuff
*/
pgstat_init();
pgstat_start();
/*
* Load cached files for client authentication.
@ -1149,8 +1149,9 @@ ServerLoop(void)
}
/* If we have lost the stats collector, try to start a new one */
if (!pgstat_is_running)
pgstat_start();
if (PgStatPID == 0 &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
PgStatPID = pgstat_start();
/*
* Touch the socket and lock file at least every ten minutes, to ensure
@ -1509,14 +1510,6 @@ processCancelRequest(Port *port, void *pkt)
backendPID = (int) ntohl(canc->backendPID);
cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
if (backendPID == BgWriterPID)
{
ereport(DEBUG2,
(errmsg_internal("ignoring cancel request for bgwriter process %d",
backendPID)));
return;
}
/*
* See if we have a matching backend. In the EXEC_BACKEND case, we
* can no longer access the postmaster's own backend list, and must
@ -1698,6 +1691,7 @@ SIGHUP_handler(SIGNAL_ARGS)
SignalChildren(SIGHUP);
if (BgWriterPID != 0)
kill(BgWriterPID, SIGHUP);
/* PgStatPID does not currently need SIGHUP */
load_hba();
load_ident();
@ -1755,6 +1749,9 @@ pmdie(SIGNAL_ARGS)
/* And tell it to shut down */
if (BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
break;
case SIGINT:
@ -1796,6 +1793,9 @@ pmdie(SIGNAL_ARGS)
/* And tell it to shut down */
if (BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
break;
case SIGQUIT:
@ -1811,6 +1811,8 @@ pmdie(SIGNAL_ARGS)
kill(StartupPID, SIGQUIT);
if (BgWriterPID != 0)
kill(BgWriterPID, SIGQUIT);
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
if (DLGetHead(BackendList))
SignalChildren(SIGQUIT);
ExitPostmaster(0);
@ -1867,19 +1869,6 @@ reaper(SIGNAL_ARGS)
#endif /* WIN32 */
#endif /* HAVE_WAITPID */
/*
* Check if this child was the statistics collector. If so, try to
* start a new one. (If fail, we'll try again in future cycles of
* the main loop.)
*/
if (pgstat_ispgstat(pid))
{
LogChildExit(LOG, gettext("statistics collector process"),
pid, exitstatus);
pgstat_start();
continue;
}
/*
* Check if this child was a startup process.
*/
@ -1909,9 +1898,12 @@ reaper(SIGNAL_ARGS)
/*
* Go to shutdown mode if a shutdown request was pending.
* Otherwise, try to start the stats collector too.
*/
if (Shutdown > NoShutdown && BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
else if (PgStatPID == 0 && Shutdown == NoShutdown)
PgStatPID = pgstat_start();
continue;
}
@ -1921,6 +1913,7 @@ reaper(SIGNAL_ARGS)
*/
if (BgWriterPID != 0 && pid == BgWriterPID)
{
BgWriterPID = 0;
if (exitstatus == 0 && Shutdown > NoShutdown &&
!FatalError && !DLGetHead(BackendList))
{
@ -1936,12 +1929,28 @@ reaper(SIGNAL_ARGS)
/*
* Any unexpected exit of the bgwriter is treated as a crash.
*/
LogChildExit(DEBUG2, gettext("background writer process"),
LogChildExit(LOG, gettext("background writer process"),
pid, exitstatus);
HandleChildCrash(pid, exitstatus);
continue;
}
/*
* Was it the statistics collector? If so, just try to start a new
* one; no need to force reset of the rest of the system. (If fail,
* we'll try again in future cycles of the main loop.)
*/
if (PgStatPID != 0 && pid == PgStatPID)
{
PgStatPID = 0;
if (exitstatus != 0)
LogChildExit(LOG, gettext("statistics collector process"),
pid, exitstatus);
if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
PgStatPID = pgstat_start();
continue;
}
/*
* Else do standard backend child cleanup.
*/
@ -2113,6 +2122,17 @@ HandleChildCrash(int pid,
kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/* Force a power-cycle of the pgstat processes too */
/* (Shouldn't be necessary, but just for luck) */
if (PgStatPID != 0 && !FatalError)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
"SIGQUIT",
(int) PgStatPID)));
kill(PgStatPID, SIGQUIT);
}
FatalError = true;
}
@ -2152,7 +2172,7 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
}
/*
* Send a signal to all backend children.
* Send a signal to all backend children (but NOT special children)
*/
static void
SignalChildren(int signal)
@ -2954,7 +2974,7 @@ PostmasterRandom(void)
}
/*
* Count up number of child processes.
* Count up number of child processes (regular backends only)
*/
static int
CountChildren(void)

View File

@ -5,7 +5,7 @@
*
* Copyright (c) 2001-2003, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/pgstat.h,v 1.23 2004/05/29 22:48:22 tgl Exp $
* $PostgreSQL: pgsql/src/include/pgstat.h,v 1.24 2004/06/14 18:08:19 tgl Exp $
* ----------
*/
#ifndef PGSTAT_H
@ -335,20 +335,13 @@ extern bool pgstat_collect_querystring;
extern bool pgstat_collect_tuplelevel;
extern bool pgstat_collect_blocklevel;
/* ----------
* Other global variables
* ----------
*/
extern bool pgstat_is_running;
/* ----------
* Functions called from postmaster
* ----------
*/
extern void pgstat_init(void);
extern void pgstat_start(void);
extern bool pgstat_ispgstat(int pid);
extern int pgstat_start(void);
extern void pgstat_beterm(int pid);
#ifdef EXEC_BACKEND