Change the pgstat logic so that the stats collector writes the stats file only

upon requests from backends, rather than on a fixed 500msec cycle.  (There's
still throttling logic to ensure it writes no more often than once per
500msec, though.)  This should result in a significant reduction in stats file
write traffic in typical scenarios where the stats are demanded only
infrequently.

This approach also means that the former difficulty with changing
stats_temp_directory on-the-fly has gone away, so remove the caution about
that as well as the thrashing we did to minimize the trouble window.

In passing, also fix pgstat_report_stat() so that we will send a stats
message if we have function call stats but not table stats to report;
this fixes a bug in the recent patch to support function-call stats.

Martin Pihlak
This commit is contained in:
Tom Lane 2008-11-03 01:17:08 +00:00
parent d7112cfa88
commit 3c2313f481
3 changed files with 204 additions and 65 deletions

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.191 2008/09/30 10:52:09 heikki Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.192 2008/11/03 01:17:08 tgl Exp $ -->
<chapter Id="runtime-config">
<title>Server Configuration</title>
@ -3347,9 +3347,6 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
improved performance.
This parameter can only be set in the <filename>postgresql.conf</>
file or on the server command line.
If this parameter is changed while the server is running, there is a
small window of time until the new statistics file has been written
during which the statistics functions might return no information.
</para>
</listitem>
</varlistentry>

View File

@ -13,7 +13,7 @@
*
* Copyright (c) 2001-2008, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.181 2008/08/25 18:55:43 mha Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.182 2008/11/03 01:17:08 tgl Exp $
* ----------
*/
#include "postgres.h"
@ -75,8 +75,14 @@
* Timer definitions.
* ----------
*/
#define PGSTAT_STAT_INTERVAL 500 /* How often to write the status file;
* in milliseconds. */
#define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file
* updates; in milliseconds. */
#define PGSTAT_RETRY_DELAY 10 /* How long to wait between statistics
* update requests; in milliseconds. */
#define PGSTAT_MAX_WAIT_TIME 5000 /* Maximum time to wait for a stats
* file update; in milliseconds. */
#define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to restart a
* failed statistics collector; in
@ -85,6 +91,8 @@
#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
* death; in seconds. */
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
/* ----------
* The initial size hints for the hash tables used in the collector.
@ -158,6 +166,12 @@ static TabStatusArray *pgStatTabList = NULL;
*/
static HTAB *pgStatFunctions = NULL;
/*
* Indicates if backend has some function stats that it hasn't yet
* sent to the collector.
*/
static bool have_function_stats = false;
/*
* Tuple insertion/deletion counts for an open transaction can't be propagated
* into PgStat_TableStatus counters until we know if it is going to commit
@ -201,8 +215,12 @@ static int localNumBackends = 0;
*/
static PgStat_GlobalStats globalStats;
/* Last time the collector successfully wrote the stats file */
static TimestampTz last_statwrite;
/* Latest statistics request time from backends */
static TimestampTz last_statrequest;
static volatile bool need_exit = false;
static volatile bool need_statwrite = false;
static volatile bool got_SIGHUP = false;
/*
@ -223,7 +241,6 @@ static pid_t pgstat_forkexec(void);
NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
static void pgstat_exit(SIGNAL_ARGS);
static void force_statwrite(SIGNAL_ARGS);
static void pgstat_beshutdown_hook(int code, Datum arg);
static void pgstat_sighup_handler(SIGNAL_ARGS);
@ -244,6 +261,7 @@ static void pgstat_setup_memcxt(void);
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
static void pgstat_send(void *msg, int len);
static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len);
static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
@ -655,9 +673,8 @@ pgstat_report_stat(bool force)
int i;
/* Don't expend a clock check if nothing to do */
/* Note: we ignore pending function stats in this test ... OK? */
if (pgStatTabList == NULL ||
pgStatTabList->tsa_used == 0)
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0)
&& !have_function_stats)
return;
/*
@ -823,6 +840,8 @@ pgstat_send_funcstats(void)
if (msg.m_nentries > 0)
pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
msg.m_nentries * sizeof(PgStat_FunctionEntry));
have_function_stats = false;
}
@ -1242,6 +1261,24 @@ pgstat_ping(void)
pgstat_send(&msg, sizeof(msg));
}
/* ----------
* pgstat_send_inquiry() -
*
* Notify collector that we need fresh data.
* ts specifies the minimum acceptable timestamp for the stats file.
* ----------
*/
static void
pgstat_send_inquiry(TimestampTz ts)
{
PgStat_MsgInquiry msg;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
msg.inquiry_time = ts;
pgstat_send(&msg, sizeof(msg));
}
/*
* Initialize function call usage data.
* Called by the executor before invoking a function.
@ -1341,6 +1378,9 @@ pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu, bool finalize)
fs->f_numcalls++;
fs->f_time = f_total;
INSTR_TIME_ADD(fs->f_time_self, f_self);
/* indicate that we have something to send */
have_function_stats = true;
}
@ -2538,8 +2578,6 @@ pgstat_send_bgwriter(void)
NON_EXEC_STATIC void
PgstatCollectorMain(int argc, char *argv[])
{
struct itimerval write_timeout;
bool need_timer = false;
int len;
PgStat_Msg msg;
@ -2571,13 +2609,13 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Ignore all signals usually bound to some action in the postmaster,
* except SIGQUIT and SIGALRM.
* except SIGQUIT.
*/
pqsignal(SIGHUP, pgstat_sighup_handler);
pqsignal(SIGINT, SIG_IGN);
pqsignal(SIGTERM, SIG_IGN);
pqsignal(SIGQUIT, pgstat_exit);
pqsignal(SIGALRM, force_statwrite);
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, SIG_IGN);
pqsignal(SIGUSR2, SIG_IGN);
@ -2596,12 +2634,8 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Arrange to write the initial status file right away
*/
need_statwrite = true;
/* Preset the delay between status file writes */
MemSet(&write_timeout, 0, sizeof(struct itimerval));
write_timeout.it_value.tv_sec = PGSTAT_STAT_INTERVAL / 1000;
write_timeout.it_value.tv_usec = (PGSTAT_STAT_INTERVAL % 1000) * 1000;
last_statrequest = GetCurrentTimestamp();
last_statwrite = last_statrequest - 1;
/*
* Read in an existing statistics stats file or initialize the stats to
@ -2623,8 +2657,9 @@ PgstatCollectorMain(int argc, char *argv[])
* death of our parent postmaster.
*
* For performance reasons, we don't want to do a PostmasterIsAlive() test
* after every message; instead, do it at statwrite time and if
* select()/poll() is interrupted by timeout.
* after every message; instead, do it only when select()/poll() is
* interrupted by timeout. In essence, we'll stay alive as long as
* backends keep sending us stuff often, even if the postmaster is gone.
*/
for (;;)
{
@ -2638,32 +2673,19 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Reload configuration if we got SIGHUP from the postmaster.
* Also, signal a new write of the file, so we drop a new file as
* soon as possible of the directory for it changes.
*/
if (got_SIGHUP)
{
ProcessConfigFile(PGC_SIGHUP);
got_SIGHUP = false;
need_statwrite = true;
}
/*
* If time to write the stats file, do so. Note that the alarm
* interrupt isn't re-enabled immediately, but only after we next
* receive a stats message; so no cycles are wasted when there is
* nothing going on.
* Write the stats file if a new request has arrived that is not
* satisfied by existing file.
*/
if (need_statwrite)
{
/* Check for postmaster death; if so we'll write file below */
if (!PostmasterIsAlive(true))
break;
if (last_statwrite < last_statrequest)
pgstat_write_statsfile(false);
need_statwrite = false;
need_timer = true;
}
/*
* Wait for a message to arrive; but not for more than
@ -2756,6 +2778,10 @@ PgstatCollectorMain(int argc, char *argv[])
case PGSTAT_MTYPE_DUMMY:
break;
case PGSTAT_MTYPE_INQUIRY:
pgstat_recv_inquiry((PgStat_MsgInquiry *) &msg, len);
break;
case PGSTAT_MTYPE_TABSTAT:
pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, len);
break;
@ -2800,19 +2826,6 @@ PgstatCollectorMain(int argc, char *argv[])
default:
break;
}
/*
* If this is the first message after we wrote the stats file the
* last time, enable the alarm interrupt to make it be written
* again later.
*/
if (need_timer)
{
if (setitimer(ITIMER_REAL, &write_timeout, NULL))
ereport(ERROR,
(errmsg("could not set statistics collector timer: %m")));
need_timer = false;
}
}
else
{
@ -2841,13 +2854,6 @@ pgstat_exit(SIGNAL_ARGS)
need_exit = true;
}
/* SIGALRM signal handler for collector process */
static void
force_statwrite(SIGNAL_ARGS)
{
need_statwrite = true;
}
/* SIGHUP handler for collector process */
static void
pgstat_sighup_handler(SIGNAL_ARGS)
@ -2943,7 +2949,7 @@ pgstat_write_statsfile(bool permanent)
/*
* Open the statistics temp file to write out the current values.
*/
fpout = fopen(tmpfile, PG_BINARY_W);
fpout = AllocateFile(tmpfile, PG_BINARY_W);
if (fpout == NULL)
{
ereport(LOG,
@ -2953,6 +2959,11 @@ pgstat_write_statsfile(bool permanent)
return;
}
/*
* Set the timestamp of the stats file.
*/
globalStats.stats_timestamp = GetCurrentTimestamp();
/*
* Write the file header --- currently just a format ID.
*/
@ -3017,10 +3028,10 @@ pgstat_write_statsfile(bool permanent)
(errcode_for_file_access(),
errmsg("could not write temporary statistics file \"%s\": %m",
tmpfile)));
fclose(fpout);
FreeFile(fpout);
unlink(tmpfile);
}
else if (fclose(fpout) < 0)
else if (FreeFile(fpout) < 0)
{
ereport(LOG,
(errcode_for_file_access(),
@ -3036,6 +3047,22 @@ pgstat_write_statsfile(bool permanent)
tmpfile, statfile)));
unlink(tmpfile);
}
else
{
/*
* Successful write, so update last_statwrite.
*/
last_statwrite = globalStats.stats_timestamp;
/*
* It's not entirely clear whether there could be clock skew between
* backends and the collector; but just in case someone manages to
* send us a stats request time that's far in the future, reset it.
* This ensures that no inquiry message can cause more than one stats
* file write to occur.
*/
last_statrequest = last_statwrite;
}
if (permanent)
unlink(pgstat_stat_filename);
@ -3289,6 +3316,52 @@ done:
return dbhash;
}
/* ----------
* pgstat_read_statsfile_timestamp() -
*
* Attempt to fetch the timestamp of an existing stats file.
* Returns TRUE if successful (timestamp is stored at *ts).
* ----------
*/
static bool
pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
{
PgStat_GlobalStats myGlobalStats;
FILE *fpin;
int32 format_id;
const char *statfile = permanent?PGSTAT_STAT_PERMANENT_FILENAME:pgstat_stat_filename;
/*
* Try to open the status file.
*/
if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
return false;
/*
* Verify it's of the expected format.
*/
if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
|| format_id != PGSTAT_FILE_FORMAT_ID)
{
FreeFile(fpin);
return false;
}
/*
* Read global stats struct
*/
if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) != sizeof(myGlobalStats))
{
FreeFile(fpin);
return false;
}
*ts = myGlobalStats.stats_timestamp;
FreeFile(fpin);
return true;
}
/*
* If not already done, read the statistics collector stats file into
* some hash tables. The results will be kept until pgstat_clear_snapshot()
@ -3297,11 +3370,50 @@ done:
static void
backend_read_statsfile(void)
{
TimestampTz min_ts;
int count;
/* already read it? */
if (pgStatDBHash)
return;
Assert(!pgStatRunningInCollector);
/*
* We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL msec
* before now. This indirectly ensures that the collector needn't write
* the file more often than PGSTAT_STAT_INTERVAL.
*
* Note that we don't recompute min_ts after sleeping; so we might end up
* accepting a file a bit older than PGSTAT_STAT_INTERVAL. In practice
* that shouldn't happen, though, as long as the sleep time is less than
* PGSTAT_STAT_INTERVAL; and we don't want to lie to the collector about
* what our cutoff time really is.
*/
min_ts = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
-PGSTAT_STAT_INTERVAL);
/*
* Loop until fresh enough stats file is available or we ran out of time.
* The stats inquiry message is sent repeatedly in case collector drops it.
*/
for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++)
{
TimestampTz file_ts;
CHECK_FOR_INTERRUPTS();
if (pgstat_read_statsfile_timestamp(false, &file_ts) &&
file_ts >= min_ts)
break;
/* Not there or too old, so kick the collector and wait a bit */
pgstat_send_inquiry(min_ts);
pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
}
if (count >= PGSTAT_POLL_LOOP_COUNT)
elog(WARNING, "pgstat wait timeout");
/* Autovacuum launcher wants stats about all databases */
if (IsAutoVacuumLauncherProcess())
pgStatDBHash = pgstat_read_statsfile(InvalidOid, false);
@ -3353,6 +3465,20 @@ pgstat_clear_snapshot(void)
}
/* ----------
* pgstat_recv_inquiry() -
*
* Process stat inquiry requests.
* ----------
*/
static void
pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
{
if (msg->inquiry_time > last_statrequest)
last_statrequest = msg->inquiry_time;
}
/* ----------
* pgstat_recv_tabstat() -
*

View File

@ -5,7 +5,7 @@
*
* Copyright (c) 2001-2008, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/pgstat.h,v 1.78 2008/08/15 08:37:40 mha Exp $
* $PostgreSQL: pgsql/src/include/pgstat.h,v 1.79 2008/11/03 01:17:08 tgl Exp $
* ----------
*/
#ifndef PGSTAT_H
@ -33,6 +33,7 @@ typedef enum TrackFunctionsLevel
typedef enum StatMsgType
{
PGSTAT_MTYPE_DUMMY,
PGSTAT_MTYPE_INQUIRY,
PGSTAT_MTYPE_TABSTAT,
PGSTAT_MTYPE_TABPURGE,
PGSTAT_MTYPE_DROPDB,
@ -173,6 +174,19 @@ typedef struct PgStat_MsgDummy
} PgStat_MsgDummy;
/* ----------
* PgStat_MsgInquiry Sent by a backend to ask the collector
* to write the stats file.
* ----------
*/
typedef struct PgStat_MsgInquiry
{
PgStat_MsgHdr m_hdr;
TimestampTz inquiry_time; /* minimum acceptable file timestamp */
} PgStat_MsgInquiry;
/* ----------
* PgStat_TableEntry Per-table info in a MsgTabstat
* ----------
@ -392,6 +406,7 @@ typedef union PgStat_Msg
{
PgStat_MsgHdr msg_hdr;
PgStat_MsgDummy msg_dummy;
PgStat_MsgInquiry msg_inquiry;
PgStat_MsgTabstat msg_tabstat;
PgStat_MsgTabpurge msg_tabpurge;
PgStat_MsgDropdb msg_dropdb;
@ -413,7 +428,7 @@ typedef union PgStat_Msg
* ------------------------------------------------------------
*/
#define PGSTAT_FILE_FORMAT_ID 0x01A5BC97
#define PGSTAT_FILE_FORMAT_ID 0x01A5BC98
/* ----------
* PgStat_StatDBEntry The collector's data per database
@ -494,6 +509,7 @@ typedef struct PgStat_StatFuncEntry
*/
typedef struct PgStat_GlobalStats
{
TimestampTz stats_timestamp; /* time of stats file update */
PgStat_Counter timed_checkpoints;
PgStat_Counter requested_checkpoints;
PgStat_Counter buf_written_checkpoints;