Expose track_iotiming data via the statistics collector.

Ants Aasma's original patch to add timing information for buffer I/O
requests exposed this data at the relation level, which was judged too
costly.  I've here exposed it at the database level instead.
This commit is contained in:
Robert Haas 2012-04-05 11:37:31 -04:00
parent 05dbd4a773
commit 644828908f
10 changed files with 99 additions and 9 deletions

View File

@ -4295,8 +4295,11 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
default, because it will repeatedly query the operating system for
the current time, which may cause significant overhead on some
platforms. You can use the <xref linkend="pgtesttiming"> tool to
measure the overhead of timing on your system. Only superusers can
change this setting.
measure the overhead of timing on your system. Timing information is
displayed in <xref linkend="pg-stat-database-view">, in the output of
<xref linkend="sql-explain"> when the <literal>BUFFERS</> option is
used, and by <xref linkend="pgstatstatements">. Only superusers can
change this setting.
</para>
</listitem>
</varlistentry>

View File

@ -144,6 +144,11 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
statistics are collected about table and index accesses.
</para>
<para>
The parameter <xref linkend="guc-track-iotiming"> enables monitoring
of block read and write times.
</para>
<para>
The parameter <xref linkend="guc-track-functions"> enables tracking of
usage of user-defined functions.
@ -961,6 +966,20 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
This value can also be returned by directly calling
the <function>pg_stat_get_db_deadlocks</function> function.</entry>
</row>
<row>
<entry>block_read_time</entry>
<entry><type>bigint</></entry>
<entry>Time spent by backends reading data file blocks, in milliseconds.
The same value can be returned in microseconds by directly calling
the <function>pg_stat_get_db_block_time_read</function> function.</entry>
</row>
<row>
<entry>block_write_time</entry>
<entry><type>bigint</></entry>
<entry>Time spent by backends writing data file blocks, in milliseconds.
The same value can be returned in microseconds by directly calling
the <function>pg_stat_get_db_block_time_write</function> function.</entry>
</row>
<row>
<entry>stats_reset</entry>
<entry><type>timestamptz</></entry>

View File

@ -604,6 +604,8 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_temp_files(D.oid) AS temp_files,
pg_stat_get_db_temp_bytes(D.oid) AS temp_bytes,
pg_stat_get_db_deadlocks(D.oid) AS deadlocks,
pg_stat_get_db_block_time_read(D.oid) / 1000 AS block_read_time,
pg_stat_get_db_block_time_write(D.oid) / 1000 AS block_write_time,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM pg_database D;

View File

@ -197,6 +197,8 @@ static PgStat_SubXactStatus *pgStatXactStack = NULL;
static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockTimeRead = 0;
PgStat_Counter pgStatBlockTimeWrite = 0;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@ -782,13 +784,15 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
return;
/*
* Report accumulated xact commit/rollback whenever we send a normal
* tabstat message
* Report accumulated xact commit/rollback and I/O timings whenever we send
* a normal tabstat message
*/
if (OidIsValid(tsmsg->m_databaseid))
{
tsmsg->m_xact_commit = pgStatXactCommit;
tsmsg->m_xact_rollback = pgStatXactRollback;
tsmsg->m_block_time_read = pgStatBlockTimeRead;
tsmsg->m_block_time_write = pgStatBlockTimeWrite;
pgStatXactCommit = 0;
pgStatXactRollback = 0;
}
@ -796,6 +800,8 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
{
tsmsg->m_xact_commit = 0;
tsmsg->m_xact_rollback = 0;
tsmsg->m_block_time_read = 0;
tsmsg->m_block_time_write = 0;
}
n = tsmsg->m_nentries;
@ -3352,6 +3358,8 @@ pgstat_get_db_entry(Oid databaseid, bool create)
result->n_temp_files = 0;
result->n_temp_bytes = 0;
result->n_deadlocks = 0;
result->n_block_time_read = 0;
result->n_block_time_write = 0;
result->stat_reset_timestamp = GetCurrentTimestamp();
@ -4070,6 +4078,8 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
*/
dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
dbentry->n_block_time_read += msg->m_block_time_read;
dbentry->n_block_time_write += msg->m_block_time_write;
/*
* Process all table entries in the message.
@ -4266,6 +4276,8 @@ pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
dbentry->n_temp_bytes = 0;
dbentry->n_temp_files = 0;
dbentry->n_deadlocks = 0;
dbentry->n_block_time_read = 0;
dbentry->n_block_time_write = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();

View File

@ -450,6 +450,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
INSTR_TIME_ADD(pgBufferUsage.time_read, io_time);
}
@ -1888,7 +1889,8 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
{
XLogRecPtr recptr;
ErrorContextCallback errcontext;
instr_time io_start, io_end;
instr_time io_start,
io_time;
/*
* Acquire the buffer's io_in_progress lock. If StartBufferIO returns
@ -1947,8 +1949,10 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
if (track_iotiming)
{
INSTR_TIME_SET_CURRENT(io_end);
INSTR_TIME_ACCUM_DIFF(pgBufferUsage.time_write, io_end, io_start);
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
INSTR_TIME_ADD(pgBufferUsage.time_write, io_time);
}
pgBufferUsage.shared_blks_written++;

View File

@ -82,6 +82,8 @@ extern Datum pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_db_stat_reset_time(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_db_temp_files(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_db_temp_bytes(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_db_block_time_read(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_db_block_time_write(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS);
@ -1357,6 +1359,36 @@ pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS)
PG_RETURN_INT64(result);
}
Datum
pg_stat_get_db_block_time_read(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
int64 result;
PgStat_StatDBEntry *dbentry;
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
result = 0;
else
result = (int64) (dbentry->n_block_time_read);
PG_RETURN_INT64(result);
}
Datum
pg_stat_get_db_block_time_write(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
int64 result;
PgStat_StatDBEntry *dbentry;
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
result = 0;
else
result = (int64) (dbentry->n_block_time_write);
PG_RETURN_INT64(result);
}
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201203191
#define CATALOG_VERSION_NO 201204051
#endif

View File

@ -2662,6 +2662,10 @@ DATA(insert OID = 3150 ( pg_stat_get_db_temp_files PGNSP PGUID 12 1 0 0 0 f f f
DESCR("statistics: number of temporary files written");
DATA(insert OID = 3151 ( pg_stat_get_db_temp_bytes PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_temp_bytes _null_ _null_ _null_ ));
DESCR("statistics: number of bytes in temporary files written");
DATA(insert OID = 2844 ( pg_stat_get_db_block_time_read PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_block_time_read _null_ _null_ _null_ ));
DESCR("statistics: block read time in microseconds");
DATA(insert OID = 2845 ( pg_stat_get_db_block_time_write PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_block_time_write _null_ _null_ _null_ ));
DESCR("statistics: block write time in microseconds");
DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ ));
DESCR("statistics: number of timed checkpoints started by the bgwriter");
DATA(insert OID = 2770 ( pg_stat_get_bgwriter_requested_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_requested_checkpoints _null_ _null_ _null_ ));

View File

@ -233,6 +233,8 @@ typedef struct PgStat_MsgTabstat
int m_nentries;
int m_xact_commit;
int m_xact_rollback;
PgStat_Counter m_block_time_read;
PgStat_Counter m_block_time_write;
PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
} PgStat_MsgTabstat;
@ -536,6 +538,8 @@ typedef struct PgStat_StatDBEntry
PgStat_Counter n_temp_files;
PgStat_Counter n_temp_bytes;
PgStat_Counter n_deadlocks;
PgStat_Counter n_block_time_read; /* times in microseconds */
PgStat_Counter n_block_time_write;
TimestampTz stat_reset_timestamp;
@ -721,6 +725,12 @@ extern char *pgstat_stat_filename;
*/
extern PgStat_MsgBgWriter BgWriterStats;
/*
* Updated by pgstat_count_time_* macros.
*/
extern PgStat_Counter pgStatBlockTimeRead;
extern PgStat_Counter pgStatBlockTimeWrite;
/* ----------
* Functions called from postmaster
* ----------
@ -816,6 +826,10 @@ extern void pgstat_initstats(Relation rel);
if ((rel)->pgstat_info != NULL) \
(rel)->pgstat_info->t_counts.t_blocks_hit++; \
} while (0)
#define pgstat_count_buffer_read_time(n) \
pgStatBlockTimeRead += (n);
#define pgstat_count_buffer_write_time(n) \
pgStatBlockTimeWrite += (n);
extern void pgstat_count_heap_insert(Relation rel, int n);
extern void pgstat_count_heap_update(Relation rel, bool hot);

View File

@ -1296,7 +1296,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
pg_stat_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
pg_stat_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze, pg_stat_get_vacuum_count(c.oid) AS vacuum_count, pg_stat_get_autovacuum_count(c.oid) AS autovacuum_count, pg_stat_get_analyze_count(c.oid) AS analyze_count, pg_stat_get_autoanalyze_count(c.oid) AS autoanalyze_count FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
pg_stat_bgwriter | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync, pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
pg_stat_database | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted, pg_stat_get_db_conflict_all(d.oid) AS conflicts, pg_stat_get_db_temp_files(d.oid) AS temp_files, pg_stat_get_db_temp_bytes(d.oid) AS temp_bytes, pg_stat_get_db_deadlocks(d.oid) AS deadlocks, pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset FROM pg_database d;
pg_stat_database | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted, pg_stat_get_db_conflict_all(d.oid) AS conflicts, pg_stat_get_db_temp_files(d.oid) AS temp_files, pg_stat_get_db_temp_bytes(d.oid) AS temp_bytes, pg_stat_get_db_deadlocks(d.oid) AS deadlocks, (pg_stat_get_db_block_time_read(d.oid) / 1000) AS block_read_time, (pg_stat_get_db_block_time_write(d.oid) / 1000) AS block_write_time, pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset FROM pg_database d;
pg_stat_database_conflicts | SELECT d.oid AS datid, d.datname, pg_stat_get_db_conflict_tablespace(d.oid) AS confl_tablespace, pg_stat_get_db_conflict_lock(d.oid) AS confl_lock, pg_stat_get_db_conflict_snapshot(d.oid) AS confl_snapshot, pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin, pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock FROM pg_database d;
pg_stat_replication | SELECT s.pid, s.usesysid, u.rolname AS usename, s.application_name, s.client_addr, s.client_hostname, s.client_port, s.backend_start, w.state, w.sent_location, w.write_location, w.flush_location, w.replay_location, w.sync_priority, w.sync_state FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, waiting, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port), pg_authid u, pg_stat_get_wal_senders() w(pid, state, sent_location, write_location, flush_location, replay_location, sync_priority, sync_state) WHERE ((s.usesysid = u.oid) AND (s.pid = w.pid));
pg_stat_sys_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text));