From 960869da0803427d14335bba24393f414b476e2c Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Sun, 17 Jan 2021 13:34:09 +0100 Subject: [PATCH] Add pg_stat_database counters for sessions and session time This add counters for number of sessions, the different kind of session termination types, and timers for how much time is spent in active vs idle in a database to pg_stat_database. Internally this also renames the parameter "force" to disconnect. This was the only use-case for the parameter before, so repurposing it to this mroe narrow usecase makes things cleaner than inventing something new. Author: Laurenz Albe Reviewed-By: Magnus Hagander, Soumyadeep Chakraborty, Masahiro Ikeda Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at --- doc/src/sgml/monitoring.sgml | 77 +++++++++++++++ src/backend/catalog/system_views.sql | 7 ++ src/backend/postmaster/pgstat.c | 134 ++++++++++++++++++++++++++- src/backend/tcop/postgres.c | 11 ++- src/backend/utils/adt/pgstatfuncs.c | 94 +++++++++++++++++++ src/backend/utils/error/elog.c | 8 ++ src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 32 +++++++ src/include/pgstat.h | 39 ++++++++ src/test/regress/expected/rules.out | 7 ++ 10 files changed, 405 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 3cdb1aff3c..f05140dd42 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3737,6 +3737,83 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i + + + session_time double precision + + + Time spent by database sessions in this database, in milliseconds + (note that statistics are only updated when the state of a session + changes, so if sessions have been idle for a long time, this idle time + won't be included) + + + + + + active_time double precision + + + Time spent executing SQL statements in this database, in milliseconds + (this corresponds to the states active and + fastpath function call in + + pg_stat_activity) + + + + + + idle_in_transaction_time double precision + + + Time spent idling while in a transaction in this database, in milliseconds + (this corresponds to the states idle in transaction and + idle in transaction (aborted) in + + pg_stat_activity) + + + + + + sessions bigint + + + Total number of sessions established to this database + + + + + + sessions_abandoned bigint + + + Number of database sessions to this database that were terminated + because connection to the client was lost + + + + + + sessions_fatal bigint + + + Number of database sessions to this database that were terminated + by fatal errors + + + + + + sessions_killed bigint + + + Number of database sessions to this database that were terminated + by operator intervention + + + stats_reset timestamp with time zone diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 5d89e77dbe..fa58afd9d7 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -924,6 +924,13 @@ CREATE VIEW pg_stat_database AS pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure, pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time, pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time, + pg_stat_get_db_session_time(D.oid) AS session_time, + pg_stat_get_db_active_time(D.oid) AS active_time, + pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time, + pg_stat_get_db_sessions(D.oid) AS sessions, + pg_stat_get_db_sessions_abandoned(D.oid) AS sessions_abandoned, + pg_stat_get_db_sessions_fatal(D.oid) AS sessions_fatal, + pg_stat_get_db_sessions_killed(D.oid) AS sessions_killed, pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset FROM ( SELECT 0 AS oid, NULL::name AS datname diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 3f24a33ef1..f75b52719d 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -258,6 +258,9 @@ static int pgStatXactCommit = 0; static int pgStatXactRollback = 0; PgStat_Counter pgStatBlockReadTime = 0; PgStat_Counter pgStatBlockWriteTime = 0; +static PgStat_Counter pgStatActiveTime = 0; +static PgStat_Counter pgStatTransactionIdleTime = 0; +SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL; /* Record that's written to 2PC state file when pgstat state is persisted */ typedef struct TwoPhasePgStatRecord @@ -343,6 +346,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg); static void pgstat_send_funcstats(void); static void pgstat_send_slru(void); static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid); +static void pgstat_send_connstats(bool disconnect, TimestampTz last_report); static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared); @@ -378,6 +382,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len); static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len); static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len); static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len); +static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len); static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len); static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len); @@ -855,10 +860,14 @@ allow_immediate_pgstat_restart(void) * per-table and function usage statistics to the collector. Note that this * is called only when not within a transaction, so it is fair to use * transaction stop time as an approximation of current time. + * + * "disconnect" is "true" only for the last call before the backend + * exits. This makes sure that no data is lost and that interrupted + * sessions are reported correctly. * ---------- */ void -pgstat_report_stat(bool force) +pgstat_report_stat(bool disconnect) { /* we assume this inits to all zeroes: */ static const PgStat_TableCounts all_zeroes; @@ -873,17 +882,22 @@ pgstat_report_stat(bool force) /* Don't expend a clock check if nothing to do */ if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) && pgStatXactCommit == 0 && pgStatXactRollback == 0 && - !have_function_stats) + !have_function_stats && !disconnect) return; /* * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL - * msec since we last sent one, or the caller wants to force stats out. + * msec since we last sent one, or the backend is about to exit. */ now = GetCurrentTransactionStopTimestamp(); - if (!force && + if (!disconnect && !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL)) return; + + /* for backends, send connection statistics */ + if (MyBackendType == B_BACKEND) + pgstat_send_connstats(disconnect, last_report); + last_report = now; /* @@ -1351,6 +1365,48 @@ pgstat_drop_relation(Oid relid) #endif /* NOT_USED */ +/* ---------- + * pgstat_send_connstats() - + * + * Tell the collector about session statistics. + * The parameter "disconnect" will be true when the backend exits. + * "last_report" is the last time we were called (0 if never). + * ---------- + */ +static void +pgstat_send_connstats(bool disconnect, TimestampTz last_report) +{ + PgStat_MsgConn msg; + long secs; + int usecs; + + if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION); + msg.m_databaseid = MyDatabaseId; + + /* session time since the last report */ + TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report), + GetCurrentTimestamp(), + &secs, &usecs); + msg.m_session_time = secs * 1000000 + usecs; + + msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET; + + msg.m_active_time = pgStatActiveTime; + pgStatActiveTime = 0; + + msg.m_idle_in_xact_time = pgStatTransactionIdleTime; + pgStatTransactionIdleTime = 0; + + /* report a new session only the first time */ + msg.m_count = (last_report == 0) ? 1 : 0; + + pgstat_send(&msg, sizeof(PgStat_MsgConn)); +} + + /* ---------- * pgstat_reset_counters() - * @@ -3348,6 +3404,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str) } current_timestamp = GetCurrentTimestamp(); + /* + * If the state has changed from "active" or "idle in transaction", + * calculate the duration. + */ + if ((beentry->st_state == STATE_RUNNING || + beentry->st_state == STATE_FASTPATH || + beentry->st_state == STATE_IDLEINTRANSACTION || + beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) && + state != beentry->st_state) + { + long secs; + int usecs; + + TimestampDifference(beentry->st_state_start_timestamp, + current_timestamp, + &secs, &usecs); + + if (beentry->st_state == STATE_RUNNING || + beentry->st_state == STATE_FASTPATH) + pgStatActiveTime += secs * 1000000 + usecs; + else + pgStatTransactionIdleTime += secs * 1000000 + usecs; + } + /* * Now update the status entry */ @@ -4919,6 +4999,10 @@ PgstatCollectorMain(int argc, char *argv[]) pgstat_recv_replslot(&msg.msg_replslot, len); break; + case PGSTAT_MTYPE_CONNECTION: + pgstat_recv_connstat(&msg.msg_conn, len); + break; + default: break; } @@ -4993,6 +5077,13 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry) dbentry->last_checksum_failure = 0; dbentry->n_block_read_time = 0; dbentry->n_block_write_time = 0; + dbentry->n_sessions = 0; + dbentry->total_session_time = 0; + dbentry->total_active_time = 0; + dbentry->total_idle_in_xact_time = 0; + dbentry->n_sessions_abandoned = 0; + dbentry->n_sessions_fatal = 0; + dbentry->n_sessions_killed = 0; dbentry->stat_reset_timestamp = GetCurrentTimestamp(); dbentry->stats_timestamp = 0; @@ -6944,6 +7035,41 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len) } } +/* ---------- + * pgstat_recv_connstat() - + * + * Process connection information. + * ---------- + */ +static void +pgstat_recv_connstat(PgStat_MsgConn *msg, int len) +{ + PgStat_StatDBEntry *dbentry; + + dbentry = pgstat_get_db_entry(msg->m_databaseid, true); + + dbentry->n_sessions += msg->m_count; + dbentry->total_session_time += msg->m_session_time; + dbentry->total_active_time += msg->m_active_time; + dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time; + switch (msg->m_disconnect) + { + case DISCONNECT_NOT_YET: + case DISCONNECT_NORMAL: + /* we don't collect these */ + break; + case DISCONNECT_CLIENT_EOF: + dbentry->n_sessions_abandoned++; + break; + case DISCONNECT_FATAL: + dbentry->n_sessions_fatal++; + break; + case DISCONNECT_KILLED: + dbentry->n_sessions_killed++; + break; + } +} + /* ---------- * pgstat_recv_tempfile() - * diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 28055680aa..8dab9fd578 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -2865,6 +2865,9 @@ die(SIGNAL_ARGS) ProcDiePending = true; } + /* for the statistics collector */ + pgStatSessionEndCause = DISCONNECT_KILLED; + /* If we're still here, waken anything waiting on the process latch */ SetLatch(MyLatch); @@ -4579,9 +4582,15 @@ PostgresMain(int argc, char *argv[], * means unexpected loss of frontend connection. Either way, * perform normal shutdown. */ - case 'X': case EOF: + /* for the statistics collector */ + pgStatSessionEndCause = DISCONNECT_CLIENT_EOF; + + /* FALLTHROUGH */ + + case 'X': + /* * Reset whereToSendOutput to prevent ereport from attempting * to send any more messages to client. diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 5c12a165a1..62bff52638 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1631,6 +1631,100 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(result); } +Datum +pg_stat_get_db_session_time(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + double result = 0.0; + PgStat_StatDBEntry *dbentry; + + /* convert counter from microsec to millisec for display */ + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = ((double) dbentry->total_session_time) / 1000.0; + + PG_RETURN_FLOAT8(result); +} + +Datum +pg_stat_get_db_active_time(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + double result = 0.0; + PgStat_StatDBEntry *dbentry; + + /* convert counter from microsec to millisec for display */ + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = ((double) dbentry->total_active_time) / 1000.0; + + PG_RETURN_FLOAT8(result); +} + +Datum +pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + double result = 0.0; + PgStat_StatDBEntry *dbentry; + + /* convert counter from microsec to millisec for display */ + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = ((double) dbentry->total_idle_in_xact_time) / 1000.0; + + PG_RETURN_FLOAT8(result); +} + +Datum +pg_stat_get_db_sessions(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result = 0; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = (int64) (dbentry->n_sessions); + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_db_sessions_abandoned(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result = 0; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = (int64) (dbentry->n_sessions_abandoned); + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result = 0; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = (int64) (dbentry->n_sessions_fatal); + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result = 0; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = (int64) (dbentry->n_sessions_killed); + + PG_RETURN_INT64(result); +} + Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS) { diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 7790f6ab25..80c2672461 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -72,6 +72,7 @@ #include "libpq/pqformat.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "pgstat.h" #include "postmaster/bgworker.h" #include "postmaster/postmaster.h" #include "postmaster/syslogger.h" @@ -656,6 +657,13 @@ errfinish(const char *filename, int lineno, const char *funcname) fflush(stdout); fflush(stderr); + /* + * Let the statistics collector know. Only mark the session as + * terminated by fatal error if there is no other known cause. + */ + if (pgStatSessionEndCause == DISCONNECT_NORMAL) + pgStatSessionEndCause = DISCONNECT_FATAL; + /* * Do normal process-exit cleanup, then return exit code 1 to indicate * FATAL termination. The postmaster may or may not consider this diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 747135dab4..385d108c29 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202101131 +#define CATALOG_VERSION_NO 202101171 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d27336adcd..dd64c3bd60 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5440,6 +5440,38 @@ proname => 'pg_stat_get_db_blk_write_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_db_blk_write_time' }, +{ oid => '9575', descr => 'statistics: session time, in milliseconds', + proname => 'pg_stat_get_db_session_time', provolatile => 's', + proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_session_time' }, +{ oid => '9576', descr => 'statistics: session active time, in milliseconds', + proname => 'pg_stat_get_db_active_time', provolatile => 's', + proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_active_time' }, +{ oid => '9577', + descr => 'statistics: session idle in transaction time, in milliseconds', + proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's', + proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_idle_in_transaction_time' }, +{ oid => '9578', descr => 'statistics: total number of sessions', + proname => 'pg_stat_get_db_sessions', provolatile => 's', proparallel => 'r', + prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_sessions' }, +{ oid => '9579', + descr => 'statistics: number of sessions disconnected by the client closing the network connection', + proname => 'pg_stat_get_db_sessions_abandoned', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_sessions_abandoned' }, +{ oid => '9580', + descr => 'statistics: number of sessions disconnected by fatal errors', + proname => 'pg_stat_get_db_sessions_fatal', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_sessions_fatal' }, +{ oid => '9581', + descr => 'statistics: number of sessions killed by administrative action', + proname => 'pg_stat_get_db_sessions_killed', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_sessions_killed' }, { oid => '3195', descr => 'statistics: information about WAL archiver', proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's', proparallel => 'r', prorettype => 'record', proargtypes => '', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index c38b689710..a384f6eb56 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -41,6 +41,16 @@ typedef enum TrackFunctionsLevel TRACK_FUNC_ALL } TrackFunctionsLevel; +/* Values to track the cause of session termination */ +typedef enum SessionEndType +{ + DISCONNECT_NOT_YET, /* still active */ + DISCONNECT_NORMAL, + DISCONNECT_CLIENT_EOF, + DISCONNECT_FATAL, + DISCONNECT_KILLED +} SessionEndType; + /* ---------- * The types of backend -> collector messages * ---------- @@ -71,6 +81,7 @@ typedef enum StatMsgType PGSTAT_MTYPE_DEADLOCK, PGSTAT_MTYPE_CHECKSUMFAILURE, PGSTAT_MTYPE_REPLSLOT, + PGSTAT_MTYPE_CONNECTION, } StatMsgType; /* ---------- @@ -622,6 +633,21 @@ typedef struct PgStat_MsgChecksumFailure TimestampTz m_failure_time; } PgStat_MsgChecksumFailure; +/* ---------- + * PgStat_MsgConn Sent by the backend to update connection statistics. + * ---------- + */ +typedef struct PgStat_MsgConn +{ + PgStat_MsgHdr m_hdr; + Oid m_databaseid; + PgStat_Counter m_count; + PgStat_Counter m_session_time; + PgStat_Counter m_active_time; + PgStat_Counter m_idle_in_xact_time; + SessionEndType m_disconnect; +} PgStat_MsgConn; + /* ---------- * PgStat_Msg Union over all possible messages. @@ -654,6 +680,7 @@ typedef union PgStat_Msg PgStat_MsgTempFile msg_tempfile; PgStat_MsgChecksumFailure msg_checksumfailure; PgStat_MsgReplSlot msg_replslot; + PgStat_MsgConn msg_conn; } PgStat_Msg; @@ -696,6 +723,13 @@ typedef struct PgStat_StatDBEntry TimestampTz last_checksum_failure; PgStat_Counter n_block_read_time; /* times in microseconds */ PgStat_Counter n_block_write_time; + PgStat_Counter n_sessions; + PgStat_Counter total_session_time; + PgStat_Counter total_active_time; + PgStat_Counter total_idle_in_xact_time; + PgStat_Counter n_sessions_abandoned; + PgStat_Counter n_sessions_fatal; + PgStat_Counter n_sessions_killed; TimestampTz stat_reset_timestamp; TimestampTz stats_timestamp; /* time of db stats file update */ @@ -1354,6 +1388,11 @@ extern PgStat_MsgWal WalStats; extern PgStat_Counter pgStatBlockReadTime; extern PgStat_Counter pgStatBlockWriteTime; +/* + * Updated by the traffic cop and in errfinish() + */ +extern SessionEndType pgStatSessionEndCause; + /* ---------- * Functions called from postmaster * ---------- diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index a687e99d1e..6173473de9 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1848,6 +1848,13 @@ pg_stat_database| SELECT d.oid AS datid, pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure, pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time, pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time, + pg_stat_get_db_session_time(d.oid) AS session_time, + pg_stat_get_db_active_time(d.oid) AS active_time, + pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time, + pg_stat_get_db_sessions(d.oid) AS sessions, + pg_stat_get_db_sessions_abandoned(d.oid) AS sessions_abandoned, + pg_stat_get_db_sessions_fatal(d.oid) AS sessions_fatal, + pg_stat_get_db_sessions_killed(d.oid) AS sessions_killed, pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset FROM ( SELECT 0 AS oid, NULL::name AS datname