Add pg_stat_database counters for sessions and session time

This add counters for number of sessions, the different kind of session
termination types, and timers for how much time is spent in active vs
idle in a database to pg_stat_database.

Internally this also renames the parameter "force" to disconnect. This
was the only use-case for the parameter before, so repurposing it to
this mroe narrow usecase makes things cleaner than inventing something
new.

Author: Laurenz Albe
Reviewed-By: Magnus Hagander, Soumyadeep Chakraborty, Masahiro Ikeda
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
This commit is contained in:
Magnus Hagander 2021-01-17 13:34:09 +01:00
parent 891a1d0bca
commit 960869da08
10 changed files with 405 additions and 6 deletions

View File

@ -3737,6 +3737,83 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>session_time</structfield> <type>double precision</type>
</para>
<para>
Time spent by database sessions in this database, in milliseconds
(note that statistics are only updated when the state of a session
changes, so if sessions have been idle for a long time, this idle time
won't be included)
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>active_time</structfield> <type>double precision</type>
</para>
<para>
Time spent executing SQL statements in this database, in milliseconds
(this corresponds to the states <literal>active</literal> and
<literal>fastpath function call</literal> in
<link linkend="monitoring-pg-stat-activity-view">
<structname>pg_stat_activity</structname></link>)
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>idle_in_transaction_time</structfield> <type>double precision</type>
</para>
<para>
Time spent idling while in a transaction in this database, in milliseconds
(this corresponds to the states <literal>idle in transaction</literal> and
<literal>idle in transaction (aborted)</literal> in
<link linkend="monitoring-pg-stat-activity-view">
<structname>pg_stat_activity</structname></link>)
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>sessions</structfield> <type>bigint</type>
</para>
<para>
Total number of sessions established to this database
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>sessions_abandoned</structfield> <type>bigint</type>
</para>
<para>
Number of database sessions to this database that were terminated
because connection to the client was lost
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>sessions_fatal</structfield> <type>bigint</type>
</para>
<para>
Number of database sessions to this database that were terminated
by fatal errors
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>sessions_killed</structfield> <type>bigint</type>
</para>
<para>
Number of database sessions to this database that were terminated
by operator intervention
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>

View File

@ -924,6 +924,13 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
pg_stat_get_db_session_time(D.oid) AS session_time,
pg_stat_get_db_active_time(D.oid) AS active_time,
pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
pg_stat_get_db_sessions(D.oid) AS sessions,
pg_stat_get_db_sessions_abandoned(D.oid) AS sessions_abandoned,
pg_stat_get_db_sessions_fatal(D.oid) AS sessions_fatal,
pg_stat_get_db_sessions_killed(D.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname

View File

@ -258,6 +258,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
static PgStat_Counter pgStatActiveTime = 0;
static PgStat_Counter pgStatTransactionIdleTime = 0;
SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@ -343,6 +346,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@ -378,6 +382,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@ -855,10 +860,14 @@ allow_immediate_pgstat_restart(void)
* per-table and function usage statistics to the collector. Note that this
* is called only when not within a transaction, so it is fair to use
* transaction stop time as an approximation of current time.
*
* "disconnect" is "true" only for the last call before the backend
* exits. This makes sure that no data is lost and that interrupted
* sessions are reported correctly.
* ----------
*/
void
pgstat_report_stat(bool force)
pgstat_report_stat(bool disconnect)
{
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
@ -873,17 +882,22 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
!have_function_stats)
!have_function_stats && !disconnect)
return;
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
* msec since we last sent one, or the caller wants to force stats out.
* msec since we last sent one, or the backend is about to exit.
*/
now = GetCurrentTransactionStopTimestamp();
if (!force &&
if (!disconnect &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
/* for backends, send connection statistics */
if (MyBackendType == B_BACKEND)
pgstat_send_connstats(disconnect, last_report);
last_report = now;
/*
@ -1351,6 +1365,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
/* ----------
* pgstat_send_connstats() -
*
* Tell the collector about session statistics.
* The parameter "disconnect" will be true when the backend exits.
* "last_report" is the last time we were called (0 if never).
* ----------
*/
static void
pgstat_send_connstats(bool disconnect, TimestampTz last_report)
{
PgStat_MsgConn msg;
long secs;
int usecs;
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
msg.m_databaseid = MyDatabaseId;
/* session time since the last report */
TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
GetCurrentTimestamp(),
&secs, &usecs);
msg.m_session_time = secs * 1000000 + usecs;
msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET;
msg.m_active_time = pgStatActiveTime;
pgStatActiveTime = 0;
msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
pgStatTransactionIdleTime = 0;
/* report a new session only the first time */
msg.m_count = (last_report == 0) ? 1 : 0;
pgstat_send(&msg, sizeof(PgStat_MsgConn));
}
/* ----------
* pgstat_reset_counters() -
*
@ -3348,6 +3404,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
/*
* If the state has changed from "active" or "idle in transaction",
* calculate the duration.
*/
if ((beentry->st_state == STATE_RUNNING ||
beentry->st_state == STATE_FASTPATH ||
beentry->st_state == STATE_IDLEINTRANSACTION ||
beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
state != beentry->st_state)
{
long secs;
int usecs;
TimestampDifference(beentry->st_state_start_timestamp,
current_timestamp,
&secs, &usecs);
if (beentry->st_state == STATE_RUNNING ||
beentry->st_state == STATE_FASTPATH)
pgStatActiveTime += secs * 1000000 + usecs;
else
pgStatTransactionIdleTime += secs * 1000000 + usecs;
}
/*
* Now update the status entry
*/
@ -4919,6 +4999,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
case PGSTAT_MTYPE_CONNECTION:
pgstat_recv_connstat(&msg.msg_conn, len);
break;
default:
break;
}
@ -4993,6 +5077,13 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
dbentry->n_sessions = 0;
dbentry->total_session_time = 0;
dbentry->total_active_time = 0;
dbentry->total_idle_in_xact_time = 0;
dbentry->n_sessions_abandoned = 0;
dbentry->n_sessions_fatal = 0;
dbentry->n_sessions_killed = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@ -6944,6 +7035,41 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
/* ----------
* pgstat_recv_connstat() -
*
* Process connection information.
* ----------
*/
static void
pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
{
PgStat_StatDBEntry *dbentry;
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
dbentry->n_sessions += msg->m_count;
dbentry->total_session_time += msg->m_session_time;
dbentry->total_active_time += msg->m_active_time;
dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time;
switch (msg->m_disconnect)
{
case DISCONNECT_NOT_YET:
case DISCONNECT_NORMAL:
/* we don't collect these */
break;
case DISCONNECT_CLIENT_EOF:
dbentry->n_sessions_abandoned++;
break;
case DISCONNECT_FATAL:
dbentry->n_sessions_fatal++;
break;
case DISCONNECT_KILLED:
dbentry->n_sessions_killed++;
break;
}
}
/* ----------
* pgstat_recv_tempfile() -
*

View File

@ -2865,6 +2865,9 @@ die(SIGNAL_ARGS)
ProcDiePending = true;
}
/* for the statistics collector */
pgStatSessionEndCause = DISCONNECT_KILLED;
/* If we're still here, waken anything waiting on the process latch */
SetLatch(MyLatch);
@ -4579,9 +4582,15 @@ PostgresMain(int argc, char *argv[],
* means unexpected loss of frontend connection. Either way,
* perform normal shutdown.
*/
case 'X':
case EOF:
/* for the statistics collector */
pgStatSessionEndCause = DISCONNECT_CLIENT_EOF;
/* FALLTHROUGH */
case 'X':
/*
* Reset whereToSendOutput to prevent ereport from attempting
* to send any more messages to client.

View File

@ -1631,6 +1631,100 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
Datum
pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
double result = 0.0;
PgStat_StatDBEntry *dbentry;
/* convert counter from microsec to millisec for display */
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
result = ((double) dbentry->total_session_time) / 1000.0;
PG_RETURN_FLOAT8(result);
}
Datum
pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
double result = 0.0;
PgStat_StatDBEntry *dbentry;
/* convert counter from microsec to millisec for display */
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
result = ((double) dbentry->total_active_time) / 1000.0;
PG_RETURN_FLOAT8(result);
}
Datum
pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
double result = 0.0;
PgStat_StatDBEntry *dbentry;
/* convert counter from microsec to millisec for display */
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
result = ((double) dbentry->total_idle_in_xact_time) / 1000.0;
PG_RETURN_FLOAT8(result);
}
Datum
pg_stat_get_db_sessions(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
int64 result = 0;
PgStat_StatDBEntry *dbentry;
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
result = (int64) (dbentry->n_sessions);
PG_RETURN_INT64(result);
}
Datum
pg_stat_get_db_sessions_abandoned(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
int64 result = 0;
PgStat_StatDBEntry *dbentry;
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
result = (int64) (dbentry->n_sessions_abandoned);
PG_RETURN_INT64(result);
}
Datum
pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
int64 result = 0;
PgStat_StatDBEntry *dbentry;
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
result = (int64) (dbentry->n_sessions_fatal);
PG_RETURN_INT64(result);
}
Datum
pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS)
{
Oid dbid = PG_GETARG_OID(0);
int64 result = 0;
PgStat_StatDBEntry *dbentry;
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
result = (int64) (dbentry->n_sessions_killed);
PG_RETURN_INT64(result);
}
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{

View File

@ -72,6 +72,7 @@
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
@ -656,6 +657,13 @@ errfinish(const char *filename, int lineno, const char *funcname)
fflush(stdout);
fflush(stderr);
/*
* Let the statistics collector know. Only mark the session as
* terminated by fatal error if there is no other known cause.
*/
if (pgStatSessionEndCause == DISCONNECT_NORMAL)
pgStatSessionEndCause = DISCONNECT_FATAL;
/*
* Do normal process-exit cleanup, then return exit code 1 to indicate
* FATAL termination. The postmaster may or may not consider this

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202101131
#define CATALOG_VERSION_NO 202101171
#endif

View File

@ -5440,6 +5440,38 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
{ oid => '9575', descr => 'statistics: session time, in milliseconds',
proname => 'pg_stat_get_db_session_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_session_time' },
{ oid => '9576', descr => 'statistics: session active time, in milliseconds',
proname => 'pg_stat_get_db_active_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_active_time' },
{ oid => '9577',
descr => 'statistics: session idle in transaction time, in milliseconds',
proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
{ oid => '9578', descr => 'statistics: total number of sessions',
proname => 'pg_stat_get_db_sessions', provolatile => 's', proparallel => 'r',
prorettype => 'int8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_sessions' },
{ oid => '9579',
descr => 'statistics: number of sessions disconnected by the client closing the network connection',
proname => 'pg_stat_get_db_sessions_abandoned', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_sessions_abandoned' },
{ oid => '9580',
descr => 'statistics: number of sessions disconnected by fatal errors',
proname => 'pg_stat_get_db_sessions_fatal', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_sessions_fatal' },
{ oid => '9581',
descr => 'statistics: number of sessions killed by administrative action',
proname => 'pg_stat_get_db_sessions_killed', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_sessions_killed' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',

View File

@ -41,6 +41,16 @@ typedef enum TrackFunctionsLevel
TRACK_FUNC_ALL
} TrackFunctionsLevel;
/* Values to track the cause of session termination */
typedef enum SessionEndType
{
DISCONNECT_NOT_YET, /* still active */
DISCONNECT_NORMAL,
DISCONNECT_CLIENT_EOF,
DISCONNECT_FATAL,
DISCONNECT_KILLED
} SessionEndType;
/* ----------
* The types of backend -> collector messages
* ----------
@ -71,6 +81,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@ -622,6 +633,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
/* ----------
* PgStat_MsgConn Sent by the backend to update connection statistics.
* ----------
*/
typedef struct PgStat_MsgConn
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
PgStat_Counter m_count;
PgStat_Counter m_session_time;
PgStat_Counter m_active_time;
PgStat_Counter m_idle_in_xact_time;
SessionEndType m_disconnect;
} PgStat_MsgConn;
/* ----------
* PgStat_Msg Union over all possible messages.
@ -654,6 +680,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
PgStat_MsgConn msg_conn;
} PgStat_Msg;
@ -696,6 +723,13 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
PgStat_Counter n_sessions;
PgStat_Counter total_session_time;
PgStat_Counter total_active_time;
PgStat_Counter total_idle_in_xact_time;
PgStat_Counter n_sessions_abandoned;
PgStat_Counter n_sessions_fatal;
PgStat_Counter n_sessions_killed;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@ -1354,6 +1388,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
/*
* Updated by the traffic cop and in errfinish()
*/
extern SessionEndType pgStatSessionEndCause;
/* ----------
* Functions called from postmaster
* ----------

View File

@ -1848,6 +1848,13 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
pg_stat_get_db_session_time(d.oid) AS session_time,
pg_stat_get_db_active_time(d.oid) AS active_time,
pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
pg_stat_get_db_sessions(d.oid) AS sessions,
pg_stat_get_db_sessions_abandoned(d.oid) AS sessions_abandoned,
pg_stat_get_db_sessions_fatal(d.oid) AS sessions_fatal,
pg_stat_get_db_sessions_killed(d.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname