Fix performance regression from session statistics.

Session statistics, as introduced by 960869da08, had several shortcomings:

- an additional GetCurrentTimestamp() call that also impaired the accuracy of
  the data collected

  This can be avoided by passing the current timestamp we already have in
  pgstat_report_stat().

- an additional statistics UDP packet sent every 500ms

  This is solved by adding the new statistics to PgStat_MsgTabstat.
  This is conceptually ugly, because session statistics are not
  table statistics.  But the struct already contains data unrelated
  to tables, so there is not much damage done.

  Connection and disconnection are reported in separate messages, which
  reduces the number of additional messages to two messages per session and a
  slight increase in PgStat_MsgTabstat size (but the same number of table
  stats fit).

- Session time computation could overflow on systems where long is 32 bit.

Reported-By: Andres Freund <andres@anarazel.de>
Author: Andres Freund <andres@anarazel.de>
Author: Laurenz Albe <laurenz.albe@cybertec.at>
Discussion: https://postgr.es/m/20210801205501.nyxzxoelqoo4x2qc%40alap3.anarazel.de
Backpatch: 14-, where the feature was introduced.
This commit is contained in:
Andres Freund 2021-09-16 02:02:40 -07:00
parent dc899146db
commit 37a9aa6591
5 changed files with 158 additions and 80 deletions

View File

@ -246,6 +246,7 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
static PgStat_Counter pgLastSessionReportTime = 0;
PgStat_Counter pgStatActiveTime = 0;
PgStat_Counter pgStatTransactionIdleTime = 0;
SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
@ -330,11 +331,12 @@ static bool pgstat_db_requested(Oid databaseid);
static PgStat_StatReplSlotEntry *pgstat_get_replslot_entry(NameData name, bool create_it);
static void pgstat_reset_replslot(PgStat_StatReplSlotEntry *slotstats, TimestampTz ts);
static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static bool pgstat_should_report_connstat(void);
static void pgstat_report_disconnect(Oid dboid);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@ -366,7 +368,8 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
static void pgstat_recv_connect(PgStat_MsgConnect *msg, int len);
static void pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@ -890,12 +893,11 @@ pgstat_report_stat(bool disconnect)
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
/* for backends, send connection statistics */
if (MyBackendType == B_BACKEND)
pgstat_send_connstats(disconnect, last_report);
last_report = now;
if (disconnect)
pgstat_report_disconnect(MyDatabaseId);
/*
* Destroy pgStatTabHash before we start invalidating PgStat_TableEntry
* entries it points to. (Should we fail partway through the loop below,
@ -947,7 +949,7 @@ pgstat_report_stat(bool disconnect)
sizeof(PgStat_TableCounts));
if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
{
pgstat_send_tabstat(this_msg);
pgstat_send_tabstat(this_msg, now);
this_msg->m_nentries = 0;
}
}
@ -959,13 +961,14 @@ pgstat_report_stat(bool disconnect)
/*
* Send partial messages. Make sure that any pending xact commit/abort
* gets counted, even if there are no table stats to send.
* and connection stats get counted, even if there are no table stats to
* send.
*/
if (regular_msg.m_nentries > 0 ||
pgStatXactCommit > 0 || pgStatXactRollback > 0)
pgstat_send_tabstat(&regular_msg);
pgStatXactCommit > 0 || pgStatXactRollback > 0 || disconnect)
pgstat_send_tabstat(&regular_msg, now);
if (shared_msg.m_nentries > 0)
pgstat_send_tabstat(&shared_msg);
pgstat_send_tabstat(&shared_msg, now);
/* Now, send function statistics */
pgstat_send_funcstats();
@ -981,7 +984,7 @@ pgstat_report_stat(bool disconnect)
* Subroutine for pgstat_report_stat: finish and send a tabstat message
*/
static void
pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now)
{
int n;
int len;
@ -1000,10 +1003,34 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
tsmsg->m_xact_rollback = pgStatXactRollback;
tsmsg->m_block_read_time = pgStatBlockReadTime;
tsmsg->m_block_write_time = pgStatBlockWriteTime;
if (pgstat_should_report_connstat())
{
long secs;
int usecs;
/*
* pgLastSessionReportTime is initialized to MyStartTimestamp by
* pgstat_report_connect().
*/
TimestampDifference(pgLastSessionReportTime, now, &secs, &usecs);
pgLastSessionReportTime = now;
tsmsg->m_session_time = (PgStat_Counter) secs * 1000000 + usecs;
tsmsg->m_active_time = pgStatActiveTime;
tsmsg->m_idle_in_xact_time = pgStatTransactionIdleTime;
}
else
{
tsmsg->m_session_time = 0;
tsmsg->m_active_time = 0;
tsmsg->m_idle_in_xact_time = 0;
}
pgStatXactCommit = 0;
pgStatXactRollback = 0;
pgStatBlockReadTime = 0;
pgStatBlockWriteTime = 0;
pgStatActiveTime = 0;
pgStatTransactionIdleTime = 0;
}
else
{
@ -1011,6 +1038,9 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
tsmsg->m_xact_rollback = 0;
tsmsg->m_block_read_time = 0;
tsmsg->m_block_write_time = 0;
tsmsg->m_session_time = 0;
tsmsg->m_active_time = 0;
tsmsg->m_idle_in_xact_time = 0;
}
n = tsmsg->m_nentries;
@ -1378,49 +1408,6 @@ pgstat_drop_relation(Oid relid)
}
#endif /* NOT_USED */
/* ----------
* pgstat_send_connstats() -
*
* Tell the collector about session statistics.
* The parameter "disconnect" will be true when the backend exits.
* "last_report" is the last time we were called (0 if never).
* ----------
*/
static void
pgstat_send_connstats(bool disconnect, TimestampTz last_report)
{
PgStat_MsgConn msg;
long secs;
int usecs;
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
msg.m_databaseid = MyDatabaseId;
/* session time since the last report */
TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
GetCurrentTimestamp(),
&secs, &usecs);
msg.m_session_time = secs * 1000000 + usecs;
msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET;
msg.m_active_time = pgStatActiveTime;
pgStatActiveTime = 0;
msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
pgStatTransactionIdleTime = 0;
/* report a new session only the first time */
msg.m_count = (last_report == 0) ? 1 : 0;
pgstat_send(&msg, sizeof(PgStat_MsgConn));
}
/* ----------
* pgstat_reset_counters() -
*
@ -1759,6 +1746,63 @@ pgstat_report_tempfile(size_t filesize)
pgstat_send(&msg, sizeof(msg));
}
/* --------
* pgstat_report_connect() -
*
* Tell the collector about a new connection.
* --------
*/
void
pgstat_report_connect(Oid dboid)
{
PgStat_MsgConnect msg;
if (!pgstat_should_report_connstat())
return;
pgLastSessionReportTime = MyStartTimestamp;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECT);
msg.m_databaseid = MyDatabaseId;
pgstat_send(&msg, sizeof(PgStat_MsgConnect));
}
/* --------
* pgstat_report_disconnect() -
*
* Tell the collector about a disconnect.
* --------
*/
static void
pgstat_report_disconnect(Oid dboid)
{
PgStat_MsgDisconnect msg;
if (!pgstat_should_report_connstat())
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DISCONNECT);
msg.m_databaseid = MyDatabaseId;
msg.m_cause = pgStatSessionEndCause;
pgstat_send(&msg, sizeof(PgStat_MsgDisconnect));
}
/* --------
* pgstat_should_report_connstats() -
*
* We report session statistics only for normal backend processes. Parallel
* workers run in parallel, so they don't contribute to session times, even
* though they use CPU time. Walsender processes could be considered here,
* but they have different session characteristics from normal backends (for
* example, they are always "active"), so they would skew session statistics.
* ----------
*/
static bool
pgstat_should_report_connstat(void)
{
return MyBackendType == B_BACKEND;
}
/* ----------
* pgstat_report_replslot() -
*
@ -3465,8 +3509,12 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
case PGSTAT_MTYPE_CONNECTION:
pgstat_recv_connstat(&msg.msg_conn, len);
case PGSTAT_MTYPE_CONNECT:
pgstat_recv_connect(&msg.msg_connect, len);
break;
case PGSTAT_MTYPE_DISCONNECT:
pgstat_recv_disconnect(&msg.msg_disconnect, len);
break;
default:
@ -4904,6 +4952,10 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
dbentry->n_block_read_time += msg->m_block_read_time;
dbentry->n_block_write_time += msg->m_block_write_time;
dbentry->total_session_time += msg->m_session_time;
dbentry->total_active_time += msg->m_active_time;
dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time;
/*
* Process all table entries in the message.
*/
@ -5568,23 +5620,34 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
/* ----------
* pgstat_recv_connstat() -
* pgstat_recv_connect() -
*
* Process connection information.
* Process a CONNECT message.
* ----------
*/
static void
pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
pgstat_recv_connect(PgStat_MsgConnect *msg, int len)
{
PgStat_StatDBEntry *dbentry;
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
dbentry->n_sessions++;
}
/* ----------
* pgstat_recv_disconnect() -
*
* Process a DISCONNECT message.
* ----------
*/
static void
pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len)
{
PgStat_StatDBEntry *dbentry;
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
dbentry->n_sessions += msg->m_count;
dbentry->total_session_time += msg->m_session_time;
dbentry->total_active_time += msg->m_active_time;
dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time;
switch (msg->m_disconnect)
switch (msg->m_cause)
{
case DISCONNECT_NOT_YET:
case DISCONNECT_NORMAL:

View File

@ -4116,6 +4116,8 @@ PostgresMain(int argc, char *argv[],
if (IsUnderPostmaster && Log_disconnections)
on_proc_exit(log_disconnections, 0);
pgstat_report_connect(MyDatabaseId);
/* Perform initialization specific to a WAL sender process. */
if (am_walsender)
InitWalSender();

View File

@ -590,9 +590,9 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
if (beentry->st_state == STATE_RUNNING ||
beentry->st_state == STATE_FASTPATH)
pgstat_count_conn_active_time(secs * 1000000 + usecs);
pgstat_count_conn_active_time((PgStat_Counter) secs * 1000000 + usecs);
else
pgstat_count_conn_txn_idle_time(secs * 1000000 + usecs);
pgstat_count_conn_txn_idle_time((PgStat_Counter) secs * 1000000 + usecs);
}
/*

View File

@ -81,7 +81,8 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
PGSTAT_MTYPE_CONNECTION,
PGSTAT_MTYPE_CONNECT,
PGSTAT_MTYPE_DISCONNECT,
} StatMsgType;
/* ----------
@ -279,7 +280,7 @@ typedef struct PgStat_TableEntry
* ----------
*/
#define PGSTAT_NUM_TABENTRIES \
((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 2 * sizeof(PgStat_Counter)) \
((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 5 * sizeof(PgStat_Counter)) \
/ sizeof(PgStat_TableEntry))
typedef struct PgStat_MsgTabstat
@ -291,6 +292,9 @@ typedef struct PgStat_MsgTabstat
int m_xact_rollback;
PgStat_Counter m_block_read_time; /* times in microseconds */
PgStat_Counter m_block_write_time;
PgStat_Counter m_session_time;
PgStat_Counter m_active_time;
PgStat_Counter m_idle_in_xact_time;
PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
} PgStat_MsgTabstat;
@ -653,20 +657,26 @@ typedef struct PgStat_MsgChecksumFailure
} PgStat_MsgChecksumFailure;
/* ----------
* PgStat_MsgConn Sent by the backend to update connection statistics.
* PgStat_MsgConnect Sent by the backend upon connection
* establishment
* ----------
*/
typedef struct PgStat_MsgConn
typedef struct PgStat_MsgConnect
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
PgStat_Counter m_count;
PgStat_Counter m_session_time;
PgStat_Counter m_active_time;
PgStat_Counter m_idle_in_xact_time;
SessionEndType m_disconnect;
} PgStat_MsgConn;
} PgStat_MsgConnect;
/* ----------
* PgStat_MsgDisconnect Sent by the backend when disconnecting
* ----------
*/
typedef struct PgStat_MsgDisconnect
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
SessionEndType m_cause;
} PgStat_MsgDisconnect;
/* ----------
* PgStat_Msg Union over all possible messages.
@ -700,7 +710,8 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
PgStat_MsgConn msg_conn;
PgStat_MsgConnect msg_connect;
PgStat_MsgDisconnect msg_disconnect;
} PgStat_Msg;
@ -1010,6 +1021,7 @@ extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type t
extern void pgstat_reset_slru_counter(const char *);
extern void pgstat_reset_replslot_counter(const char *name);
extern void pgstat_report_connect(Oid dboid);
extern void pgstat_report_autovac(Oid dboid);
extern void pgstat_report_vacuum(Oid tableoid, bool shared,
PgStat_Counter livetuples, PgStat_Counter deadtuples);

View File

@ -1923,8 +1923,9 @@ PgStat_MsgArchiver
PgStat_MsgAutovacStart
PgStat_MsgBgWriter
PgStat_MsgChecksumFailure
PgStat_MsgConn
PgStat_MsgConnect
PgStat_MsgDeadlock
PgStat_MsgDisconnect
PgStat_MsgDropdb
PgStat_MsgDummy
PgStat_MsgFuncpurge