pg_stat_wal: Accumulate time as instr_time instead of microseconds

In instr_time.h it is stated that:

* When summing multiple measurements, it's recommended to leave the
* running sum in instr_time form (ie, use INSTR_TIME_ADD or
* INSTR_TIME_ACCUM_DIFF) and convert to a result format only at the end.

The reason for that is that converting to microseconds is not cheap, and can
loose precision.  Therefore this commit changes 'PendingWalStats' to use
'instr_time' instead of 'PgStat_Counter' while accumulating 'wal_write_time'
and 'wal_sync_time'.

Author: Nazir Bilal Yavuz <byavuz81@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Discussion: https://postgr.es/m/1feedb83-7aa9-cb4b-5086-598349d3f555@gmail.com
This commit is contained in:
Andres Freund 2023-03-30 14:23:14 -07:00
parent 122376f028
commit ca7b3c4c00
5 changed files with 37 additions and 24 deletions

View File

@ -2206,8 +2206,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
instr_time duration;
INSTR_TIME_SET_CURRENT(duration);
INSTR_TIME_SUBTRACT(duration, start);
PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, duration, start);
}
PendingWalStats.wal_write++;
@ -8204,8 +8203,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
instr_time duration;
INSTR_TIME_SET_CURRENT(duration);
INSTR_TIME_SUBTRACT(duration, start);
PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, duration, start);
}
PendingWalStats.wal_sync++;

View File

@ -469,8 +469,7 @@ BufFileLoadBuffer(BufFile *file)
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
}
/* we choose not to advance curOffset here */
@ -544,8 +543,7 @@ BufFileDumpBuffer(BufFile *file)
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
}
file->curOffset += bytestowrite;

View File

@ -21,7 +21,7 @@
#include "executor/instrument.h"
PgStat_WalStats PendingWalStats = {0};
PgStat_PendingWalStats PendingWalStats = {0};
/*
* WAL usage counters saved from pgWALUsage at the previous call to
@ -70,7 +70,7 @@ bool
pgstat_flush_wal(bool nowait)
{
PgStatShared_Wal *stats_shmem = &pgStatLocal.shmem->wal;
WalUsage diff = {0};
WalUsage wal_usage_diff = {0};
Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
Assert(pgStatLocal.shmem != NULL &&
@ -88,25 +88,26 @@ pgstat_flush_wal(bool nowait)
* Calculate how much WAL usage counters were increased by subtracting the
* previous counters from the current ones.
*/
WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
PendingWalStats.wal_records = diff.wal_records;
PendingWalStats.wal_fpi = diff.wal_fpi;
PendingWalStats.wal_bytes = diff.wal_bytes;
WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevWalUsage);
if (!nowait)
LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
return true;
#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
WALSTAT_ACC(wal_records);
WALSTAT_ACC(wal_fpi);
WALSTAT_ACC(wal_bytes);
WALSTAT_ACC(wal_buffers_full);
WALSTAT_ACC(wal_write);
WALSTAT_ACC(wal_sync);
WALSTAT_ACC(wal_write_time);
WALSTAT_ACC(wal_sync_time);
#define WALSTAT_ACC(fld, var_to_add) \
(stats_shmem->stats.fld += var_to_add.fld)
#define WALSTAT_ACC_INSTR_TIME(fld) \
(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
WALSTAT_ACC(wal_records, wal_usage_diff);
WALSTAT_ACC(wal_fpi, wal_usage_diff);
WALSTAT_ACC(wal_bytes, wal_usage_diff);
WALSTAT_ACC(wal_buffers_full, PendingWalStats);
WALSTAT_ACC(wal_write, PendingWalStats);
WALSTAT_ACC(wal_sync, PendingWalStats);
WALSTAT_ACC_INSTR_TIME(wal_write_time);
WALSTAT_ACC_INSTR_TIME(wal_sync_time);
#undef WALSTAT_ACC_INSTR_TIME
#undef WALSTAT_ACC
LWLockRelease(&stats_shmem->lock);

View File

@ -435,6 +435,21 @@ typedef struct PgStat_WalStats
TimestampTz stat_reset_timestamp;
} PgStat_WalStats;
/*
* This struct stores wal-related durations as instr_time, which makes it
* cheaper and easier to accumulate them, by not requiring type
* conversions. During stats flush instr_time will be converted into
* microseconds.
*/
typedef struct PgStat_PendingWalStats
{
PgStat_Counter wal_buffers_full;
PgStat_Counter wal_write;
PgStat_Counter wal_sync;
instr_time wal_write_time;
instr_time wal_sync_time;
} PgStat_PendingWalStats;
/*
* Functions in pgstat.c
@ -748,7 +763,7 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
*/
/* updated directly by backends and background processes */
extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats;
#endif /* PGSTAT_H */

View File

@ -2053,6 +2053,7 @@ PgStat_Kind
PgStat_KindInfo
PgStat_LocalState
PgStat_PendingDroppedStatsItem
PgStat_PendingWalStats
PgStat_SLRUStats
PgStat_ShmemControl
PgStat_Snapshot