Add writeback to pg_stat_io
28e626bde0 added the concept of IOOps but neglected to include writeback operations. ac8d53dae5 added time spent doing these I/O operations. Without counting writeback, checkpointer write time in the log often differed substantially from that in pg_stat_io. To fix this, add IOOp IOOP_WRITEBACK and track writeback in pg_stat_io.

Bumps catversion.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reported-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20230419172326.dhgyo4wrrhulovt6%40awork3.anarazel.de
This commit is contained in:
parent
52676dc2e0
commit
093e5c57d5
|
@ -3867,6 +3867,32 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
|
||||||
</entry>
|
</entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry role="catalog_table_entry">
|
||||||
|
<para role="column_definition">
|
||||||
|
<structfield>writebacks</structfield> <type>bigint</type>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Number of units of size <varname>op_bytes</varname> which the process
|
||||||
|
requested the kernel write out to permanent storage.
|
||||||
|
</para>
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry role="catalog_table_entry">
|
||||||
|
<para role="column_definition">
|
||||||
|
<structfield>writeback_time</structfield> <type>double precision</type>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Time spent in writeback operations in milliseconds (if
|
||||||
|
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero). This
|
||||||
|
includes the time spent queueing write-out requests and, potentially,
|
||||||
|
the time spent to write out the dirty data.
|
||||||
|
</para>
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry role="catalog_table_entry">
|
<entry role="catalog_table_entry">
|
||||||
<para role="column_definition">
|
<para role="column_definition">
|
||||||
|
|
|
@ -1131,6 +1131,8 @@ SELECT
|
||||||
b.read_time,
|
b.read_time,
|
||||||
b.writes,
|
b.writes,
|
||||||
b.write_time,
|
b.write_time,
|
||||||
|
b.writebacks,
|
||||||
|
b.writeback_time,
|
||||||
b.extends,
|
b.extends,
|
||||||
b.extend_time,
|
b.extend_time,
|
||||||
b.op_bytes,
|
b.op_bytes,
|
||||||
|
|
|
@ -1685,7 +1685,7 @@ again:
|
||||||
FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
|
FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
|
||||||
LWLockRelease(content_lock);
|
LWLockRelease(content_lock);
|
||||||
|
|
||||||
ScheduleBufferTagForWriteback(&BackendWritebackContext,
|
ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
|
||||||
&buf_hdr->tag);
|
&buf_hdr->tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2725,8 +2725,11 @@ BufferSync(int flags)
|
||||||
CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
|
CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* issue all pending flushes */
|
/*
|
||||||
IssuePendingWritebacks(&wb_context);
|
* Issue all pending flushes. Only checkpointer calls BufferSync(), so
|
||||||
|
* IOContext will always be IOCONTEXT_NORMAL.
|
||||||
|
*/
|
||||||
|
IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
|
||||||
|
|
||||||
pfree(per_ts_stat);
|
pfree(per_ts_stat);
|
||||||
per_ts_stat = NULL;
|
per_ts_stat = NULL;
|
||||||
|
@ -3110,7 +3113,11 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
|
||||||
|
|
||||||
UnpinBuffer(bufHdr);
|
UnpinBuffer(bufHdr);
|
||||||
|
|
||||||
ScheduleBufferTagForWriteback(wb_context, &tag);
|
/*
|
||||||
|
* SyncOneBuffer() is only called by checkpointer and bgwriter, so
|
||||||
|
* IOContext will always be IOCONTEXT_NORMAL.
|
||||||
|
*/
|
||||||
|
ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
|
||||||
|
|
||||||
return result | BUF_WRITTEN;
|
return result | BUF_WRITTEN;
|
||||||
}
|
}
|
||||||
|
@ -5445,7 +5452,8 @@ WritebackContextInit(WritebackContext *context, int *max_pending)
|
||||||
* Add buffer to list of pending writeback requests.
|
* Add buffer to list of pending writeback requests.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
ScheduleBufferTagForWriteback(WritebackContext *wb_context, BufferTag *tag)
|
ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context,
|
||||||
|
BufferTag *tag)
|
||||||
{
|
{
|
||||||
PendingWriteback *pending;
|
PendingWriteback *pending;
|
||||||
|
|
||||||
|
@ -5471,7 +5479,7 @@ ScheduleBufferTagForWriteback(WritebackContext *wb_context, BufferTag *tag)
|
||||||
* is now disabled.
|
* is now disabled.
|
||||||
*/
|
*/
|
||||||
if (wb_context->nr_pending >= *wb_context->max_pending)
|
if (wb_context->nr_pending >= *wb_context->max_pending)
|
||||||
IssuePendingWritebacks(wb_context);
|
IssuePendingWritebacks(wb_context, io_context);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ST_SORT sort_pending_writebacks
|
#define ST_SORT sort_pending_writebacks
|
||||||
|
@ -5489,8 +5497,9 @@ ScheduleBufferTagForWriteback(WritebackContext *wb_context, BufferTag *tag)
|
||||||
* error out - it's just a hint.
|
* error out - it's just a hint.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
IssuePendingWritebacks(WritebackContext *wb_context)
|
IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
|
||||||
{
|
{
|
||||||
|
instr_time io_start;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (wb_context->nr_pending == 0)
|
if (wb_context->nr_pending == 0)
|
||||||
|
@ -5503,6 +5512,8 @@ IssuePendingWritebacks(WritebackContext *wb_context)
|
||||||
sort_pending_writebacks(wb_context->pending_writebacks,
|
sort_pending_writebacks(wb_context->pending_writebacks,
|
||||||
wb_context->nr_pending);
|
wb_context->nr_pending);
|
||||||
|
|
||||||
|
io_start = pgstat_prepare_io_time();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Coalesce neighbouring writes, but nothing else. For that we iterate
|
* Coalesce neighbouring writes, but nothing else. For that we iterate
|
||||||
* through the, now sorted, array of pending flushes, and look forward to
|
* through the, now sorted, array of pending flushes, and look forward to
|
||||||
|
@ -5556,6 +5567,13 @@ IssuePendingWritebacks(WritebackContext *wb_context)
|
||||||
smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
|
smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Assume that writeback requests are only issued for buffers containing
|
||||||
|
* blocks of permanent relations.
|
||||||
|
*/
|
||||||
|
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
|
||||||
|
IOOP_WRITEBACK, io_start, wb_context->nr_pending);
|
||||||
|
|
||||||
wb_context->nr_pending = 0;
|
wb_context->nr_pending = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -424,6 +424,14 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
|
||||||
bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
|
bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Temporary tables are not logged and thus do not require fsync'ing.
|
||||||
|
* Writeback is not requested for temporary tables.
|
||||||
|
*/
|
||||||
|
if (io_object == IOOBJECT_TEMP_RELATION &&
|
||||||
|
(io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
|
||||||
|
return false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Some IOOps are not valid in certain IOContexts and some IOOps are only
|
* Some IOOps are not valid in certain IOContexts and some IOOps are only
|
||||||
* valid in certain contexts.
|
* valid in certain contexts.
|
||||||
|
@ -448,12 +456,6 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
|
||||||
if (strategy_io_context && io_op == IOOP_FSYNC)
|
if (strategy_io_context && io_op == IOOP_FSYNC)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/*
|
|
||||||
* Temporary tables are not logged and thus do not require fsync'ing.
|
|
||||||
*/
|
|
||||||
if (io_context == IOCONTEXT_NORMAL &&
|
|
||||||
io_object == IOOBJECT_TEMP_RELATION && io_op == IOOP_FSYNC)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1268,6 +1268,8 @@ typedef enum io_stat_col
|
||||||
IO_COL_READ_TIME,
|
IO_COL_READ_TIME,
|
||||||
IO_COL_WRITES,
|
IO_COL_WRITES,
|
||||||
IO_COL_WRITE_TIME,
|
IO_COL_WRITE_TIME,
|
||||||
|
IO_COL_WRITEBACKS,
|
||||||
|
IO_COL_WRITEBACK_TIME,
|
||||||
IO_COL_EXTENDS,
|
IO_COL_EXTENDS,
|
||||||
IO_COL_EXTEND_TIME,
|
IO_COL_EXTEND_TIME,
|
||||||
IO_COL_CONVERSION,
|
IO_COL_CONVERSION,
|
||||||
|
@ -1303,6 +1305,8 @@ pgstat_get_io_op_index(IOOp io_op)
|
||||||
return IO_COL_REUSES;
|
return IO_COL_REUSES;
|
||||||
case IOOP_WRITE:
|
case IOOP_WRITE:
|
||||||
return IO_COL_WRITES;
|
return IO_COL_WRITES;
|
||||||
|
case IOOP_WRITEBACK:
|
||||||
|
return IO_COL_WRITEBACKS;
|
||||||
}
|
}
|
||||||
|
|
||||||
elog(ERROR, "unrecognized IOOp value: %d", io_op);
|
elog(ERROR, "unrecognized IOOp value: %d", io_op);
|
||||||
|
@ -1322,6 +1326,7 @@ pgstat_get_io_time_index(IOOp io_op)
|
||||||
{
|
{
|
||||||
case IOOP_READ:
|
case IOOP_READ:
|
||||||
case IOOP_WRITE:
|
case IOOP_WRITE:
|
||||||
|
case IOOP_WRITEBACK:
|
||||||
case IOOP_EXTEND:
|
case IOOP_EXTEND:
|
||||||
case IOOP_FSYNC:
|
case IOOP_FSYNC:
|
||||||
return pgstat_get_io_op_index(io_op) + 1;
|
return pgstat_get_io_op_index(io_op) + 1;
|
||||||
|
|
|
@ -57,6 +57,6 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* yyyymmddN */
|
/* yyyymmddN */
|
||||||
#define CATALOG_VERSION_NO 202305172
|
#define CATALOG_VERSION_NO 202305173
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -5734,9 +5734,9 @@
|
||||||
proname => 'pg_stat_get_io', provolatile => 'v',
|
proname => 'pg_stat_get_io', provolatile => 'v',
|
||||||
prorows => '30', proretset => 't',
|
prorows => '30', proretset => 't',
|
||||||
proparallel => 'r', prorettype => 'record', proargtypes => '',
|
proparallel => 'r', prorettype => 'record', proargtypes => '',
|
||||||
proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}',
|
proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}',
|
||||||
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
|
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
|
||||||
proargnames => '{backend_type,object,context,reads,read_time,writes,write_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
|
proargnames => '{backend_type,object,context,reads,read_time,writes,write_time,writebacks,writeback_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
|
||||||
prosrc => 'pg_stat_get_io' },
|
prosrc => 'pg_stat_get_io' },
|
||||||
|
|
||||||
{ oid => '1136', descr => 'statistics: information about WAL activity',
|
{ oid => '1136', descr => 'statistics: information about WAL activity',
|
||||||
|
|
|
@ -300,9 +300,10 @@ typedef enum IOOp
|
||||||
IOOP_READ,
|
IOOP_READ,
|
||||||
IOOP_REUSE,
|
IOOP_REUSE,
|
||||||
IOOP_WRITE,
|
IOOP_WRITE,
|
||||||
|
IOOP_WRITEBACK,
|
||||||
} IOOp;
|
} IOOp;
|
||||||
|
|
||||||
#define IOOP_NUM_TYPES (IOOP_WRITE + 1)
|
#define IOOP_NUM_TYPES (IOOP_WRITEBACK + 1)
|
||||||
|
|
||||||
typedef struct PgStat_BktypeIO
|
typedef struct PgStat_BktypeIO
|
||||||
{
|
{
|
||||||
|
|
|
@ -388,8 +388,9 @@ extern PGDLLIMPORT CkptSortItem *CkptBufferIds;
|
||||||
*/
|
*/
|
||||||
/* bufmgr.c */
|
/* bufmgr.c */
|
||||||
extern void WritebackContextInit(WritebackContext *context, int *max_pending);
|
extern void WritebackContextInit(WritebackContext *context, int *max_pending);
|
||||||
extern void IssuePendingWritebacks(WritebackContext *wb_context);
|
extern void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context);
|
||||||
extern void ScheduleBufferTagForWriteback(WritebackContext *wb_context, BufferTag *tag);
|
extern void ScheduleBufferTagForWriteback(WritebackContext *wb_context,
|
||||||
|
IOContext io_context, BufferTag *tag);
|
||||||
|
|
||||||
/* freelist.c */
|
/* freelist.c */
|
||||||
extern IOContext IOContextForStrategy(BufferAccessStrategy strategy);
|
extern IOContext IOContextForStrategy(BufferAccessStrategy strategy);
|
||||||
|
|
|
@ -1887,6 +1887,8 @@ pg_stat_io| SELECT backend_type,
|
||||||
read_time,
|
read_time,
|
||||||
writes,
|
writes,
|
||||||
write_time,
|
write_time,
|
||||||
|
writebacks,
|
||||||
|
writeback_time,
|
||||||
extends,
|
extends,
|
||||||
extend_time,
|
extend_time,
|
||||||
op_bytes,
|
op_bytes,
|
||||||
|
@ -1896,7 +1898,7 @@ pg_stat_io| SELECT backend_type,
|
||||||
fsyncs,
|
fsyncs,
|
||||||
fsync_time,
|
fsync_time,
|
||||||
stats_reset
|
stats_reset
|
||||||
FROM pg_stat_get_io() b(backend_type, object, context, reads, read_time, writes, write_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset);
|
FROM pg_stat_get_io() b(backend_type, object, context, reads, read_time, writes, write_time, writebacks, writeback_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset);
|
||||||
pg_stat_progress_analyze| SELECT s.pid,
|
pg_stat_progress_analyze| SELECT s.pid,
|
||||||
s.datid,
|
s.datid,
|
||||||
d.datname,
|
d.datname,
|
||||||
|
|
|
@ -1445,7 +1445,7 @@ SELECT pg_stat_have_stats('io', 0, 0);
|
||||||
t
|
t
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
|
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_pre_reset
|
||||||
FROM pg_stat_io \gset
|
FROM pg_stat_io \gset
|
||||||
SELECT pg_stat_reset_shared('io');
|
SELECT pg_stat_reset_shared('io');
|
||||||
pg_stat_reset_shared
|
pg_stat_reset_shared
|
||||||
|
@ -1453,7 +1453,7 @@ SELECT pg_stat_reset_shared('io');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
|
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_post_reset
|
||||||
FROM pg_stat_io \gset
|
FROM pg_stat_io \gset
|
||||||
SELECT :io_stats_post_reset < :io_stats_pre_reset;
|
SELECT :io_stats_post_reset < :io_stats_pre_reset;
|
||||||
?column?
|
?column?
|
||||||
|
|
|
@ -716,10 +716,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext
|
||||||
|
|
||||||
-- Test IO stats reset
|
-- Test IO stats reset
|
||||||
SELECT pg_stat_have_stats('io', 0, 0);
|
SELECT pg_stat_have_stats('io', 0, 0);
|
||||||
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
|
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_pre_reset
|
||||||
FROM pg_stat_io \gset
|
FROM pg_stat_io \gset
|
||||||
SELECT pg_stat_reset_shared('io');
|
SELECT pg_stat_reset_shared('io');
|
||||||
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
|
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_post_reset
|
||||||
FROM pg_stat_io \gset
|
FROM pg_stat_io \gset
|
||||||
SELECT :io_stats_post_reset < :io_stats_pre_reset;
|
SELECT :io_stats_post_reset < :io_stats_pre_reset;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue