Track shared buffer hits in pg_stat_io

Among other things, this should make it easier to calculate a useful cache hit
ratio by excluding buffer reads performed via buffer access strategies. As buffer access
strategies reuse buffers (and thus evict the prior buffer contents), it is
normal to see reads on repeated scans of the same data.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAAKRu_beMa9Hzih40%3DXPYqhDVz6tsgUGTrhZXRo%3Dunp%2Bszb%3DUA%40mail.gmail.com
This commit is contained in:
Andres Freund 2023-03-30 19:22:40 -07:00
parent 6c3b697b19
commit 8aaa04b32d
12 changed files with 109 additions and 47 deletions

View File

@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</entry>
</row>
<row>
<entry role="catalog_table_entry">
<para role="column_definition">
<structfield>hits</structfield> <type>bigint</type>
</para>
<para>
The number of times a desired block was found in a shared buffer.
</para>
</entry>
</row>
<row>
<entry role="catalog_table_entry">
<para role="column_definition">

View File

@ -1128,6 +1128,7 @@ SELECT
b.writes,
b.extends,
b.op_bytes,
b.hits,
b.evictions,
b.reuses,
b.fsyncs,

View File

@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
bool *foundPtr, IOContext *io_context);
bool *foundPtr, IOContext io_context);
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
IOObject io_object, IOContext io_context);
static void FindAndDropRelationBuffers(RelFileLocator rlocator,
@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if (isLocalBuf)
{
/*
* LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
* do not use a BufferAccessStrategy for I/O of temporary tables.
* We do not use a BufferAccessStrategy for I/O of temporary tables.
* However, in some cases, the "strategy" may not be NULL, so we can't
* rely on IOContextForStrategy() to set the right IOContext for us.
* This may happen in cases like CREATE TEMPORARY TABLE AS...
*/
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
io_context = IOCONTEXT_NORMAL;
io_object = IOOBJECT_TEMP_RELATION;
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
if (found)
pgBufferUsage.local_blks_hit++;
else if (isExtend)
@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* lookup the buffer. IO_IN_PROGRESS is set if the requested block is
* not currently in memory.
*/
io_context = IOContextForStrategy(strategy);
io_object = IOOBJECT_RELATION;
bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
strategy, &found, &io_context);
strategy, &found, io_context);
if (found)
pgBufferUsage.shared_blks_hit++;
else if (isExtend)
@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
/* Just need to update stats before we exit */
*hit = true;
VacuumPageHit++;
pgstat_count_io_op(io_object, io_context, IOOP_HIT);
if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageHit;
@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
if (isLocalBuf)
{
bufBlock = LocalBufHdrGetBlock(bufHdr);
io_object = IOOBJECT_TEMP_RELATION;
}
else
{
bufBlock = BufHdrGetBlock(bufHdr);
io_object = IOOBJECT_RELATION;
}
bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
if (isExtend)
{
@ -1139,7 +1134,7 @@ static BufferDesc *
BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
bool *foundPtr, IOContext *io_context)
bool *foundPtr, IOContext io_context)
{
bool from_ring;
BufferTag newTag; /* identity of requested block */
@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
{
/*
* If we get here, previous attempts to read the buffer must
* have failed ... but we shall bravely try again. Set
* io_context since we will in fact need to count an IO
* Operation.
* have failed ... but we shall bravely try again.
*/
*io_context = IOContextForStrategy(strategy);
*foundPtr = false;
}
}
@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
LWLockRelease(newPartitionLock);
*io_context = IOContextForStrategy(strategy);
/* Loop here in case we have to try another victim buffer */
for (;;)
{
@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgr->smgr_rlocator.locator.dbOid,
smgr->smgr_rlocator.locator.relNumber);
FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context);
LWLockRelease(BufferDescriptorGetContentLock(buf));
ScheduleBufferTagForWriteback(&BackendWritebackContext,
@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* we may have been forced to release the buffer due to concurrent
* pinners or erroring out.
*/
pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
pgstat_count_io_op(IOOBJECT_RELATION, io_context,
from_ring ? IOOP_REUSE : IOOP_EVICT);
}

View File

@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
*/
BufferDesc *
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
bool *foundPtr, IOContext *io_context)
bool *foundPtr)
{
BufferTag newTag; /* identity of requested block */
LocalBufferLookupEnt *hresult;
@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
hresult = (LocalBufferLookupEnt *)
hash_search(LocalBufHash, &newTag, HASH_FIND, NULL);
/*
* IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set
* io_context here (instead of after a buffer hit would have returned) for
* convenience since we don't have to worry about the overhead of calling
* IOContextForStrategy().
*/
*io_context = IOCONTEXT_NORMAL;
if (hresult)
{
b = hresult->id;
@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
buf_state &= ~BM_DIRTY;
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
/* Temporary table I/O does not use Buffer Access Strategies */
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
pgBufferUsage.local_blks_written++;
}

View File

@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
* Some BackendTypes will not do certain IOOps.
*/
if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
(io_op == IOOP_READ || io_op == IOOP_EVICT))
(io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
return false;
if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||

View File

@ -1259,6 +1259,7 @@ typedef enum io_stat_col
IO_COL_WRITES,
IO_COL_EXTENDS,
IO_COL_CONVERSION,
IO_COL_HITS,
IO_COL_EVICTIONS,
IO_COL_REUSES,
IO_COL_FSYNCS,
@ -1277,16 +1278,18 @@ pgstat_get_io_op_index(IOOp io_op)
{
case IOOP_EVICT:
return IO_COL_EVICTIONS;
case IOOP_EXTEND:
return IO_COL_EXTENDS;
case IOOP_FSYNC:
return IO_COL_FSYNCS;
case IOOP_HIT:
return IO_COL_HITS;
case IOOP_READ:
return IO_COL_READS;
case IOOP_REUSE:
return IO_COL_REUSES;
case IOOP_WRITE:
return IO_COL_WRITES;
case IOOP_EXTEND:
return IO_COL_EXTENDS;
case IOOP_FSYNC:
return IO_COL_FSYNCS;
}
elog(ERROR, "unrecognized IOOp value: %d", io_op);

View File

@ -5749,9 +5749,9 @@
proname => 'pg_stat_get_io', provolatile => 'v',
prorows => '30', proretset => 't',
proparallel => 'r', prorettype => 'record', proargtypes => '',
proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}',
proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}',
prosrc => 'pg_stat_get_io' },
{ oid => '1136', descr => 'statistics: information about WAL activity',

View File

@ -296,6 +296,7 @@ typedef enum IOOp
IOOP_EVICT,
IOOP_EXTEND,
IOOP_FSYNC,
IOOP_HIT,
IOOP_READ,
IOOP_REUSE,
IOOP_WRITE,

View File

@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
ForkNumber forkNum,
BlockNumber blockNum);
extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum, bool *foundPtr, IOContext *io_context);
BlockNumber blockNum, bool *foundPtr);
extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelationLocalBuffers(RelFileLocator rlocator,
ForkNumber forkNum,

View File

@ -1884,11 +1884,12 @@ pg_stat_io| SELECT backend_type,
writes,
extends,
op_bytes,
hits,
evictions,
reuses,
fsyncs,
stats_reset
FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset);
FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset);
pg_stat_progress_analyze| SELECT s.pid,
s.datid,
d.datname,

View File

@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
-- - writes of shared buffers to permanent storage
-- - extends of relations using shared buffers
-- - fsyncs done to ensure the durability of data dirtying shared buffers
-- - shared buffer hits
-- There is no test for blocks evicted from shared buffers, because we cannot
-- be sure of the state of shared buffers at the point the test is run.
-- Create a regular table and insert some data to generate IOCONTEXT_NORMAL
@ -1208,6 +1209,47 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
t
(1 row)
SELECT sum(hits) AS io_sum_shared_before_hits
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
-- Select from the table again to count hits.
-- Ensure we generate hits by forcing a nested loop self-join with no
-- materialize node. The outer side's buffer will stay pinned, preventing its
-- eviction, while we loop through the inner side and generate hits.
BEGIN;
SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off;
SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off;
-- ensure plan stays as we expect it to
EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
QUERY PLAN
-------------------------------------------
Aggregate
-> Nested Loop
Join Filter: (t1.a = t2.a)
-> Seq Scan on test_io_shared t1
-> Seq Scan on test_io_shared t2
(5 rows)
SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
count
-------
100
(1 row)
COMMIT;
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
--------------------------
(1 row)
SELECT sum(hits) AS io_sum_shared_after_hits
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
?column?
----------
t
(1 row)
DROP TABLE test_io_shared;
-- Test that the following IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
-- - eviction of local buffers in order to reuse them
@ -1342,7 +1384,7 @@ SELECT pg_stat_have_stats('io', 0, 0);
t
(1 row)
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
FROM pg_stat_io \gset
SELECT pg_stat_reset_shared('io');
pg_stat_reset_shared
@ -1350,7 +1392,7 @@ SELECT pg_stat_reset_shared('io');
(1 row)
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
FROM pg_stat_io \gset
SELECT :io_stats_post_reset < :io_stats_pre_reset;
?column?

View File

@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
-- - writes of shared buffers to permanent storage
-- - extends of relations using shared buffers
-- - fsyncs done to ensure the durability of data dirtying shared buffers
-- - shared buffer hits
-- There is no test for blocks evicted from shared buffers, because we cannot
-- be sure of the state of shared buffers at the point the test is run.
@ -587,6 +588,25 @@ SELECT pg_stat_force_next_flush();
SELECT sum(reads) AS io_sum_shared_after_reads
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
SELECT sum(hits) AS io_sum_shared_before_hits
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
-- Select from the table again to count hits.
-- Ensure we generate hits by forcing a nested loop self-join with no
-- materialize node. The outer side's buffer will stay pinned, preventing its
-- eviction, while we loop through the inner side and generate hits.
BEGIN;
SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off;
SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off;
-- ensure plan stays as we expect it to
EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
COMMIT;
SELECT pg_stat_force_next_flush();
SELECT sum(hits) AS io_sum_shared_after_hits
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
DROP TABLE test_io_shared;
-- Test that the following IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
@ -674,10 +694,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext
-- Test IO stats reset
SELECT pg_stat_have_stats('io', 0, 0);
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
FROM pg_stat_io \gset
SELECT pg_stat_reset_shared('io');
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
FROM pg_stat_io \gset
SELECT :io_stats_post_reset < :io_stats_pre_reset;