Add bt_multi_page_stats() function to contrib/pageinspect.

This is like the existing bt_page_stats() function, but it can
report on a range of pages rather than just one at a time.

I don't have a huge amount of faith in the portability of the
new test cases, but they do pass in a 32-bit FreeBSD VM here.
Further adjustment may be needed depending on buildfarm results.

Hamid Akhtar, reviewed by Naeem Akhter, Bertrand Drouvot,
Bharath Rupireddy, and myself

Discussion: https://postgr.es/m/CANugjht-=oGMRmNJKMqnBC69y7vr+wHDmm0ZK6-1pJsxoBKBbA@mail.gmail.com
This commit is contained in:
Tom Lane 2023-01-02 13:02:29 -05:00
parent e351f85418
commit 1fd3dd2048
8 changed files with 428 additions and 80 deletions

View File

@ -13,7 +13,7 @@ OBJS = \
rawpage.o
EXTENSION = pageinspect
DATA = pageinspect--1.10--1.11.sql \
DATA = pageinspect--1.11--1.12.sql pageinspect--1.10--1.11.sql \
pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \
pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \
pageinspect--1.5.sql pageinspect--1.5--1.6.sql \

View File

@ -46,17 +46,13 @@ PG_FUNCTION_INFO_V1(bt_page_items);
PG_FUNCTION_INFO_V1(bt_page_items_bytea);
PG_FUNCTION_INFO_V1(bt_page_stats_1_9);
PG_FUNCTION_INFO_V1(bt_page_stats);
PG_FUNCTION_INFO_V1(bt_multi_page_stats);
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
#define DatumGetItemPointer(X) ((ItemPointer) DatumGetPointer(X))
#define ItemPointerGetDatum(X) PointerGetDatum(X)
/* note: BlockNumber is unsigned, hence can't be negative */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \
if ( RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno) ) \
elog(ERROR, "block number out of range"); }
/* ------------------------------------------------
* structure for single btree page statistics
* ------------------------------------------------
@ -80,6 +76,29 @@ typedef struct BTPageStat
BTCycleId btpo_cycleid;
} BTPageStat;
/*
 * cross-call data structure for SRF for page stats
 *
 * bt_multi_page_stats() allocates one of these in the SRF's multi-call
 * memory context on its first call, and updates it as each row is returned.
 */
typedef struct ua_page_stats
{
/* OID of the index; used to re-open the relation on every call */
Oid relid;
/* block number of the next page to report on (advanced after each row) */
int64 blkno;
/* number of pages still to be reported (decremented after each row) */
int64 blk_count;
/*
 * true when the caller passed a negative blk_count; blk_count is then
 * recomputed from the current index length on every call
 */
bool allpages;
} ua_page_stats;
/*
 * cross-call data structure for SRF for page items
 *
 * Passed to bt_page_print_tuples(); allocated by the bt_page_items
 * functions in the SRF's multi-call memory context.
 */
typedef struct ua_page_items
{
/* page image being examined (bt_page_items_internal stores a palloc'd copy) */
Page page;
/* item offset read by bt_page_print_tuples() */
OffsetNumber offset;
/* NOTE(review): presumably "page is a leaf page"; set outside this view -- confirm */
bool leafpage;
/* NOTE(review): presumably "page is the rightmost on its level" -- confirm */
bool rightmost;
/* NOTE(review): presumably the tuple descriptor of the result rows -- confirm */
TupleDesc tupd;
} ua_page_items;
/* -------------------------------------------------
* GetBTPageStatistics()
@ -176,10 +195,68 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
stat->avg_item_size = 0;
}
/* -----------------------------------------------
 * check_relation_block_range()
 *
 * Raise an error unless blkno names a block that currently exists in rel.
 * The block number arrives as int64 so that values which do not fit in a
 * BlockNumber can be detected and reported instead of being truncated.
 * -----------------------------------------------
 */
static void
check_relation_block_range(Relation rel, int64 blkno)
{
	BlockNumber nblocks;

	/* Reject anything that cannot be represented as a BlockNumber */
	if (!(blkno >= 0 && blkno <= MaxBlockNumber))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid block number %lld",
						(long long) blkno)));

	/* The narrowing cast is now safe; compare with the relation's length */
	nblocks = RelationGetNumberOfBlocks(rel);
	if (nblocks <= (BlockNumber) blkno)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("block number %lld is out of range",
						(long long) blkno)));
}
/* -----------------------------------------------
 * bt_index_block_validate()
 *
 * Common argument checks for the btree inspection functions: rel must be
 * a btree index that this session is able to read, and blkno must be an
 * existing block other than the metapage (block 0).
 * Any violation is reported with ereport(ERROR).
 * -----------------------------------------------
 */
static void
bt_index_block_validate(Relation rel, int64 blkno)
{
	bool		is_btree_index = IS_INDEX(rel) && IS_BTREE(rel);

	if (!is_btree_index)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a %s index",
						RelationGetRelationName(rel), "btree")));

	/*
	 * Temporary relations of other sessions live in those sessions' local
	 * buffers, which we cannot see; reading them here would likely return
	 * wrong data, so refuse.
	 */
	if (RELATION_IS_OTHER_TEMP(rel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary tables of other sessions")));

	/* The metapage gets its own message, ahead of the generic range check */
	if (blkno == 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("block 0 is a meta page")));

	check_relation_block_range(rel, blkno);
}
/* -----------------------------------------------
* bt_page_stats()
*
* Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
* Arguments are index relation name and block number
* -----------------------------------------------
*/
static Datum
@ -205,33 +282,7 @@ bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
rel = relation_openrv(relrv, AccessShareLock);
if (!IS_INDEX(rel) || !IS_BTREE(rel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a %s index",
RelationGetRelationName(rel), "btree")));
/*
* Reject attempts to read non-local temporary relations; we would be
* likely to get wrong data since we have no visibility into the owning
* session's local buffers.
*/
if (RELATION_IS_OTHER_TEMP(rel))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot access temporary tables of other sessions")));
if (blkno < 0 || blkno > MaxBlockNumber)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid block number")));
if (blkno == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("block 0 is a meta page")));
CHECK_RELATION_BLOCK_RANGE(rel, blkno);
bt_index_block_validate(rel, blkno);
buffer = ReadBuffer(rel, blkno);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
@ -284,17 +335,144 @@ bt_page_stats(PG_FUNCTION_ARGS)
}
/*
* cross-call data structure for SRF
/* -----------------------------------------------
* bt_multi_page_stats()
*
* Usage: SELECT * FROM bt_multi_page_stats('t1_pkey', 1, 2);
* Arguments are index relation name, first block number, number of blocks
* (but number of blocks can be negative to mean "read all the rest")
* -----------------------------------------------
*/
struct user_args
Datum
bt_multi_page_stats(PG_FUNCTION_ARGS)
{
Page page;
OffsetNumber offset;
bool leafpage;
bool rightmost;
TupleDesc tupd;
};
Relation rel;
ua_page_stats *uargs;
FuncCallContext *fctx;
MemoryContext mctx;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to use pageinspect functions")));
/* First call: validate the arguments and stash them for later calls */
if (SRF_IS_FIRSTCALL())
{
text *relname = PG_GETARG_TEXT_PP(0);
int64 blkno = PG_GETARG_INT64(1);
int64 blk_count = PG_GETARG_INT64(2);
RangeVar *relrv;
fctx = SRF_FIRSTCALL_INIT();
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
rel = relation_openrv(relrv, AccessShareLock);
/* Check that rel is a valid btree index and 1st block number is OK */
bt_index_block_validate(rel, blkno);
/*
* Check if upper bound of the specified range is valid. If only one
* page is requested, skip as we've already validated the page. (Also,
* it's important to skip this if blk_count is negative.)
*
* NOTE(review): blkno + blk_count - 1 is evaluated in int64; a very large
* blk_count could overflow here before the range check sees the value.
* Consider bounding blk_count explicitly -- confirm.
*/
if (blk_count > 1)
check_relation_block_range(rel, blkno + blk_count - 1);
/* Save arguments for reuse */
mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
uargs = palloc(sizeof(ua_page_stats));
uargs->relid = RelationGetRelid(rel);
uargs->blkno = blkno;
uargs->blk_count = blk_count;
uargs->allpages = (blk_count < 0);
fctx->user_fctx = uargs;
MemoryContextSwitchTo(mctx);
/*
* To avoid possibly leaking a relcache reference if the SRF isn't run
* to completion, we close and re-open the index rel each time
* through, using the index's OID for re-opens to ensure we get the
* same rel. Keep the AccessShareLock though, to ensure it doesn't go
* away underneath us.
*/
relation_close(rel, NoLock);
}
/* Every call (including the first): pick up the saved state */
fctx = SRF_PERCALL_SETUP();
uargs = fctx->user_fctx;
/* We should have lock already */
rel = relation_open(uargs->relid, NoLock);
/*
* In all-pages mode, recheck the index length each time.  uargs->blkno has
* already been advanced past the rows returned so far, so the difference is
* the number of pages still to report.
*/
if (uargs->allpages)
uargs->blk_count = RelationGetNumberOfBlocks(rel) - uargs->blkno;
if (uargs->blk_count > 0)
{
/* We need to fetch next block statistics */
Buffer buffer;
Datum result;
HeapTuple tuple;
int j;
/* one C string per output column of the function's result row */
char *values[11];
BTPageStat stat;
TupleDesc tupleDesc;
buffer = ReadBuffer(rel, uargs->blkno);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
/* keep compiler quiet */
stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
GetBTPageStatistics(uargs->blkno, buffer, &stat);
UnlockReleaseBuffer(buffer);
/* drop only the relcache reference; the lock is kept until the final call */
relation_close(rel, NoLock);
/* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
/* format each statistic as text, in output-column order */
j = 0;
values[j++] = psprintf("%u", stat.blkno);
values[j++] = psprintf("%c", stat.type);
values[j++] = psprintf("%u", stat.live_items);
values[j++] = psprintf("%u", stat.dead_items);
values[j++] = psprintf("%u", stat.avg_item_size);
values[j++] = psprintf("%u", stat.page_size);
values[j++] = psprintf("%u", stat.free_size);
values[j++] = psprintf("%u", stat.btpo_prev);
values[j++] = psprintf("%u", stat.btpo_next);
values[j++] = psprintf("%u", stat.btpo_level);
values[j++] = psprintf("%d", stat.btpo_flags);
/* Construct tuple to be returned */
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
values);
result = HeapTupleGetDatum(tuple);
/*
* Move to the next block number and decrement the number of blocks
* still to be fetched
*/
uargs->blkno++;
uargs->blk_count--;
SRF_RETURN_NEXT(fctx, result);
}
/* Done, so finally we can release the index lock */
relation_close(rel, AccessShareLock);
SRF_RETURN_DONE(fctx);
}
/*-------------------------------------------------------
* bt_page_print_tuples()
@ -303,7 +481,7 @@ struct user_args
* ------------------------------------------------------
*/
static Datum
bt_page_print_tuples(struct user_args *uargs)
bt_page_print_tuples(ua_page_items *uargs)
{
Page page = uargs->page;
OffsetNumber offset = uargs->offset;
@ -453,7 +631,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
Datum result;
FuncCallContext *fctx;
MemoryContext mctx;
struct user_args *uargs;
ua_page_items *uargs;
if (!superuser())
ereport(ERROR,
@ -473,33 +651,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
rel = relation_openrv(relrv, AccessShareLock);
if (!IS_INDEX(rel) || !IS_BTREE(rel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a %s index",
RelationGetRelationName(rel), "btree")));
/*
* Reject attempts to read non-local temporary relations; we would be
* likely to get wrong data since we have no visibility into the
* owning session's local buffers.
*/
if (RELATION_IS_OTHER_TEMP(rel))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot access temporary tables of other sessions")));
if (blkno < 0 || blkno > MaxBlockNumber)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid block number")));
if (blkno == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("block 0 is a meta page")));
CHECK_RELATION_BLOCK_RANGE(rel, blkno);
bt_index_block_validate(rel, blkno);
buffer = ReadBuffer(rel, blkno);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
@ -511,7 +663,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
*/
mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
uargs = palloc(sizeof(struct user_args));
uargs = palloc(sizeof(ua_page_items));
uargs->page = palloc(BLCKSZ);
memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
@ -587,7 +739,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
bytea *raw_page = PG_GETARG_BYTEA_P(0);
Datum result;
FuncCallContext *fctx;
struct user_args *uargs;
ua_page_items *uargs;
if (!superuser())
ereport(ERROR,
@ -603,7 +755,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
fctx = SRF_FIRSTCALL_INIT();
mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
uargs = palloc(sizeof(struct user_args));
uargs = palloc(sizeof(ua_page_items));
uargs->page = get_page_from_raw(raw_page);

View File

@ -15,7 +15,7 @@ last_cleanup_num_tuples | -1
allequalimage | t
SELECT * FROM bt_page_stats('test1_a_idx', -1);
ERROR: invalid block number
ERROR: invalid block number -1
SELECT * FROM bt_page_stats('test1_a_idx', 0);
ERROR: block 0 is a meta page
SELECT * FROM bt_page_stats('test1_a_idx', 1);
@ -33,9 +33,122 @@ btpo_level | 0
btpo_flags | 3
SELECT * FROM bt_page_stats('test1_a_idx', 2);
ERROR: block number out of range
ERROR: block number 2 is out of range
-- bt_multi_page_stats() function returns a set of records of page statistics.
CREATE TABLE test2 AS (SELECT generate_series(1, 1000)::int8 AS col1);
CREATE INDEX test2_col1_idx ON test2(col1);
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 0, 1);
ERROR: block 0 is a meta page
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, -1);
-[ RECORD 1 ]-+-----
blkno | 1
type | l
live_items | 367
dead_items | 0
avg_item_size | 16
page_size | 8192
free_size | 808
btpo_prev | 0
btpo_next | 2
btpo_level | 0
btpo_flags | 1
-[ RECORD 2 ]-+-----
blkno | 2
type | l
live_items | 367
dead_items | 0
avg_item_size | 16
page_size | 8192
free_size | 808
btpo_prev | 1
btpo_next | 4
btpo_level | 0
btpo_flags | 1
-[ RECORD 3 ]-+-----
blkno | 3
type | r
live_items | 3
dead_items | 0
avg_item_size | 13
page_size | 8192
free_size | 8096
btpo_prev | 0
btpo_next | 0
btpo_level | 1
btpo_flags | 2
-[ RECORD 4 ]-+-----
blkno | 4
type | l
live_items | 268
dead_items | 0
avg_item_size | 16
page_size | 8192
free_size | 2788
btpo_prev | 2
btpo_next | 0
btpo_level | 0
btpo_flags | 1
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 0);
(0 rows)
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 2);
-[ RECORD 1 ]-+-----
blkno | 1
type | l
live_items | 367
dead_items | 0
avg_item_size | 16
page_size | 8192
free_size | 808
btpo_prev | 0
btpo_next | 2
btpo_level | 0
btpo_flags | 1
-[ RECORD 2 ]-+-----
blkno | 2
type | l
live_items | 367
dead_items | 0
avg_item_size | 16
page_size | 8192
free_size | 808
btpo_prev | 1
btpo_next | 4
btpo_level | 0
btpo_flags | 1
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 3, 2);
-[ RECORD 1 ]-+-----
blkno | 3
type | r
live_items | 3
dead_items | 0
avg_item_size | 13
page_size | 8192
free_size | 8096
btpo_prev | 0
btpo_next | 0
btpo_level | 1
btpo_flags | 2
-[ RECORD 2 ]-+-----
blkno | 4
type | l
live_items | 268
dead_items | 0
avg_item_size | 16
page_size | 8192
free_size | 2788
btpo_prev | 2
btpo_next | 0
btpo_level | 0
btpo_flags | 1
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 7, 2);
ERROR: block number 7 is out of range
DROP TABLE test2;
SELECT * FROM bt_page_items('test1_a_idx', -1);
ERROR: invalid block number
ERROR: invalid block number -1
SELECT * FROM bt_page_items('test1_a_idx', 0);
ERROR: block 0 is a meta page
SELECT * FROM bt_page_items('test1_a_idx', 1);
@ -51,7 +164,7 @@ htid | (0,1)
tids |
SELECT * FROM bt_page_items('test1_a_idx', 2);
ERROR: block number out of range
ERROR: block number 2 is out of range
SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', -1));
ERROR: invalid block number
SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 0));

View File

@ -36,6 +36,7 @@ install_data(
'pageinspect--1.8--1.9.sql',
'pageinspect--1.9--1.10.sql',
'pageinspect--1.10--1.11.sql',
'pageinspect--1.11--1.12.sql',
'pageinspect.control',
kwargs: contrib_data_args,
)

View File

@ -0,0 +1,23 @@
/* contrib/pageinspect/pageinspect--1.11--1.12.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.12'" to load this file. \quit
--
-- bt_multi_page_stats()
--
-- Set-returning counterpart of bt_page_stats(): returns one row of page
-- statistics for each of blk_count pages, starting at block blkno.
-- A negative blk_count means "all pages from blkno to the end of the index".
--
CREATE FUNCTION bt_multi_page_stats(IN relname text, IN blkno int8, IN blk_count int8,
OUT blkno int8,
OUT type "char",
OUT live_items int4,
OUT dead_items int4,
OUT avg_item_size int4,
OUT page_size int4,
OUT free_size int4,
OUT btpo_prev int8,
OUT btpo_next int8,
OUT btpo_level int8,
OUT btpo_flags int4)
RETURNS SETOF record
AS 'MODULE_PATHNAME', 'bt_multi_page_stats'
LANGUAGE C STRICT PARALLEL RESTRICTED;

View File

@ -1,5 +1,5 @@
# pageinspect extension
comment = 'inspect the contents of database pages at a low level'
default_version = '1.11'
default_version = '1.12'
module_pathname = '$libdir/pageinspect'
relocatable = true

View File

@ -11,6 +11,17 @@ SELECT * FROM bt_page_stats('test1_a_idx', 0);
SELECT * FROM bt_page_stats('test1_a_idx', 1);
SELECT * FROM bt_page_stats('test1_a_idx', 2);
-- bt_multi_page_stats() function returns a set of records of page statistics.
CREATE TABLE test2 AS (SELECT generate_series(1, 1000)::int8 AS col1);
CREATE INDEX test2_col1_idx ON test2(col1);
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 0, 1);
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, -1);
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 0);
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 2);
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 3, 2);
SELECT * FROM bt_multi_page_stats('test2_col1_idx', 7, 2);
DROP TABLE test2;
SELECT * FROM bt_page_items('test1_a_idx', -1);
SELECT * FROM bt_page_items('test1_a_idx', 0);
SELECT * FROM bt_page_items('test1_a_idx', 1);

View File

@ -326,7 +326,7 @@ allequalimage | f
<listitem>
<para>
<function>bt_page_stats</function> returns summary information about
single pages of B-tree indexes. For example:
a data page of a B-tree index. For example:
<screen>
test=# SELECT * FROM bt_page_stats('pg_cast_oid_index', 1);
-[ RECORD 1 ]-+-----
@ -346,6 +346,54 @@ btpo_flags | 3
</listitem>
</varlistentry>
<varlistentry>
<term>
<function>bt_multi_page_stats(relname text, blkno bigint, blk_count bigint) returns setof record</function>
<indexterm>
<primary>bt_multi_page_stats</primary>
</indexterm>
</term>
<listitem>
<para>
<function>bt_multi_page_stats</function> returns the same information
as <function>bt_page_stats</function>, but does so for each page of the
range of pages beginning at <parameter>blkno</parameter> and extending
for <parameter>blk_count</parameter> pages.
If <parameter>blk_count</parameter> is negative, all pages
from <parameter>blkno</parameter> to the end of the index are reported
on. For example:
<screen>
test=# SELECT * FROM bt_multi_page_stats('pg_proc_oid_index', 5, 2);
-[ RECORD 1 ]-+-----
blkno | 5
type | l
live_items | 367
dead_items | 0
avg_item_size | 16
page_size | 8192
free_size | 808
btpo_prev | 4
btpo_next | 6
btpo_level | 0
btpo_flags | 1
-[ RECORD 2 ]-+-----
blkno | 6
type | l
live_items | 367
dead_items | 0
avg_item_size | 16
page_size | 8192
free_size | 808
btpo_prev | 5
btpo_next | 7
btpo_level | 0
btpo_flags | 1
</screen>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<function>bt_page_items(relname text, blkno bigint) returns setof record</function>