diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index ad5a3ac511..95e030b396 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -13,7 +13,7 @@ OBJS = \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.10--1.11.sql \ +DATA = pageinspect--1.11--1.12.sql pageinspect--1.10--1.11.sql \ pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \ pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index 9375d55e14..e4e5dc3c81 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -46,17 +46,13 @@ PG_FUNCTION_INFO_V1(bt_page_items); PG_FUNCTION_INFO_V1(bt_page_items_bytea); PG_FUNCTION_INFO_V1(bt_page_stats_1_9); PG_FUNCTION_INFO_V1(bt_page_stats); +PG_FUNCTION_INFO_V1(bt_multi_page_stats); #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX) #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) #define DatumGetItemPointer(X) ((ItemPointer) DatumGetPointer(X)) #define ItemPointerGetDatum(X) PointerGetDatum(X) -/* note: BlockNumber is unsigned, hence can't be negative */ -#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \ - if ( RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno) ) \ - elog(ERROR, "block number out of range"); } - /* ------------------------------------------------ * structure for single btree page statistics * ------------------------------------------------ @@ -80,6 +76,29 @@ typedef struct BTPageStat BTCycleId btpo_cycleid; } BTPageStat; +/* + * cross-call data structure for SRF for page stats + */ +typedef struct ua_page_stats +{ + Oid relid; + int64 blkno; + int64 blk_count; + bool allpages; +} ua_page_stats; + +/* + * cross-call data structure for SRF for page items + */ +typedef struct ua_page_items +{ + Page page; + OffsetNumber offset; + bool leafpage; + bool rightmost; + TupleDesc tupd; +} ua_page_items; + /* 
------------------------------------------------- * GetBTPageStatistics() @@ -176,10 +195,68 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->avg_item_size = 0; } +/* ----------------------------------------------- + * check_relation_block_range() + * + * Verify that a block number (given as int64) is valid for the relation. + * ----------------------------------------------- + */ +static void +check_relation_block_range(Relation rel, int64 blkno) +{ + /* Ensure we can cast to BlockNumber */ + if (blkno < 0 || blkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number %lld", + (long long) blkno))); + + if ((BlockNumber) (blkno) >= RelationGetNumberOfBlocks(rel)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("block number %lld is out of range", + (long long) blkno))); +} + +/* ----------------------------------------------- + * bt_index_block_validate() + * + * Validate index type is btree and block number + * is valid (and not the metapage). + * ----------------------------------------------- + */ +static void +bt_index_block_validate(Relation rel, int64 blkno) +{ + if (!IS_INDEX(rel) || !IS_BTREE(rel)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a %s index", + RelationGetRelationName(rel), "btree"))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. 
+ */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + if (blkno == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("block 0 is a meta page"))); + + check_relation_block_range(rel, blkno); +} + /* ----------------------------------------------- * bt_page_stats() * * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1); + * Arguments are index relation name and block number * ----------------------------------------------- */ static Datum @@ -205,33 +282,7 @@ bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); - if (!IS_INDEX(rel) || !IS_BTREE(rel)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is not a %s index", - RelationGetRelationName(rel), "btree"))); - - /* - * Reject attempts to read non-local temporary relations; we would be - * likely to get wrong data since we have no visibility into the owning - * session's local buffers. 
- */ - if (RELATION_IS_OTHER_TEMP(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot access temporary tables of other sessions"))); - - if (blkno < 0 || blkno > MaxBlockNumber) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid block number"))); - - if (blkno == 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("block 0 is a meta page"))); - - CHECK_RELATION_BLOCK_RANGE(rel, blkno); + bt_index_block_validate(rel, blkno); buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); @@ -284,17 +335,144 @@ bt_page_stats(PG_FUNCTION_ARGS) } -/* - * cross-call data structure for SRF +/* ----------------------------------------------- + * bt_multi_page_stats() + * + * Usage: SELECT * FROM bt_multi_page_stats('t1_pkey', 1, 2); + * Arguments are index relation name, first block number, number of blocks + * (but number of blocks can be negative to mean "read all the rest") + * ----------------------------------------------- */ -struct user_args +Datum +bt_multi_page_stats(PG_FUNCTION_ARGS) { - Page page; - OffsetNumber offset; - bool leafpage; - bool rightmost; - TupleDesc tupd; -}; + Relation rel; + ua_page_stats *uargs; + FuncCallContext *fctx; + MemoryContext mctx; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pageinspect functions"))); + + if (SRF_IS_FIRSTCALL()) + { + text *relname = PG_GETARG_TEXT_PP(0); + int64 blkno = PG_GETARG_INT64(1); + int64 blk_count = PG_GETARG_INT64(2); + RangeVar *relrv; + + fctx = SRF_FIRSTCALL_INIT(); + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + /* Check that rel is a valid btree index and 1st block number is OK */ + bt_index_block_validate(rel, blkno); + + /* + * Check if upper bound of the specified range is valid. If only one + * page is requested, skip as we've already validated the page. 
(Also, + * it's important to skip this if blk_count is negative.) + */ + if (blk_count > 1) + check_relation_block_range(rel, blkno + blk_count - 1); + + /* Save arguments for reuse */ + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + uargs = palloc(sizeof(ua_page_stats)); + + uargs->relid = RelationGetRelid(rel); + uargs->blkno = blkno; + uargs->blk_count = blk_count; + uargs->allpages = (blk_count < 0); + + fctx->user_fctx = uargs; + + MemoryContextSwitchTo(mctx); + + /* + * To avoid possibly leaking a relcache reference if the SRF isn't run + * to completion, we close and re-open the index rel each time + * through, using the index's OID for re-opens to ensure we get the + * same rel. Keep the AccessShareLock though, to ensure it doesn't go + * away underneath us. + */ + relation_close(rel, NoLock); + } + + fctx = SRF_PERCALL_SETUP(); + uargs = fctx->user_fctx; + + /* We should have lock already */ + rel = relation_open(uargs->relid, NoLock); + + /* In all-pages mode, recheck the index length each time */ + if (uargs->allpages) + uargs->blk_count = RelationGetNumberOfBlocks(rel) - uargs->blkno; + + if (uargs->blk_count > 0) + { + /* We need to fetch next block statistics */ + Buffer buffer; + Datum result; + HeapTuple tuple; + int j; + char *values[11]; + BTPageStat stat; + TupleDesc tupleDesc; + + buffer = ReadBuffer(rel, uargs->blkno); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + /* keep compiler quiet */ + stat.btpo_prev = stat.btpo_next = InvalidBlockNumber; + stat.btpo_flags = stat.free_size = stat.avg_item_size = 0; + + GetBTPageStatistics(uargs->blkno, buffer, &stat); + + UnlockReleaseBuffer(buffer); + relation_close(rel, NoLock); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + j = 0; + values[j++] = psprintf("%u", stat.blkno); + values[j++] = psprintf("%c", stat.type); + values[j++] = psprintf("%u", 
stat.live_items); + values[j++] = psprintf("%u", stat.dead_items); + values[j++] = psprintf("%u", stat.avg_item_size); + values[j++] = psprintf("%u", stat.page_size); + values[j++] = psprintf("%u", stat.free_size); + values[j++] = psprintf("%u", stat.btpo_prev); + values[j++] = psprintf("%u", stat.btpo_next); + values[j++] = psprintf("%u", stat.btpo_level); + values[j++] = psprintf("%d", stat.btpo_flags); + + /* Construct tuple to be returned */ + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = HeapTupleGetDatum(tuple); + + /* + * Move to the next block number and decrement the number of blocks + * still to be fetched + */ + uargs->blkno++; + uargs->blk_count--; + + SRF_RETURN_NEXT(fctx, result); + } + + /* Done, so finally we can release the index lock */ + relation_close(rel, AccessShareLock); + SRF_RETURN_DONE(fctx); +} /*------------------------------------------------------- * bt_page_print_tuples() @@ -303,7 +481,7 @@ struct user_args * ------------------------------------------------------ */ static Datum -bt_page_print_tuples(struct user_args *uargs) +bt_page_print_tuples(ua_page_items *uargs) { Page page = uargs->page; OffsetNumber offset = uargs->offset; @@ -453,7 +631,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) Datum result; FuncCallContext *fctx; MemoryContext mctx; - struct user_args *uargs; + ua_page_items *uargs; if (!superuser()) ereport(ERROR, @@ -473,33 +651,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); - if (!IS_INDEX(rel) || !IS_BTREE(rel)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is not a %s index", - RelationGetRelationName(rel), "btree"))); - - /* - * Reject attempts to read non-local temporary relations; we would be - * likely to get wrong data since we have no visibility into 
the - * owning session's local buffers. - */ - if (RELATION_IS_OTHER_TEMP(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot access temporary tables of other sessions"))); - - if (blkno < 0 || blkno > MaxBlockNumber) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid block number"))); - - if (blkno == 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("block 0 is a meta page"))); - - CHECK_RELATION_BLOCK_RANGE(rel, blkno); + bt_index_block_validate(rel, blkno); buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); @@ -511,7 +663,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) */ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); - uargs = palloc(sizeof(struct user_args)); + uargs = palloc(sizeof(ua_page_items)); uargs->page = palloc(BLCKSZ); memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ); @@ -587,7 +739,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS) bytea *raw_page = PG_GETARG_BYTEA_P(0); Datum result; FuncCallContext *fctx; - struct user_args *uargs; + ua_page_items *uargs; if (!superuser()) ereport(ERROR, @@ -603,7 +755,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS) fctx = SRF_FIRSTCALL_INIT(); mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); - uargs = palloc(sizeof(struct user_args)); + uargs = palloc(sizeof(ua_page_items)); uargs->page = get_page_from_raw(raw_page); diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out index 035a81a759..0aa5d73322 100644 --- a/contrib/pageinspect/expected/btree.out +++ b/contrib/pageinspect/expected/btree.out @@ -15,7 +15,7 @@ last_cleanup_num_tuples | -1 allequalimage | t SELECT * FROM bt_page_stats('test1_a_idx', -1); -ERROR: invalid block number +ERROR: invalid block number -1 SELECT * FROM bt_page_stats('test1_a_idx', 0); ERROR: block 0 is a meta page SELECT * FROM bt_page_stats('test1_a_idx', 1); @@ -33,9 +33,122 @@ btpo_level | 0 
btpo_flags | 3 SELECT * FROM bt_page_stats('test1_a_idx', 2); -ERROR: block number out of range +ERROR: block number 2 is out of range +-- bt_multi_page_stats() function returns a set of records of page statistics. +CREATE TABLE test2 AS (SELECT generate_series(1, 1000)::int8 AS col1); +CREATE INDEX test2_col1_idx ON test2(col1); +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 0, 1); +ERROR: block 0 is a meta page +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, -1); +-[ RECORD 1 ]-+----- +blkno | 1 +type | l +live_items | 367 +dead_items | 0 +avg_item_size | 16 +page_size | 8192 +free_size | 808 +btpo_prev | 0 +btpo_next | 2 +btpo_level | 0 +btpo_flags | 1 +-[ RECORD 2 ]-+----- +blkno | 2 +type | l +live_items | 367 +dead_items | 0 +avg_item_size | 16 +page_size | 8192 +free_size | 808 +btpo_prev | 1 +btpo_next | 4 +btpo_level | 0 +btpo_flags | 1 +-[ RECORD 3 ]-+----- +blkno | 3 +type | r +live_items | 3 +dead_items | 0 +avg_item_size | 13 +page_size | 8192 +free_size | 8096 +btpo_prev | 0 +btpo_next | 0 +btpo_level | 1 +btpo_flags | 2 +-[ RECORD 4 ]-+----- +blkno | 4 +type | l +live_items | 268 +dead_items | 0 +avg_item_size | 16 +page_size | 8192 +free_size | 2788 +btpo_prev | 2 +btpo_next | 0 +btpo_level | 0 +btpo_flags | 1 + +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 0); +(0 rows) + +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 2); +-[ RECORD 1 ]-+----- +blkno | 1 +type | l +live_items | 367 +dead_items | 0 +avg_item_size | 16 +page_size | 8192 +free_size | 808 +btpo_prev | 0 +btpo_next | 2 +btpo_level | 0 +btpo_flags | 1 +-[ RECORD 2 ]-+----- +blkno | 2 +type | l +live_items | 367 +dead_items | 0 +avg_item_size | 16 +page_size | 8192 +free_size | 808 +btpo_prev | 1 +btpo_next | 4 +btpo_level | 0 +btpo_flags | 1 + +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 3, 2); +-[ RECORD 1 ]-+----- +blkno | 3 +type | r +live_items | 3 +dead_items | 0 +avg_item_size | 13 +page_size | 8192 +free_size | 8096 +btpo_prev | 0 
+btpo_next | 0 +btpo_level | 1 +btpo_flags | 2 +-[ RECORD 2 ]-+----- +blkno | 4 +type | l +live_items | 268 +dead_items | 0 +avg_item_size | 16 +page_size | 8192 +free_size | 2788 +btpo_prev | 2 +btpo_next | 0 +btpo_level | 0 +btpo_flags | 1 + +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 7, 2); +ERROR: block number 7 is out of range +DROP TABLE test2; SELECT * FROM bt_page_items('test1_a_idx', -1); -ERROR: invalid block number +ERROR: invalid block number -1 SELECT * FROM bt_page_items('test1_a_idx', 0); ERROR: block 0 is a meta page SELECT * FROM bt_page_items('test1_a_idx', 1); @@ -51,7 +164,7 @@ htid | (0,1) tids | SELECT * FROM bt_page_items('test1_a_idx', 2); -ERROR: block number out of range +ERROR: block number 2 is out of range SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', -1)); ERROR: invalid block number SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 0)); diff --git a/contrib/pageinspect/meson.build b/contrib/pageinspect/meson.build index bc8b35b2a7..8e1897a0ea 100644 --- a/contrib/pageinspect/meson.build +++ b/contrib/pageinspect/meson.build @@ -36,6 +36,7 @@ install_data( 'pageinspect--1.8--1.9.sql', 'pageinspect--1.9--1.10.sql', 'pageinspect--1.10--1.11.sql', + 'pageinspect--1.11--1.12.sql', 'pageinspect.control', kwargs: contrib_data_args, ) diff --git a/contrib/pageinspect/pageinspect--1.11--1.12.sql b/contrib/pageinspect/pageinspect--1.11--1.12.sql new file mode 100644 index 0000000000..70c3abccf5 --- /dev/null +++ b/contrib/pageinspect/pageinspect--1.11--1.12.sql @@ -0,0 +1,23 @@ +/* contrib/pageinspect/pageinspect--1.11--1.12.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.12'" to load this file. 
\quit + +-- +-- bt_multi_page_stats() +-- +CREATE FUNCTION bt_multi_page_stats(IN relname text, IN blkno int8, IN blk_count int8, + OUT blkno int8, + OUT type "char", + OUT live_items int4, + OUT dead_items int4, + OUT avg_item_size int4, + OUT page_size int4, + OUT free_size int4, + OUT btpo_prev int8, + OUT btpo_next int8, + OUT btpo_level int8, + OUT btpo_flags int4) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'bt_multi_page_stats' +LANGUAGE C STRICT PARALLEL RESTRICTED; diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control index f277413dd8..b2804e9b12 100644 --- a/contrib/pageinspect/pageinspect.control +++ b/contrib/pageinspect/pageinspect.control @@ -1,5 +1,5 @@ # pageinspect extension comment = 'inspect the contents of database pages at a low level' -default_version = '1.11' +default_version = '1.12' module_pathname = '$libdir/pageinspect' relocatable = true diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql index 1f554f0f67..102ebdefe3 100644 --- a/contrib/pageinspect/sql/btree.sql +++ b/contrib/pageinspect/sql/btree.sql @@ -11,6 +11,17 @@ SELECT * FROM bt_page_stats('test1_a_idx', 0); SELECT * FROM bt_page_stats('test1_a_idx', 1); SELECT * FROM bt_page_stats('test1_a_idx', 2); +-- bt_multi_page_stats() function returns a set of records of page statistics. 
+CREATE TABLE test2 AS (SELECT generate_series(1, 1000)::int8 AS col1); +CREATE INDEX test2_col1_idx ON test2(col1); +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 0, 1); +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, -1); +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 0); +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 2); +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 3, 2); +SELECT * FROM bt_multi_page_stats('test2_col1_idx', 7, 2); +DROP TABLE test2; + SELECT * FROM bt_page_items('test1_a_idx', -1); SELECT * FROM bt_page_items('test1_a_idx', 0); SELECT * FROM bt_page_items('test1_a_idx', 1); diff --git a/doc/src/sgml/pageinspect.sgml b/doc/src/sgml/pageinspect.sgml index d4ee34ee0f..04aca998e7 100644 --- a/doc/src/sgml/pageinspect.sgml +++ b/doc/src/sgml/pageinspect.sgml @@ -326,7 +326,7 @@ allequalimage | f bt_page_stats returns summary information about - single pages of B-tree indexes. For example: + a data page of a B-tree index. For example: test=# SELECT * FROM bt_page_stats('pg_cast_oid_index', 1); -[ RECORD 1 ]-+----- @@ -346,6 +346,54 @@ btpo_flags | 3 + + + bt_multi_page_stats(relname text, blkno bigint, blk_count bigint) returns setof record + + bt_multi_page_stats + + + + + + bt_multi_page_stats returns the same information + as bt_page_stats, but does so for each page of the + range of pages beginning at blkno and extending + for blk_count pages. + If blk_count is negative, all pages + from blkno to the end of the index are reported + on. 
For example: + +test=# SELECT * FROM bt_multi_page_stats('pg_proc_oid_index', 5, 2); +-[ RECORD 1 ]-+----- +blkno | 5 +type | l +live_items | 367 +dead_items | 0 +avg_item_size | 16 +page_size | 8192 +free_size | 808 +btpo_prev | 4 +btpo_next | 6 +btpo_level | 0 +btpo_flags | 1 +-[ RECORD 2 ]-+----- +blkno | 6 +type | l +live_items | 367 +dead_items | 0 +avg_item_size | 16 +page_size | 8192 +free_size | 808 +btpo_prev | 5 +btpo_next | 7 +btpo_level | 0 +btpo_flags | 1 + + + + + bt_page_items(relname text, blkno bigint) returns setof record