/* * contrib/pgstattuple/pgstatindex.c * * * pgstatindex * * Copyright (c) 2006 Satoshi Nagayasu * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose, without fee, and without a * written agreement is hereby granted, provided that the above * copyright notice and this paragraph and the following two * paragraphs appear in all copies. * * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. */ #include "postgres.h" #include "access/gin_private.h" #include "access/hash.h" #include "access/htup_details.h" #include "access/nbtree.h" #include "access/relation.h" #include "access/table.h" #include "catalog/namespace.h" #include "catalog/pg_am.h" #include "funcapi.h" #include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/rel.h" #include "utils/varlena.h" /* * Because of backward-compatibility issue, we have decided to have * two types of interfaces, with regclass-type input arg and text-type * input arg, for each function. * * Those functions which have text-type input arg will be deprecated * in the future release. */ PG_FUNCTION_INFO_V1(pgstatindex); PG_FUNCTION_INFO_V1(pgstatindexbyid); PG_FUNCTION_INFO_V1(pg_relpages); PG_FUNCTION_INFO_V1(pg_relpagesbyid); PG_FUNCTION_INFO_V1(pgstatginindex); PG_FUNCTION_INFO_V1(pgstathashindex); PG_FUNCTION_INFO_V1(pgstatindex_v1_5); PG_FUNCTION_INFO_V1(pgstatindexbyid_v1_5); PG_FUNCTION_INFO_V1(pg_relpages_v1_5); PG_FUNCTION_INFO_V1(pg_relpagesbyid_v1_5); PG_FUNCTION_INFO_V1(pgstatginindex_v1_5); Datum pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo); #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX) #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) #define IS_GIN(r) ((r)->rd_rel->relam == GIN_AM_OID) #define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID) /* ------------------------------------------------ * A structure for a whole btree index statistics * used by pgstatindex(). * ------------------------------------------------ */ typedef struct BTIndexStat { uint32 version; uint32 level; BlockNumber root_blkno; uint64 internal_pages; uint64 leaf_pages; uint64 empty_pages; uint64 deleted_pages; uint64 max_avail; uint64 free_space; uint64 fragments; } BTIndexStat; /* ------------------------------------------------ * A structure for a whole GIN index statistics * used by pgstatginindex(). * ------------------------------------------------ */ typedef struct GinIndexStat { int32 version; BlockNumber pending_pages; int64 pending_tuples; } GinIndexStat; /* ------------------------------------------------ * A structure for a whole HASH index statistics * used by pgstathashindex(). * ------------------------------------------------ */ typedef struct HashIndexStat { int32 version; int32 space_per_page; BlockNumber bucket_pages; BlockNumber overflow_pages; BlockNumber bitmap_pages; BlockNumber unused_pages; int64 live_items; int64 dead_items; uint64 free_space; } HashIndexStat; static Datum pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo); static int64 pg_relpages_impl(Relation rel); static void GetHashPageStats(Page page, HashIndexStat *stats); /* ------------------------------------------------------ * pgstatindex() * * Usage: SELECT * FROM pgstatindex('t1_pkey'); * * The superuser() check here must be kept as the library might be upgraded * without the extension being upgraded, meaning that in pre-1.5 installations * these functions could be called by any user. * ------------------------------------------------------ */ Datum pgstatindex(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_PP(0); Relation rel; RangeVar *relrv; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use pgstattuple functions"))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo)); } /* * As of pgstattuple version 1.5, we no longer need to check if the user * is a superuser because we REVOKE EXECUTE on the function from PUBLIC. * Users can then grant access to it based on their policies. * * Otherwise identical to pgstatindex (above). */ Datum pgstatindex_v1_5(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_PP(0); Relation rel; RangeVar *relrv; relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo)); } /* * The superuser() check here must be kept as the library might be upgraded * without the extension being upgraded, meaning that in pre-1.5 installations * these functions could be called by any user. */ Datum pgstatindexbyid(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Relation rel; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use pgstattuple functions"))); rel = relation_open(relid, AccessShareLock); PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo)); } /* No need for superuser checks in v1.5, see above */ Datum pgstatindexbyid_v1_5(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Relation rel; rel = relation_open(relid, AccessShareLock); PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo)); } static Datum pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo) { Datum result; BlockNumber nblocks; BlockNumber blkno; BTIndexStat indexStat; BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); if (!IS_INDEX(rel) || !IS_BTREE(rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("relation \"%s\" is not a btree index", RelationGetRelationName(rel)))); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); /* * A !indisready index could lead to ERRCODE_DATA_CORRUPTED later, so exit * early. We're capable of assessing an indisready&&!indisvalid index, * but the results could be confusing. For example, the index's size * could be too low for a valid index of the table. */ if (!rel->rd_index->indisvalid) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("index \"%s\" is not valid", RelationGetRelationName(rel)))); /* * Read metapage */ { Buffer buffer = ReadBufferExtended(rel, MAIN_FORKNUM, 0, RBM_NORMAL, bstrategy); Page page = BufferGetPage(buffer); BTMetaPageData *metad = BTPageGetMeta(page); indexStat.version = metad->btm_version; indexStat.level = metad->btm_level; indexStat.root_blkno = metad->btm_root; ReleaseBuffer(buffer); } /* -- init counters -- */ indexStat.internal_pages = 0; indexStat.leaf_pages = 0; indexStat.empty_pages = 0; indexStat.deleted_pages = 0; indexStat.max_avail = 0; indexStat.free_space = 0; indexStat.fragments = 0; /* * Scan all blocks except the metapage */ nblocks = RelationGetNumberOfBlocks(rel); for (blkno = 1; blkno < nblocks; blkno++) { Buffer buffer; Page page; BTPageOpaque opaque; CHECK_FOR_INTERRUPTS(); /* Read and lock buffer */ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); opaque = BTPageGetOpaque(page); /* * Determine page type, and update totals. * * Note that we arbitrarily bucket deleted pages together without * considering if they're leaf pages or internal pages. */ if (P_ISDELETED(opaque)) indexStat.deleted_pages++; else if (P_IGNORE(opaque)) indexStat.empty_pages++; /* this is the "half dead" state */ else if (P_ISLEAF(opaque)) { int max_avail; max_avail = BLCKSZ - (BLCKSZ - ((PageHeader) page)->pd_special + SizeOfPageHeaderData); indexStat.max_avail += max_avail; indexStat.free_space += PageGetFreeSpace(page); indexStat.leaf_pages++; /* * If the next leaf is on an earlier block, it means a * fragmentation. */ if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno) indexStat.fragments++; } else indexStat.internal_pages++; /* Unlock and release buffer */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); } relation_close(rel, AccessShareLock); /*---------------------------- * Build a result tuple *---------------------------- */ { TupleDesc tupleDesc; int j; char *values[10]; HeapTuple tuple; /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); j = 0; values[j++] = psprintf("%d", indexStat.version); values[j++] = psprintf("%d", indexStat.level); values[j++] = psprintf(INT64_FORMAT, (1 + /* include the metapage in index_size */ indexStat.leaf_pages + indexStat.internal_pages + indexStat.deleted_pages + indexStat.empty_pages) * BLCKSZ); values[j++] = psprintf("%u", indexStat.root_blkno); values[j++] = psprintf(INT64_FORMAT, indexStat.internal_pages); values[j++] = psprintf(INT64_FORMAT, indexStat.leaf_pages); values[j++] = psprintf(INT64_FORMAT, indexStat.empty_pages); values[j++] = psprintf(INT64_FORMAT, indexStat.deleted_pages); if (indexStat.max_avail > 0) values[j++] = psprintf("%.2f", 100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0); else values[j++] = pstrdup("NaN"); if (indexStat.leaf_pages > 0) values[j++] = psprintf("%.2f", (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0); else values[j++] = pstrdup("NaN"); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), values); result = HeapTupleGetDatum(tuple); } return result; } /* -------------------------------------------------------- * pg_relpages() * * Get the number of pages of the table/index. * * Usage: SELECT pg_relpages('t1'); * SELECT pg_relpages('t1_pkey'); * * Must keep superuser() check, see above. * -------------------------------------------------------- */ Datum pg_relpages(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_PP(0); Relation rel; RangeVar *relrv; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use pgstattuple functions"))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); PG_RETURN_INT64(pg_relpages_impl(rel)); } /* No need for superuser checks in v1.5, see above */ Datum pg_relpages_v1_5(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_PP(0); Relation rel; RangeVar *relrv; relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); PG_RETURN_INT64(pg_relpages_impl(rel)); } /* Must keep superuser() check, see above. */ Datum pg_relpagesbyid(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Relation rel; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use pgstattuple functions"))); rel = relation_open(relid, AccessShareLock); PG_RETURN_INT64(pg_relpages_impl(rel)); } /* No need for superuser checks in v1.5, see above */ Datum pg_relpagesbyid_v1_5(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Relation rel; rel = relation_open(relid, AccessShareLock); PG_RETURN_INT64(pg_relpages_impl(rel)); } static int64 pg_relpages_impl(Relation rel) { int64 relpages; if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get page count of relation \"%s\"", RelationGetRelationName(rel)), errdetail_relkind_not_supported(rel->rd_rel->relkind))); /* note: this will work OK on non-local temp tables */ relpages = RelationGetNumberOfBlocks(rel); relation_close(rel, AccessShareLock); return relpages; } /* ------------------------------------------------------ * pgstatginindex() * * Usage: SELECT * FROM pgstatginindex('ginindex'); * * Must keep superuser() check, see above. * ------------------------------------------------------ */ Datum pgstatginindex(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use pgstattuple functions"))); PG_RETURN_DATUM(pgstatginindex_internal(relid, fcinfo)); } /* No need for superuser checks in v1.5, see above */ Datum pgstatginindex_v1_5(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); PG_RETURN_DATUM(pgstatginindex_internal(relid, fcinfo)); } Datum pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo) { Relation rel; Buffer buffer; Page page; GinMetaPageData *metadata; GinIndexStat stats; HeapTuple tuple; TupleDesc tupleDesc; Datum values[3]; bool nulls[3] = {false, false, false}; Datum result; rel = relation_open(relid, AccessShareLock); if (!IS_INDEX(rel) || !IS_GIN(rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("relation \"%s\" is not a GIN index", RelationGetRelationName(rel)))); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary indexes of other sessions"))); /* see pgstatindex_impl */ if (!rel->rd_index->indisvalid) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("index \"%s\" is not valid", RelationGetRelationName(rel)))); /* * Read metapage */ buffer = ReadBuffer(rel, GIN_METAPAGE_BLKNO); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); metadata = GinPageGetMeta(page); stats.version = metadata->ginVersion; stats.pending_pages = metadata->nPendingPages; stats.pending_tuples = metadata->nPendingHeapTuples; UnlockReleaseBuffer(buffer); relation_close(rel, AccessShareLock); /* * Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); values[0] = Int32GetDatum(stats.version); values[1] = UInt32GetDatum(stats.pending_pages); values[2] = Int64GetDatum(stats.pending_tuples); /* * Build and return the tuple */ tuple = heap_form_tuple(tupleDesc, values, nulls); result = HeapTupleGetDatum(tuple); return result; } /* ------------------------------------------------------ * pgstathashindex() * * Usage: SELECT * FROM pgstathashindex('hashindex'); * ------------------------------------------------------ */ Datum pgstathashindex(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); BlockNumber nblocks; BlockNumber blkno; Relation rel; HashIndexStat stats; BufferAccessStrategy bstrategy; HeapTuple tuple; TupleDesc tupleDesc; Datum values[8]; bool nulls[8] = {0}; Buffer metabuf; HashMetaPage metap; float8 free_percent; uint64 total_space; rel = relation_open(relid, AccessShareLock); if (!IS_INDEX(rel) || !IS_HASH(rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("relation \"%s\" is not a hash index", RelationGetRelationName(rel)))); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary indexes of other sessions"))); /* see pgstatindex_impl */ if (!rel->rd_index->indisvalid) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("index \"%s\" is not valid", RelationGetRelationName(rel)))); /* Get the information we need from the metapage. */ memset(&stats, 0, sizeof(stats)); metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); stats.version = metap->hashm_version; stats.space_per_page = metap->hashm_bsize; _hash_relbuf(rel, metabuf); /* Get the current relation length */ nblocks = RelationGetNumberOfBlocks(rel); /* prepare access strategy for this index */ bstrategy = GetAccessStrategy(BAS_BULKREAD); /* Start from blkno 1 as 0th block is metapage */ for (blkno = 1; blkno < nblocks; blkno++) { Buffer buf; Page page; CHECK_FOR_INTERRUPTS(); buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buf, BUFFER_LOCK_SHARE); page = (Page) BufferGetPage(buf); if (PageIsNew(page)) stats.unused_pages++; else if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData))) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("index \"%s\" contains corrupted page at block %u", RelationGetRelationName(rel), BufferGetBlockNumber(buf)))); else { HashPageOpaque opaque; int pagetype; opaque = HashPageGetOpaque(page); pagetype = opaque->hasho_flag & LH_PAGE_TYPE; if (pagetype == LH_BUCKET_PAGE) { stats.bucket_pages++; GetHashPageStats(page, &stats); } else if (pagetype == LH_OVERFLOW_PAGE) { stats.overflow_pages++; GetHashPageStats(page, &stats); } else if (pagetype == LH_BITMAP_PAGE) stats.bitmap_pages++; else if (pagetype == LH_UNUSED_PAGE) stats.unused_pages++; else ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("unexpected page type 0x%04X in HASH index \"%s\" block %u", opaque->hasho_flag, RelationGetRelationName(rel), BufferGetBlockNumber(buf)))); } UnlockReleaseBuffer(buf); } /* Done accessing the index */ index_close(rel, AccessShareLock); /* Count unused pages as free space. */ stats.free_space += (uint64) stats.unused_pages * stats.space_per_page; /* * Total space available for tuples excludes the metapage and the bitmap * pages. */ total_space = (uint64) (nblocks - (stats.bitmap_pages + 1)) * stats.space_per_page; if (total_space == 0) free_percent = 0.0; else free_percent = 100.0 * stats.free_space / total_space; /* * Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); tupleDesc = BlessTupleDesc(tupleDesc); /* * Build and return the tuple */ values[0] = Int32GetDatum(stats.version); values[1] = Int64GetDatum((int64) stats.bucket_pages); values[2] = Int64GetDatum((int64) stats.overflow_pages); values[3] = Int64GetDatum((int64) stats.bitmap_pages); values[4] = Int64GetDatum((int64) stats.unused_pages); values[5] = Int64GetDatum(stats.live_items); values[6] = Int64GetDatum(stats.dead_items); values[7] = Float8GetDatum(free_percent); tuple = heap_form_tuple(tupleDesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); } /* ------------------------------------------------- * GetHashPageStats() * * Collect statistics of single hash page * ------------------------------------------------- */ static void GetHashPageStats(Page page, HashIndexStat *stats) { OffsetNumber maxoff = PageGetMaxOffsetNumber(page); int off; /* count live and dead tuples, and free space */ for (off = FirstOffsetNumber; off <= maxoff; off++) { ItemId id = PageGetItemId(page, off); if (!ItemIdIsDead(id)) stats->live_items++; else stats->dead_items++; } stats->free_space += PageGetExactFreeSpace(page); }