/*
 * btreefuncs.c
 *
 * Copyright (c) 2006 Satoshi Nagayasu
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose, without fee, and without a
 * written agreement is hereby granted, provided that the above
 * copyright notice and this paragraph and the following two
 * paragraphs appear in all copies.
 *
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
 * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
 * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
 * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */

#include "postgres.h"

#include "fmgr.h"
#include "funcapi.h"
#include "access/heapam.h"
#include "access/itup.h"
#include "access/nbtree.h"
#include "access/transam.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/inval.h"

PG_FUNCTION_INFO_V1(bt_metap);
PG_FUNCTION_INFO_V1(bt_page_items);
PG_FUNCTION_INFO_V1(bt_page_stats);

extern Datum bt_metap(PG_FUNCTION_ARGS);
extern Datum bt_page_items(PG_FUNCTION_ARGS);
extern Datum bt_page_stats(PG_FUNCTION_ARGS);

#define BTMETAP_TYPE "public.bt_metap_type"
#define BTMETAP_NCOLUMNS 6

#define BTPAGEITEMS_TYPE "public.bt_page_items_type"
#define BTPAGEITEMS_NCOLUMNS 6

#define BTPAGESTATS_TYPE "public.bt_page_stats_type"
#define BTPAGESTATS_NCOLUMNS 11

#define IS_INDEX(r) ((r)->rd_rel->relkind == 'i')
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)

/*
 * Raise an error unless "offset" is a valid line-pointer number on "page".
 * Wrapped in do/while(0) so that it behaves as a single statement.
 */
#define CHECK_PAGE_OFFSET_RANGE(page, offset) \
	do { \
		if (!(FirstOffsetNumber <= (offset) && \
			  (offset) <= PageGetMaxOffsetNumber(page))) \
			elog(ERROR, "Page offset number out of range."); \
	} while (0)

/*
 * Raise an error unless "blkno" names an existing block of "rel".
 *
 * Block numbers are unsigned, so the only meaningful test is against the
 * current relation length.  This must run before ReadBuffer(): an unchecked
 * value could be far past the end of the relation, and in particular could be
 * InvalidBlockNumber, which ReadBuffer() interprets as P_NEW (a request to
 * extend the relation).
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
	do { \
		if (RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno)) \
			elog(ERROR, "Block number out of range."); \
	} while (0)

/* ------------------------------------------------
 * structure for single btree page statistics
 * ------------------------------------------------
 */
typedef struct BTPageStat
{
	uint32		blkno;
	uint32		live_items;
	uint32		dead_items;
	uint32		page_size;
	uint32		max_avail;
	uint32		free_size;
	uint32		avg_item_size;
	char		type;

	/* opaque data */
	BlockNumber btpo_prev;
	BlockNumber btpo_next;
	union
	{
		uint32		level;
		TransactionId xact;
	}			btpo;
	uint16		btpo_flags;
	BTCycleId	btpo_cycleid;
}	BTPageStat;

/* ------------------------------------------------
 * A structure for a whole btree index statistics
 * used by pgstatindex().
 * ------------------------------------------------
 */
typedef struct BTIndexStat
{
	uint32		magic;
	uint32		version;
	BlockNumber root_blkno;
	uint32		level;

	BlockNumber fastroot;
	uint32		fastlevel;

	uint32		live_items;
	uint32		dead_items;

	uint32		root_pages;
	uint32		internal_pages;
	uint32		leaf_pages;
	uint32		empty_pages;
	uint32		deleted_pages;

	uint32		page_size;
	uint32		avg_item_size;

	uint32		max_avail;
	uint32		free_space;
}	BTIndexStat;

/*
 * Format an integer as a freshly palloc'd decimal string.
 *
 * Unsigned 32-bit inputs are deliberately rendered through "%d" (values above
 * INT_MAX come out negative), matching the text this module has always
 * produced for its result columns.
 */
static char *
bt_int32_cstring(int32 value)
{
	char	   *buf = palloc(32);

	snprintf(buf, 32, "%d", value);
	return buf;
}

/* -------------------------------------------------
 * GetBTPageStatistics()
 *
 * Collect statistics of a single b-tree page held in "buffer" and store
 * them in *stat.  The page may be of any type; the detected type is
 * reported in stat->type:
 *   'd' deleted, 'e' ignorable (per P_IGNORE), 'l' leaf, 'r' root,
 *   'i' anything else (internal).
 *
 * For a deleted page only blkno, page_size, max_avail, type and btpo.xact
 * carry page data; every other field is left at its default (0, or
 * InvalidBlockNumber for the sibling links).
 *
 * The function only reads the page; the caller owns the buffer pin and any
 * content lock.
 * -------------------------------------------------
 */
static void
GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat * stat)
{
	Page		page = BufferGetPage(buffer);
	PageHeader	phdr = (PageHeader) page;
	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	int			item_size = 0;
	int			off;
	uint32		nitems;

	stat->blkno = blkno;

	stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);

	stat->dead_items = stat->live_items = 0;

	stat->page_size = PageGetPageSize(page);

	/* defaults, so the early return below leaves nothing uninitialized */
	stat->free_size = 0;
	stat->avg_item_size = 0;
	stat->btpo_prev = InvalidBlockNumber;
	stat->btpo_next = InvalidBlockNumber;
	stat->btpo_flags = 0;
	stat->btpo_cycleid = 0;

	/* page type (flags) */
	if (P_ISDELETED(opaque))
	{
		stat->type = 'd';
		stat->btpo.xact = opaque->btpo.xact;
		return;
	}
	else if (P_IGNORE(opaque))
		stat->type = 'e';
	else if (P_ISLEAF(opaque))
		stat->type = 'l';
	else if (P_ISROOT(opaque))
		stat->type = 'r';
	else
		stat->type = 'i';

	/* btpage opaque data */
	stat->btpo_prev = opaque->btpo_prev;
	stat->btpo_next = opaque->btpo_next;
	stat->btpo.level = opaque->btpo.level;
	stat->btpo_flags = opaque->btpo_flags;
	stat->btpo_cycleid = opaque->btpo_cycleid;

	/* count live and dead tuples, and accumulate their total size */
	for (off = FirstOffsetNumber; off <= maxoff; off++)
	{
		ItemId		id = PageGetItemId(page, off);
		IndexTuple	itup = (IndexTuple) PageGetItem(page, id);

		item_size += IndexTupleSize(itup);

		if (!ItemIdDeleted(id))
			stat->live_items++;
		else
			stat->dead_items++;
	}
	stat->free_size = PageGetFreeSpace(page);

	/* average over live and dead items together; 0 for an empty page */
	nitems = stat->live_items + stat->dead_items;
	if (nitems > 0)
		stat->avg_item_size = item_size / nitems;
	else
		stat->avg_item_size = 0;
}

/* -----------------------------------------------
 * bt_page_stats()
 *
 * Return one row of statistics for a single page of a b-tree index.
 * Arguments: index name (text, optionally schema-qualified) and block
 * number.  Block 0 is rejected because it is the meta page.
 *
 * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
 * -----------------------------------------------
 */
Datum
bt_page_stats(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	uint32		blkno = PG_GETARG_UINT32(1);
	Buffer		buffer;
	Relation	rel;
	RangeVar   *relrv;
	Datum		result;
	HeapTuple	tuple;
	TupleDesc	tupleDesc;
	int			j;
	char	   *values[BTPAGESTATS_NCOLUMNS];
	BTPageStat	stat;

	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
	rel = relation_openrv(relrv, AccessShareLock);

	/* validate everything before touching the buffer manager */
	if (!IS_INDEX(rel) || !IS_BTREE(rel))
		elog(ERROR, "bt_page_stats() can be used only on b-tree index.");

	if (blkno == 0)
		elog(ERROR, "Block 0 is a meta page.");

	CHECK_RELATION_BLOCK_RANGE(rel, blkno);

	/*
	 * Read the page under a share lock so that a concurrent writer cannot
	 * change it while it is being examined.  All results are copied into
	 * "stat", so the buffer and relation can be released right away.
	 */
	buffer = ReadBuffer(rel, blkno);
	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	GetBTPageStatistics(blkno, buffer, &stat);
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buffer);

	relation_close(rel, AccessShareLock);

	tupleDesc = RelationNameGetTupleDesc(BTPAGESTATS_TYPE);

	j = 0;
	values[j++] = bt_int32_cstring(stat.blkno);

	values[j] = palloc(32);
	snprintf(values[j++], 32, "%c", stat.type);

	values[j++] = bt_int32_cstring(stat.live_items);
	values[j++] = bt_int32_cstring(stat.dead_items);
	values[j++] = bt_int32_cstring(stat.avg_item_size);
	values[j++] = bt_int32_cstring(stat.page_size);
	values[j++] = bt_int32_cstring(stat.free_size);
	values[j++] = bt_int32_cstring(stat.btpo_prev);
	values[j++] = bt_int32_cstring(stat.btpo_next);

	/* the btpo union holds an xid for deleted pages, a tree level otherwise */
	if (stat.type == 'd')
		values[j++] = bt_int32_cstring(stat.btpo.xact);
	else
		values[j++] = bt_int32_cstring(stat.btpo.level);

	values[j++] = bt_int32_cstring(stat.btpo_flags);

	tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
								   values);

	result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);

	PG_RETURN_DATUM(result);
}

/*-------------------------------------------------------
 * bt_page_items()
 *
 * Get IndexTupleData set in a b-tree page, one row per item.
 * Arguments: index name (text, optionally schema-qualified) and block
 * number.  Block 0 is rejected because it is the meta page.
 *
 * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
 *-------------------------------------------------------
 */

/* ---------------------------------------------------
 * data structure for SRF to hold a scan information
 * ---------------------------------------------------
 */
struct user_args
{
	TupleDesc	tupd;
	Page		page;			/* private copy of the page being listed */
	uint16		offset;			/* next item offset to return */
};

Datum
bt_page_items(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	uint32		blkno = PG_GETARG_UINT32(1);
	Datum		result;
	char	   *values[BTPAGEITEMS_NCOLUMNS];
	HeapTuple	tuple;
	FuncCallContext *fctx;
	struct user_args *uargs = NULL;

	if (blkno == 0)
		elog(ERROR, "Block 0 is a meta page.");

	if (SRF_IS_FIRSTCALL())
	{
		RangeVar   *relrv;
		Relation	rel;
		Buffer		buffer;
		BTPageOpaque opaque;
		MemoryContext mctx;

		fctx = SRF_FIRSTCALL_INIT();

		relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
		rel = relation_openrv(relrv, AccessShareLock);

		/* validate everything before touching the buffer manager */
		if (!IS_INDEX(rel) || !IS_BTREE(rel))
			elog(ERROR, "bt_page_items() can be used only on b-tree index.");

		CHECK_RELATION_BLOCK_RANGE(rel, blkno);

		buffer = ReadBuffer(rel, blkno);

		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		uargs = palloc(sizeof(struct user_args));
		uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE);
		uargs->offset = FirstOffsetNumber;

		/*
		 * Work from a private copy of the page.  Holding the buffer pin and
		 * the relation open across calls would leak both whenever the calling
		 * query does not fetch every row, because the release code would then
		 * never be reached.  The copy is taken under a share lock so it is a
		 * consistent image of the page.
		 */
		uargs->page = palloc(BLCKSZ);
		LockBuffer(buffer, BUFFER_LOCK_SHARE);
		memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		ReleaseBuffer(buffer);

		relation_close(rel, AccessShareLock);

		opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);

		if (P_ISDELETED(opaque))
			elog(NOTICE, "bt_page_items(): this page is deleted.");

		fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
		fctx->user_fctx = uargs;

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	uargs = fctx->user_fctx;

	if (fctx->call_cntr < fctx->max_calls)
	{
		ItemId		id;
		IndexTuple	itup;
		BlockNumber heap_blkno;
		char	   *data;
		char	   *dump;
		char	   *out;
		int			data_len;
		int			off;
		int			j = 0;

		id = PageGetItemId(uargs->page, uargs->offset);

		if (!ItemIdIsValid(id))
			elog(ERROR, "Invalid ItemId.");

		itup = (IndexTuple) PageGetItem(uargs->page, id);
		heap_blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid));

		values[j++] = bt_int32_cstring(uargs->offset);

		values[j] = palloc(32);
		snprintf(values[j++], 32, "(%u,%u)", heap_blkno, itup->t_tid.ip_posid);

		values[j++] = bt_int32_cstring((int) IndexTupleSize(itup));

		values[j] = palloc(32);
		snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');

		values[j] = palloc(32);
		snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');

		/*
		 * Hex dump of the key data: two hex digits per byte, separated by
		 * single spaces.  The length is computed as a signed value and
		 * clamped so that a tuple whose header claims more than its total
		 * size cannot wrap around into a huge loop bound.
		 */
		data = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
		data_len = (int) IndexTupleSize(itup) -
			(int) IndexInfoFindDataOffset(itup->t_info);
		if (data_len < 0)
			data_len = 0;

		/* "xx" per byte plus a separator between bytes, plus the terminator */
		dump = palloc0(data_len * 3 + 1);
		out = dump;
		for (off = 0; off < data_len; off++)
		{
			if (off > 0)
				*out++ = ' ';
			snprintf(out, 3, "%02x", *(data + off) & 0xff);
			out += 2;
		}
		values[j] = dump;

		tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd),
									   values);
		result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple);

		uargs->offset = uargs->offset + 1;

		SRF_RETURN_NEXT(fctx, result);
	}
	else
	{
		/* nothing to release: the page copy lives in multi-call memory */
		SRF_RETURN_DONE(fctx);
	}
}

/* ------------------------------------------------
 * bt_metap()
 *
 * Get a btree meta-page information (block 0 of the index) as one row.
 * Argument: index name (text, optionally schema-qualified).
 *
 * Usage: SELECT * FROM bt_metap('t1_pkey')
 * ------------------------------------------------
 */
Datum
bt_metap(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	Buffer		buffer;
	Relation	rel;
	RangeVar   *relrv;
	Datum		result;
	BTMetaPageData metad;
	TupleDesc	tupleDesc;
	int			j;
	char	   *values[BTMETAP_NCOLUMNS];
	HeapTuple	tuple;

	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
	rel = relation_openrv(relrv, AccessShareLock);

	if (!IS_INDEX(rel) || !IS_BTREE(rel))
		elog(ERROR, "bt_metap() can be used only on b-tree index.");

	/*
	 * Copy the meta data out under a share lock, then release the buffer and
	 * the relation before building the result.
	 */
	buffer = ReadBuffer(rel, 0);
	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	memcpy(&metad, BTPageGetMeta(BufferGetPage(buffer)), sizeof(metad));
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buffer);

	relation_close(rel, AccessShareLock);

	tupleDesc = RelationNameGetTupleDesc(BTMETAP_TYPE);

	j = 0;
	values[j++] = bt_int32_cstring(metad.btm_magic);
	values[j++] = bt_int32_cstring(metad.btm_version);
	values[j++] = bt_int32_cstring(metad.btm_root);
	values[j++] = bt_int32_cstring(metad.btm_level);
	values[j++] = bt_int32_cstring(metad.btm_fastroot);
	values[j++] = bt_int32_cstring(metad.btm_fastlevel);

	tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
								   values);

	result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);

	PG_RETURN_DATUM(result);
}