diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile index 862585cc01..6083dabefd 100644 --- a/contrib/pgstattuple/Makefile +++ b/contrib/pgstattuple/Makefile @@ -1,10 +1,10 @@ # contrib/pgstattuple/Makefile MODULE_big = pgstattuple -OBJS = pgstattuple.o pgstatindex.o $(WIN32RES) +OBJS = pgstattuple.o pgstatindex.o pgstatapprox.o $(WIN32RES) EXTENSION = pgstattuple -DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql +DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql PGFILEDESC = "pgstattuple - tuple-level statistics" REGRESS = pgstattuple diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c new file mode 100644 index 0000000000..ae5ed56f98 --- /dev/null +++ b/contrib/pgstattuple/pgstatapprox.c @@ -0,0 +1,273 @@ +/*------------------------------------------------------------------------- + * + * pgstatapprox.c + * Bloat estimation functions + * + * Copyright (c) 2014-2015, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pgstattuple/pgstatapprox.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/visibilitymap.h" +#include "access/transam.h" +#include "access/xact.h" +#include "access/multixact.h" +#include "access/htup_details.h" +#include "catalog/namespace.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/freespace.h" +#include "storage/procarray.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/tqual.h" +#include "commands/vacuum.h" + +PG_FUNCTION_INFO_V1(pgstattuple_approx); + +typedef struct output_type +{ + uint64 table_len; + double scanned_percent; /* returned as FLOAT8, so keep the fraction */ + uint64 tuple_count; + uint64 tuple_len; + double tuple_percent; + uint64 dead_tuple_count; + uint64 dead_tuple_len; + double 
dead_tuple_percent; + uint64 free_space; + double free_percent; +} output_type; + +#define NUM_OUTPUT_COLUMNS 10 + +/* + * This function takes an already open relation and scans its pages, + * skipping those that have the corresponding visibility map bit set. + * For pages we skip, we find the free space from the free space map + * and approximate tuple_len on that basis. For the others, we count + * the exact number of dead tuples etc. Results are added into *stat, which the caller zero-initialises. + * + * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but + * we do not try to avoid skipping single pages. + */ +static void +statapprox_heap(Relation rel, output_type *stat) +{ + BlockNumber scanned, + nblocks, + blkno; + Buffer vmbuffer = InvalidBuffer; + BufferAccessStrategy bstrategy; + TransactionId OldestXmin; + uint64 misc_count = 0; + + OldestXmin = GetOldestXmin(rel, true); + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + nblocks = RelationGetNumberOfBlocks(rel); + scanned = 0; + + for (blkno = 0; blkno < nblocks; blkno++) + { + Buffer buf; + Page page; + OffsetNumber offnum, + maxoff; + Size freespace; + + CHECK_FOR_INTERRUPTS(); + + /* + * If the page has only visible tuples, then we can find out the + * free space from the FSM and move on. + */ + if (visibilitymap_test(rel, blkno, &vmbuffer)) + { + freespace = GetRecordedFreeSpace(rel, blkno); + stat->tuple_len += BLCKSZ - freespace; + stat->free_space += freespace; + continue; + } + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, + RBM_NORMAL, bstrategy); + + LockBuffer(buf, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buf); + + /* + * It's not safe to call PageGetHeapFreeSpace() on new pages, so + * we treat them as being free space for our purposes. 
+ */ + if (!PageIsNew(page)) + stat->free_space += PageGetHeapFreeSpace(page); + else + stat->free_space += BLCKSZ - SizeOfPageHeaderData; + + scanned++; /* the page was read, so it counts as scanned even if new/empty */ + + if (PageIsNew(page) || PageIsEmpty(page)) + { + UnlockReleaseBuffer(buf); + continue; + } + + /* + * Look at each tuple on the page and decide whether it's live + * or dead, then count it and its size. Unlike lazy_scan_heap, + * we can afford to ignore problems and special cases. + */ + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleData tuple; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) || + ItemIdIsDead(itemid)) + { + continue; + } + + Assert(ItemIdIsNormal(itemid)); + + ItemPointerSet(&(tuple.t_self), blkno, offnum); + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + + /* + * We count live and dead tuples, but we also need to add up + * others in order to feed vac_estimate_reltuples. + */ + switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf)) + { + case HEAPTUPLE_RECENTLY_DEAD: + misc_count++; + /* Fall through */ + case HEAPTUPLE_DEAD: + stat->dead_tuple_len += tuple.t_len; + stat->dead_tuple_count++; + break; + case HEAPTUPLE_LIVE: + stat->tuple_len += tuple.t_len; + stat->tuple_count++; + break; + case HEAPTUPLE_INSERT_IN_PROGRESS: + case HEAPTUPLE_DELETE_IN_PROGRESS: + misc_count++; + break; + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + break; + } + } + + UnlockReleaseBuffer(buf); + } + + stat->table_len = (uint64) nblocks * BLCKSZ; + stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned, + stat->tuple_count+misc_count); + + /* + * Calculate percentages if the relation has one or more pages. 
+ */ + if (nblocks != 0) + { + stat->scanned_percent = 100.0 * scanned / nblocks; /* floating point: no truncation, no block-number-width overflow */ + stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len; + stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len; + stat->free_percent = 100.0 * stat->free_space / stat->table_len; + } + + if (BufferIsValid(vmbuffer)) + { + ReleaseBuffer(vmbuffer); + vmbuffer = InvalidBuffer; + } +} + +/* + * Returns estimated live/dead tuple statistics for the given relid. Superuser only; errors out unless the relation is a table or materialized view. + */ +Datum +pgstattuple_approx(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + output_type stat = {0}; + TupleDesc tupdesc; + bool nulls[NUM_OUTPUT_COLUMNS]; + Datum values[NUM_OUTPUT_COLUMNS]; + HeapTuple ret; + int i = 0; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use pgstattuple functions")))); + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupdesc->natts != NUM_OUTPUT_COLUMNS) + elog(ERROR, "incorrect number of output arguments"); + + rel = relation_open(relid, AccessShareLock); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + /* + * We support only ordinary relations and materialised views, + * because we depend on the visibility map and free space map + * for our estimates about unscanned pages. 
+ */ + if (!(rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_MATVIEW)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" is not a table or materialized view", + RelationGetRelationName(rel)))); + + statapprox_heap(rel, &stat); + + relation_close(rel, AccessShareLock); + + memset(nulls, 0, sizeof(nulls)); + + values[i++] = Int64GetDatum(stat.table_len); + values[i++] = Float8GetDatum(stat.scanned_percent); + values[i++] = Int64GetDatum(stat.tuple_count); + values[i++] = Int64GetDatum(stat.tuple_len); + values[i++] = Float8GetDatum(stat.tuple_percent); + values[i++] = Int64GetDatum(stat.dead_tuple_count); + values[i++] = Int64GetDatum(stat.dead_tuple_len); + values[i++] = Float8GetDatum(stat.dead_tuple_percent); + values[i++] = Int64GetDatum(stat.free_space); + values[i++] = Float8GetDatum(stat.free_percent); + + ret = heap_form_tuple(tupdesc, values, nulls); + return HeapTupleGetDatum(ret); +} diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql new file mode 100644 index 0000000000..99301a27ca --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql @@ -0,0 +1,18 @@ +/* contrib/pgstattuple/pgstattuple--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. 
\quit + +CREATE FUNCTION pgstattuple_approx(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- approx_tuple_len as a percentage of table_len (estimated) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead_tuple_len as a percentage of table_len + OUT approx_free_space BIGINT, -- estimated free space in bytes + OUT approx_free_percent FLOAT8) -- approx_free_space as a percentage of table_len (estimated) +AS 'MODULE_PATHNAME', 'pgstattuple_approx' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql b/contrib/pgstattuple/pgstattuple--1.3.sql similarity index 72% rename from contrib/pgstattuple/pgstattuple--1.2.sql rename to contrib/pgstattuple/pgstattuple--1.3.sql index e5fa2f58da..f3996e74a8 100644 --- a/contrib/pgstattuple/pgstattuple--1.2.sql +++ b/contrib/pgstattuple/pgstattuple--1.3.sql @@ -1,4 +1,4 @@ -/* contrib/pgstattuple/pgstattuple--1.2.sql */ +/* contrib/pgstattuple/pgstattuple--1.3.sql */ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION pgstattuple" to load this file. 
\quit @@ -77,3 +77,19 @@ CREATE FUNCTION pg_relpages(IN relname regclass) RETURNS BIGINT AS 'MODULE_PATHNAME', 'pg_relpagesbyid' LANGUAGE C STRICT; + +/* New stuff in 1.3 begins here */ + +CREATE FUNCTION pgstattuple_approx(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- approx_tuple_len as a percentage of table_len (estimated) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead_tuple_len as a percentage of table_len + OUT approx_free_space BIGINT, -- estimated free space in bytes + OUT approx_free_percent FLOAT8) -- approx_free_space as a percentage of table_len (estimated) +AS 'MODULE_PATHNAME', 'pgstattuple_approx' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control index a7cf47fd92..c03b180143 100644 --- a/contrib/pgstattuple/pgstattuple.control +++ b/contrib/pgstattuple/pgstattuple.control @@ -1,5 +1,5 @@ # pgstattuple extension comment = 'show tuple-level statistics' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/pgstattuple' relocatable = true diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml index 9cabd71166..b31fee9b0e 100644 --- a/doc/src/sgml/pgstattuple.sgml +++ b/doc/src/sgml/pgstattuple.sgml @@ -358,6 +358,140 @@ pending_tuples | 0 + + + + pgstattuple_approx + + pgstattuple_approx(regclass) returns record + + + + + pgstattuple_approx is a faster alternative to + pgstattuple that returns approximate results. + The argument is the target relation's OID. 
+ For example: + +test=> SELECT * FROM pgstattuple_approx('pg_catalog.pg_proc'::regclass); +-[ RECORD 1 ]--------+------- +table_len | 573440 +scanned_percent | 2 +approx_tuple_count | 2740 +approx_tuple_len | 561210 +approx_tuple_percent | 97.87 +dead_tuple_count | 0 +dead_tuple_len | 0 +dead_tuple_percent | 0 +approx_free_space | 11996 +approx_free_percent | 2.09 + + The output columns are described in the table below. + + + + Whereas pgstattuple always performs a + full-table scan and returns an exact count of live and dead tuples + (and their sizes) and free space, pgstattuple_approx + tries to avoid the full-table scan and returns exact dead tuple + statistics along with an approximation of the number and + size of live tuples and free space. + + + + It does this by skipping pages that have only visible tuples + according to the visibility map (if a page has the corresponding VM + bit set, then it is assumed to contain no dead tuples). For such + pages, it derives the free space value from the free space map, and + assumes that the rest of the space on the page is taken up by live + tuples. + + + + For pages that cannot be skipped, it scans each tuple, recording its + presence and size in the appropriate counters, and adding up the + free space on the page. At the end, it estimates the total number of + live tuples based on the number of pages and tuples scanned (in the + same way that VACUUM estimates pg_class.reltuples). 
+ + + + <function>pgstattuple_approx</function> Output Columns + + + + Column + Type + Description + + + + + + table_len + bigint + Physical relation length in bytes (exact) + + + scanned_percent + float8 + Percentage of table scanned + + + approx_tuple_count + bigint + Number of live tuples (estimated) + + + approx_tuple_len + bigint + Total length of live tuples in bytes (estimated) + + + approx_tuple_percent + float8 + Percentage of live tuples + + + dead_tuple_count + bigint + Number of dead tuples (exact) + + + dead_tuple_len + bigint + Total length of dead tuples in bytes (exact) + + + dead_tuple_percent + float8 + Percentage of dead tuples + + + approx_free_space + bigint + Total free space in bytes (estimated) + + + approx_free_percent + float8 + Percentage of free space + + + + +
+ + + In the above output, the free space figures may not match the + pgstattuple output exactly, because the free + space map gives us only an approximate figure, which is not guaranteed to be + accurate to the byte. + + +
+
+ @@ -365,7 +499,7 @@ pending_tuples | 0 Authors - Tatsuo Ishii and Satoshi Nagayasu + Tatsuo Ishii, Satoshi Nagayasu and Abhijit Menon-Sen