From e6858e665731c0f56d3ecc9fbb245c32d24f8ef7 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 14 Oct 2011 17:23:01 -0400 Subject: [PATCH] Measure the number of all-visible pages for use in index-only scan costing. Add a column pg_class.relallvisible to remember the number of pages that were all-visible according to the visibility map as of the last VACUUM (or ANALYZE, or some other operations that update pg_class.relpages). Use relallvisible/relpages, instead of an arbitrary constant, to estimate how many heap page fetches can be avoided during an index-only scan. This is pretty primitive and will no doubt see refinements once we've acquired more field experience with the index-only scan mechanism, but it's way better than using a constant. Note: I had to adjust an underspecified query in the window.sql regression test, because it was changing answers when the plan changed to use an index-only scan. Some of the adjacent tests perhaps should be adjusted as well, but I didn't do that here. --- doc/src/sgml/catalogs.sgml | 13 +++++ src/backend/access/hash/hash.c | 3 +- src/backend/access/heap/visibilitymap.c | 68 +++++++++++++++++++++++++ src/backend/catalog/heap.c | 4 ++ src/backend/catalog/index.c | 52 +++++++++++++------ src/backend/commands/analyze.c | 11 +++- src/backend/commands/cluster.c | 5 ++ src/backend/commands/vacuum.c | 6 +++ src/backend/commands/vacuumlazy.c | 20 ++++++-- src/backend/nodes/outfuncs.c | 1 + src/backend/optimizer/path/costsize.c | 18 +++---- src/backend/optimizer/util/plancat.c | 30 +++++++++-- src/backend/optimizer/util/relnode.c | 2 + src/backend/utils/cache/relcache.c | 2 + src/include/access/visibilitymap.h | 3 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_class.h | 45 ++++++++-------- src/include/commands/vacuum.h | 1 + src/include/nodes/relation.h | 4 +- src/include/optimizer/plancat.h | 2 +- src/test/regress/expected/window.out | 23 +++++---- src/test/regress/sql/window.sql | 3 +- 22 files changed, 246 insertions(+), 72 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 0495bd03bd..e830c5f3d4 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1654,6 +1654,19 @@ + + relallvisible + int4 + + + Number of pages that are marked all-visible in the table's + visibility map. This is only an estimate used by the + planner. It is updated by VACUUM, + ANALYZE, and a few DDL commands such as + CREATE INDEX. + + + reltoastrelid oid diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index bfb10897d2..770b3ef76d 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -55,6 +55,7 @@ hashbuild(PG_FUNCTION_ARGS) IndexBuildResult *result; BlockNumber relpages; double reltuples; + double allvisfrac; uint32 num_buckets; HashBuildState buildstate; @@ -67,7 +68,7 @@ hashbuild(PG_FUNCTION_ARGS) RelationGetRelationName(index)); /* Estimate the number of rows currently present in the table */ - estimate_rel_size(heap, NULL, &relpages, &reltuples); + estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac); /* Initialize the hash index metadata page and initial buckets */ num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM); diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 5a0511f198..919e8de042 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -16,6 +16,8 @@ * visibilitymap_pin_ok - check whether correct map page is already pinned * visibilitymap_set - set a bit in a previously pinned page * visibilitymap_test - test if a bit is set + * visibilitymap_count - count number of bits set in visibility map + * visibilitymap_truncate - truncate the visibility map * * NOTES * @@ -110,6 +112,26 @@ #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE) +/* table for fast counting of set bits */ +static const uint8 number_of_ones[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + /* prototypes for internal routines */ static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend); static void vm_extend(Relation rel, BlockNumber nvmblocks); @@ -307,6 +329,52 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf) return result; } +/* + * visibilitymap_count - count number of bits set in visibility map + * + * Note: we ignore the possibility of race conditions when the table is being + * extended concurrently with the call. New pages added to the table aren't + * going to be marked all-visible, so they won't affect the result. + */ +BlockNumber +visibilitymap_count(Relation rel) +{ + BlockNumber result = 0; + BlockNumber mapBlock; + + for (mapBlock = 0; ; mapBlock++) + { + Buffer mapBuffer; + unsigned char *map; + int i; + + /* + * Read till we fall off the end of the map. We assume that any + * extra bytes in the last page are zeroed, so we don't bother + * excluding them from the count. + */ + mapBuffer = vm_readbuf(rel, mapBlock, false); + if (!BufferIsValid(mapBuffer)) + break; + + /* + * We choose not to lock the page, since the result is going to be + * immediately stale anyway if anyone is concurrently setting or + * clearing bits, and we only really need an approximate value. + */ + map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer)); + + for (i = 0; i < MAPSIZE; i++) + { + result += number_of_ones[map[i]]; + } + + ReleaseBuffer(mapBuffer); + } + + return result; +} + /* * visibilitymap_truncate - truncate the visibility map * diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 2aaf77523f..e11d896ec8 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -772,6 +772,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace); values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages); values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples); + values[Anum_pg_class_relallvisible - 1] = Int32GetDatum(rd_rel->relallvisible); values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid); values[Anum_pg_class_reltoastidxid - 1] = ObjectIdGetDatum(rd_rel->reltoastidxid); values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex); @@ -845,16 +846,19 @@ AddNewRelationTuple(Relation pg_class_desc, /* The relation is real, but as yet empty */ new_rel_reltup->relpages = 0; new_rel_reltup->reltuples = 0; + new_rel_reltup->relallvisible = 0; break; case RELKIND_SEQUENCE: /* Sequences always have a known size */ new_rel_reltup->relpages = 1; new_rel_reltup->reltuples = 1; + new_rel_reltup->relallvisible = 0; break; default: /* Views, etc, have no disk storage */ new_rel_reltup->relpages = 0; new_rel_reltup->reltuples = 0; + new_rel_reltup->relallvisible = 0; break; } diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 67ade8f5e9..99e130c1b0 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -26,6 +26,7 @@ #include "access/relscan.h" #include "access/sysattr.h" #include "access/transam.h" +#include "access/visibilitymap.h" #include "access/xact.h" #include "bootstrap/bootstrap.h" #include "catalog/catalog.h" @@ -1059,7 +1060,7 @@ index_create(Relation heapRelation, true, isprimary, InvalidOid, - heapRelation->rd_rel->reltuples); + -1.0); /* Make the above update visible */ CommandCounterIncrement(); } @@ -1225,7 +1226,7 @@ index_constraint_create(Relation heapRelation, true, true, InvalidOid, - heapRelation->rd_rel->reltuples); + -1.0); /* * If needed, mark the index as primary and/or deferred in pg_index. @@ -1533,9 +1534,10 @@ FormIndexDatum(IndexInfo *indexInfo, * isprimary: if true, set relhaspkey true; else no change * reltoastidxid: if not InvalidOid, set reltoastidxid to this value; * else no change - * reltuples: set reltuples to this value + * reltuples: if >= 0, set reltuples to this value; else no change * - * relpages is also updated (using RelationGetNumberOfBlocks()). + * If reltuples >= 0, relpages and relallvisible are also updated (using + * RelationGetNumberOfBlocks() and visibilitymap_count()). * * NOTE: an important side-effect of this operation is that an SI invalidation * message is sent out to all backends --- including me --- causing relcache @@ -1550,7 +1552,6 @@ index_update_stats(Relation rel, bool hasindex, bool isprimary, Oid reltoastidxid, double reltuples) { - BlockNumber relpages = RelationGetNumberOfBlocks(rel); Oid relid = RelationGetRelid(rel); Relation pg_class; HeapTuple tuple; @@ -1586,9 +1587,11 @@ index_update_stats(Relation rel, * It is safe to use a non-transactional update even though our * transaction could still fail before committing. Setting relhasindex * true is safe even if there are no indexes (VACUUM will eventually fix - * it), likewise for relhaspkey. And of course the relpages and reltuples - * counts are correct (or at least more so than the old values) - * regardless. + * it), likewise for relhaspkey. And of course the new relpages and + * reltuples counts are correct regardless. However, we don't want to + * change relpages (or relallvisible) if the caller isn't providing an + * updated reltuples count, because that would bollix the + * reltuples/relpages ratio which is what's really important. */ pg_class = heap_open(RelationRelationId, RowExclusiveLock); @@ -1650,15 +1653,32 @@ index_update_stats(Relation rel, dirty = true; } } - if (rd_rel->reltuples != (float4) reltuples) + + if (reltuples >= 0) { - rd_rel->reltuples = (float4) reltuples; - dirty = true; - } - if (rd_rel->relpages != (int32) relpages) - { - rd_rel->relpages = (int32) relpages; - dirty = true; + BlockNumber relpages = RelationGetNumberOfBlocks(rel); + BlockNumber relallvisible; + + if (rd_rel->relkind != RELKIND_INDEX) + relallvisible = visibilitymap_count(rel); + else /* don't bother for indexes */ + relallvisible = 0; + + if (rd_rel->relpages != (int32) relpages) + { + rd_rel->relpages = (int32) relpages; + dirty = true; + } + if (rd_rel->reltuples != (float4) reltuples) + { + rd_rel->reltuples = (float4) reltuples; + dirty = true; + } + if (rd_rel->relallvisible != (int32) relallvisible) + { + rd_rel->relallvisible = (int32) relallvisible; + dirty = true; + } } /* diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 18d44c572c..32985a4a0a 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -19,6 +19,7 @@ #include "access/transam.h" #include "access/tupconvert.h" #include "access/tuptoaster.h" +#include "access/visibilitymap.h" #include "access/xact.h" #include "catalog/index.h" #include "catalog/indexing.h" @@ -534,7 +535,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh) if (!inh) vac_update_relstats(onerel, RelationGetNumberOfBlocks(onerel), - totalrows, hasindex, InvalidTransactionId); + totalrows, + visibilitymap_count(onerel), + hasindex, + InvalidTransactionId); /* * Same for indexes. Vacuum always scans all indexes, so if we're part of @@ -551,7 +555,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh) totalindexrows = ceil(thisdata->tupleFract * totalrows); vac_update_relstats(Irel[ind], RelationGetNumberOfBlocks(Irel[ind]), - totalindexrows, false, InvalidTransactionId); + totalindexrows, + 0, + false, + InvalidTransactionId); } } diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 8200d2095a..edec44d2c3 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1205,6 +1205,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, { int4 swap_pages; float4 swap_tuples; + int4 swap_allvisible; swap_pages = relform1->relpages; relform1->relpages = relform2->relpages; @@ -1213,6 +1214,10 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, swap_tuples = relform1->reltuples; relform1->reltuples = relform2->reltuples; relform2->reltuples = swap_tuples; + + swap_allvisible = relform1->relallvisible; + relform1->relallvisible = relform2->relallvisible; + relform2->relallvisible = swap_allvisible; } /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 7fe787ecb7..f42504cf9f 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -569,6 +569,7 @@ vac_estimate_reltuples(Relation relation, bool is_analyze, void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, + BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid) { Oid relid = RelationGetRelid(relation); @@ -599,6 +600,11 @@ vac_update_relstats(Relation relation, pgcform->reltuples = (float4) num_tuples; dirty = true; } + if (pgcform->relallvisible != (int32) num_all_visible_pages) + { + pgcform->relallvisible = (int32) num_all_visible_pages; + dirty = true; + } if (pgcform->relhasindex != hasindex) { pgcform->relhasindex = hasindex; diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index cf8337b9e5..b197b45c12 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -158,6 +158,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, TransactionId freezeTableLimit; BlockNumber new_rel_pages; double new_rel_tuples; + BlockNumber new_rel_allvisible; TransactionId new_frozen_xid; /* measure elapsed time iff autovacuum logging requires it */ @@ -222,6 +223,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, * density") with nonzero relpages and reltuples=0 (which means "zero * tuple density") unless there's some actual evidence for the latter. * + * We do update relallvisible even in the corner case, since if the + * table is all-visible we'd definitely like to know that. But clamp + * the value to be not more than what we're setting relpages to. + * * Also, don't change relfrozenxid if we skipped any pages, since then * we don't know for certain that all tuples have a newer xmin. */ @@ -233,12 +238,18 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, new_rel_tuples = vacrelstats->old_rel_tuples; } + new_rel_allvisible = visibilitymap_count(onerel); + if (new_rel_allvisible > new_rel_pages) + new_rel_allvisible = new_rel_pages; + new_frozen_xid = FreezeLimit; if (vacrelstats->scanned_pages < vacrelstats->rel_pages) new_frozen_xid = InvalidTransactionId; vac_update_relstats(onerel, - new_rel_pages, new_rel_tuples, + new_rel_pages, + new_rel_tuples, + new_rel_allvisible, vacrelstats->hasindex, new_frozen_xid); @@ -1063,8 +1074,11 @@ lazy_cleanup_index(Relation indrel, */ if (!stats->estimated_count) vac_update_relstats(indrel, - stats->num_pages, stats->num_index_tuples, - false, InvalidTransactionId); + stats->num_pages, + stats->num_index_tuples, + 0, + false, + InvalidTransactionId); ereport(elevel, (errmsg("index \"%s\" now contains %.0f row versions in %u pages", diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index eba3d6d579..98a02b27dd 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1743,6 +1743,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node) WRITE_NODE_FIELD(indexlist); WRITE_UINT_FIELD(pages); WRITE_FLOAT_FIELD(tuples, "%.0f"); + WRITE_FLOAT_FIELD(allvisfrac, "%.6f"); WRITE_NODE_FIELD(subplan); WRITE_NODE_FIELD(subroot); WRITE_NODE_FIELD(baserestrictinfo); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 45c5524d30..f821b508d6 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -120,9 +120,6 @@ bool enable_material = true; bool enable_mergejoin = true; bool enable_hashjoin = true; -/* Possibly this should become a GUC too */ -static double visibility_fraction = 0.9; - typedef struct { PlannerInfo *root; @@ -324,9 +321,10 @@ cost_index(IndexPath *path, PlannerInfo *root, * * If it's an index-only scan, then we will not need to fetch any heap * pages for which the visibility map shows all tuples are visible. - * Unfortunately, we have no stats as to how much of the heap is - * all-visible, and that's likely to be a rather unstable number anyway. - * We use an arbitrary constant visibility_fraction to estimate this. + * Hence, reduce the estimated number of heap fetches accordingly. + * We use the measured fraction of the entire heap that is all-visible, + * which might not be particularly relevant to the subset of the heap + * that this query will fetch; but it's not clear how to do better. *---------- */ if (outer_rel != NULL && outer_rel->rows > 1) @@ -347,7 +345,7 @@ cost_index(IndexPath *path, PlannerInfo *root, root); if (indexonly) - pages_fetched = ceil(pages_fetched * visibility_fraction); + pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans; @@ -369,7 +367,7 @@ cost_index(IndexPath *path, PlannerInfo *root, root); if (indexonly) - pages_fetched = ceil(pages_fetched * visibility_fraction); + pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans; } @@ -385,7 +383,7 @@ cost_index(IndexPath *path, PlannerInfo *root, root); if (indexonly) - pages_fetched = ceil(pages_fetched * visibility_fraction); + pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ max_IO_cost = pages_fetched * spc_random_page_cost; @@ -394,7 +392,7 @@ cost_index(IndexPath *path, PlannerInfo *root, pages_fetched = ceil(indexSelectivity * (double) baserel->pages); if (indexonly) - pages_fetched = ceil(pages_fetched * visibility_fraction); + pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); min_IO_cost = spc_random_page_cost; if (pages_fetched > 1) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 0b3675f146..aa436004f8 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -116,7 +116,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, */ if (!inhparent) estimate_rel_size(relation, rel->attr_widths - rel->min_attr, - &rel->pages, &rel->tuples); + &rel->pages, &rel->tuples, &rel->allvisfrac); /* * Make list of indexes. Ignore indexes on system catalogs if told to. @@ -339,8 +339,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, } else { + double allvisfrac; /* dummy */ + estimate_rel_size(indexRelation, NULL, - &info->pages, &info->tuples); + &info->pages, &info->tuples, &allvisfrac); if (info->tuples > rel->tuples) info->tuples = rel->tuples; } @@ -369,17 +371,21 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* * estimate_rel_size - estimate # pages and # tuples in a table or index * + * We also estimate the fraction of the pages that are marked all-visible in + * the visibility map, for use in estimation of index-only scans. + * * If attr_widths isn't NULL, it points to the zero-index entry of the * relation's attr_widths[] cache; we fill this in if we have need to compute * the attribute widths for estimation purposes. */ void estimate_rel_size(Relation rel, int32 *attr_widths, - BlockNumber *pages, double *tuples) + BlockNumber *pages, double *tuples, double *allvisfrac) { BlockNumber curpages; BlockNumber relpages; double reltuples; + BlockNumber relallvisible; double density; switch (rel->rd_rel->relkind) @@ -432,11 +438,13 @@ estimate_rel_size(Relation rel, int32 *attr_widths, if (curpages == 0) { *tuples = 0; + *allvisfrac = 0; break; } /* coerce values in pg_class to more desirable types */ relpages = (BlockNumber) rel->rd_rel->relpages; reltuples = (double) rel->rd_rel->reltuples; + relallvisible = (BlockNumber) rel->rd_rel->relallvisible; /* * If it's an index, discount the metapage while estimating the @@ -480,21 +488,37 @@ estimate_rel_size(Relation rel, int32 *attr_widths, density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width; } *tuples = rint(density * (double) curpages); + + /* + * We use relallvisible as-is, rather than scaling it up like we + * do for the pages and tuples counts, on the theory that any + * pages added since the last VACUUM are most likely not marked + * all-visible. But costsize.c wants it converted to a fraction. + */ + if (relallvisible == 0 || curpages <= 0) + *allvisfrac = 0; + else if ((double) relallvisible >= curpages) + *allvisfrac = 1; + else + *allvisfrac = (double) relallvisible / curpages; break; case RELKIND_SEQUENCE: /* Sequences always have a known size */ *pages = 1; *tuples = 1; + *allvisfrac = 0; break; case RELKIND_FOREIGN_TABLE: /* Just use whatever's in pg_class */ *pages = rel->rd_rel->relpages; *tuples = rel->rd_rel->reltuples; + *allvisfrac = 0; break; default: /* else it has no disk storage; probably shouldn't get here? */ *pages = 0; *tuples = 0; + *allvisfrac = 0; break; } } diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 1df727d9fc..37187e2073 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -109,6 +109,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->indexlist = NIL; rel->pages = 0; rel->tuples = 0; + rel->allvisfrac = 0; rel->subplan = NULL; rel->subroot = NULL; rel->baserestrictinfo = NIL; @@ -362,6 +363,7 @@ build_join_rel(PlannerInfo *root, joinrel->indexlist = NIL; joinrel->pages = 0; joinrel->tuples = 0; + joinrel->allvisfrac = 0; joinrel->subplan = NULL; joinrel->subroot = NULL; joinrel->baserestrictinfo = NIL; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 9f6b12707b..603e4c1b62 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1414,6 +1414,7 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_rel->relpages = 0; relation->rd_rel->reltuples = 0; + relation->rd_rel->relallvisible = 0; relation->rd_rel->relkind = RELKIND_RELATION; relation->rd_rel->relhasoids = hasoids; relation->rd_rel->relnatts = (int16) natts; @@ -2668,6 +2669,7 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) { classform->relpages = 0; /* it's empty until further notice */ classform->reltuples = 0; + classform->relallvisible = 0; } classform->relfrozenxid = freezeXid; diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h index 7d62c12640..4e5c0a0e4e 100644 --- a/src/include/access/visibilitymap.h +++ b/src/include/access/visibilitymap.h @@ -27,6 +27,7 @@ extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf); extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr, Buffer vmbuf); extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf); -extern void visibilitymap_truncate(Relation rel, BlockNumber heapblk); +extern BlockNumber visibilitymap_count(Relation rel); +extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks); #endif /* VISIBILITYMAP_H */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index e4eb7b1294..8fff3675ef 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201110071 +#define CATALOG_VERSION_NO 201110141 #endif diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index e00618026e..06120e481e 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -45,6 +45,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO Oid reltablespace; /* identifier of table space for relation */ int4 relpages; /* # of blocks (not always up-to-date) */ float4 reltuples; /* # of tuples (not always up-to-date) */ + int4 relallvisible; /* # of all-visible blocks (not always + * up-to-date) */ Oid reltoastrelid; /* OID of toast table; 0 if none */ Oid reltoastidxid; /* if toast table, OID of chunk_id index */ bool relhasindex; /* T if has (or has had) any indexes */ @@ -92,7 +94,7 @@ typedef FormData_pg_class *Form_pg_class; * ---------------- */ -#define Natts_pg_class 26 +#define Natts_pg_class 27 #define Anum_pg_class_relname 1 #define Anum_pg_class_relnamespace 2 #define Anum_pg_class_reltype 3 @@ -103,22 +105,23 @@ typedef FormData_pg_class *Form_pg_class; #define Anum_pg_class_reltablespace 8 #define Anum_pg_class_relpages 9 #define Anum_pg_class_reltuples 10 -#define Anum_pg_class_reltoastrelid 11 -#define Anum_pg_class_reltoastidxid 12 -#define Anum_pg_class_relhasindex 13 -#define Anum_pg_class_relisshared 14 -#define Anum_pg_class_relpersistence 15 -#define Anum_pg_class_relkind 16 -#define Anum_pg_class_relnatts 17 -#define Anum_pg_class_relchecks 18 -#define Anum_pg_class_relhasoids 19 -#define Anum_pg_class_relhaspkey 20 -#define Anum_pg_class_relhasrules 21 -#define Anum_pg_class_relhastriggers 22 -#define Anum_pg_class_relhassubclass 23 -#define Anum_pg_class_relfrozenxid 24 -#define Anum_pg_class_relacl 25 -#define Anum_pg_class_reloptions 26 +#define Anum_pg_class_relallvisible 11 +#define Anum_pg_class_reltoastrelid 12 +#define Anum_pg_class_reltoastidxid 13 +#define Anum_pg_class_relhasindex 14 +#define Anum_pg_class_relisshared 15 +#define Anum_pg_class_relpersistence 16 +#define Anum_pg_class_relkind 17 +#define Anum_pg_class_relnatts 18 +#define Anum_pg_class_relchecks 19 +#define Anum_pg_class_relhasoids 20 +#define Anum_pg_class_relhaspkey 21 +#define Anum_pg_class_relhasrules 22 +#define Anum_pg_class_relhastriggers 23 +#define Anum_pg_class_relhassubclass 24 +#define Anum_pg_class_relfrozenxid 25 +#define Anum_pg_class_relacl 26 +#define Anum_pg_class_reloptions 27 /* ---------------- * initial contents of pg_class @@ -130,13 +133,13 @@ typedef FormData_pg_class *Form_pg_class; */ /* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */ -DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ )); +DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ )); DESCR(""); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index cfbe0c4392..d8fd0caa6b 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -149,6 +149,7 @@ extern double vac_estimate_reltuples(Relation relation, bool is_analyze, extern void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, + BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid); extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 45ca52e516..ef84e9f138 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -319,6 +319,7 @@ typedef struct PlannerInfo * (always NIL if it's not a table) * pages - number of disk pages in relation (zero if not a table) * tuples - number of tuples in relation (not considering restrictions) + * allvisfrac - fraction of disk pages that are marked all-visible * subplan - plan for subquery (NULL if it's not a subquery) * subroot - PlannerInfo for subquery (NULL if it's not a subquery) * @@ -402,8 +403,9 @@ typedef struct RelOptInfo Relids *attr_needed; /* array indexed [min_attr .. max_attr] */ int32 *attr_widths; /* array indexed [min_attr .. max_attr] */ List *indexlist; /* list of IndexOptInfo */ - BlockNumber pages; + BlockNumber pages; /* size estimates derived from pg_class */ double tuples; + double allvisfrac; struct Plan *subplan; /* if subquery */ PlannerInfo *subroot; /* if subquery */ diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index c0b8eda813..05843615d6 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -29,7 +29,7 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel); extern void estimate_rel_size(Relation rel, int32 *attr_widths, - BlockNumber *pages, double *tuples); + BlockNumber *pages, double *tuples, double *allvisfrac); extern int32 get_relation_data_width(Oid relid, int32 *attr_widths); diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 048d463533..fde375cc9f 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -901,21 +901,22 @@ WINDOW w AS (order by four range between current row and unbounded following); (10 rows) SELECT sum(unique1) over - (rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING), + (order by unique1 + rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING), unique1 FROM tenk1 WHERE unique1 < 10; sum | unique1 -----+--------- - 4 | 4 - 6 | 2 - 3 | 1 - 7 | 6 - 15 | 9 - 17 | 8 - 13 | 5 - 8 | 3 - 10 | 7 - 7 | 0 + 0 | 0 + 1 | 1 + 3 | 2 + 5 | 3 + 7 | 4 + 9 | 5 + 11 | 6 + 13 | 7 + 15 | 8 + 17 | 9 (10 rows) CREATE TEMP VIEW v_window AS diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 268430a260..d8e9e7e3b1 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -211,7 +211,8 @@ FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four range between current row and unbounded following); SELECT sum(unique1) over - (rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING), + (order by unique1 + rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING), unique1 FROM tenk1 WHERE unique1 < 10;