diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 82d7dd18a8..812e241f44 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -268,10 +268,13 @@ findParents(GinBtree btree, GinBtreeStack *stack, /* * Insert value (stored in GinBtree) to tree described by stack * + * During an index build, buildStats is non-null and the counters + * it contains should be incremented as needed. + * * NB: the passed-in stack is freed, as though by freeGinBtreeStack. */ void -ginInsertValue(GinBtree btree, GinBtreeStack *stack) +ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) { GinBtreeStack *parent = stack; BlockNumber rootBlkno = InvalidBuffer; @@ -330,6 +333,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) ((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno; + /* During index build, count the newly-split page */ + if (buildStats) + { + if (btree->isData) + buildStats->nDataPages++; + else + buildStats->nEntryPages++; + } + parent = stack->parent; if (parent == NULL) @@ -381,6 +393,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) freeGinBtreeStack(stack); + /* During index build, count the newly-added root page */ + if (buildStats) + { + if (btree->isData) + buildStats->nDataPages++; + else + buildStats->nEntryPages++; + } + return; } else diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index c590d56f7c..5e2f6e76d3 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -592,9 +592,11 @@ void prepareDataScan(GinBtree btree, Relation index) { memset(btree, 0, sizeof(GinBtreeData)); + btree->index = index; - btree->isMoveRight = dataIsMoveRight; + btree->findChildPage = dataLocateItem; + btree->isMoveRight = dataIsMoveRight; btree->findItem = dataLocateLeafItem; btree->findChildPtr = dataFindChildPtr; btree->getLeftMostPage = dataGetLeftMostPage; @@ -603,6 +605,7 @@ 
prepareDataScan(GinBtree btree, Relation index) btree->splitPage = dataSplitPage; btree->fillRoot = dataFillRoot; + btree->isData = TRUE; btree->searchMode = FALSE; btree->isDelete = FALSE; btree->fullScan = FALSE; @@ -628,7 +631,9 @@ prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode) * Inserts array of item pointers, may execute several tree scan (very rare) */ void -insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem) +ginInsertItemPointer(GinPostingTreeScan *gdi, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats) { BlockNumber rootBlkno = gdi->stack->blkno; @@ -653,7 +658,7 @@ insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem) freeGinBtreeStack(gdi->stack); } else - ginInsertValue(&(gdi->btree), gdi->stack); + ginInsertValue(&(gdi->btree), gdi->stack, buildStats); gdi->stack = NULL; } diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index d60282f204..a47e92785c 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -659,8 +659,11 @@ prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum valu { memset(btree, 0, sizeof(GinBtreeData)); - btree->isMoveRight = entryIsMoveRight; + btree->index = index; + btree->ginstate = ginstate; + btree->findChildPage = entryLocateEntry; + btree->isMoveRight = entryIsMoveRight; btree->findItem = entryLocateLeafEntry; btree->findChildPtr = entryFindChildPtr; btree->getLeftMostPage = entryGetLeftMostPage; @@ -669,13 +672,12 @@ prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum valu btree->splitPage = entrySplitPage; btree->fillRoot = entryFillRoot; - btree->index = index; - btree->ginstate = ginstate; - btree->entryAttnum = attnum; - btree->entryValue = value; - - btree->isDelete = FALSE; + btree->isData = FALSE; btree->searchMode = FALSE; btree->fullScan = FALSE; btree->isBuild = FALSE; + + 
btree->entryAttnum = attnum; + btree->entryValue = value; + btree->isDelete = FALSE; } diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index eacac507e4..0c050c97a0 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -789,7 +789,7 @@ ginInsertCleanup(Relation index, GinState *ginstate, ginBeginBAScan(&accum); while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) { - ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE); + ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL); if (vac_delay) vacuum_delay_point(); } @@ -823,7 +823,7 @@ ginInsertCleanup(Relation index, GinState *ginstate, ginBeginBAScan(&accum); while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) - ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE); + ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL); } /* diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 640d3acde9..263e447ca4 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -27,6 +27,7 @@ typedef struct { GinState ginstate; double indtuples; + GinStatsData buildStats; MemoryContext tmpCtx; MemoryContext funcCtx; BuildAccumulator accum; @@ -97,8 +98,10 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems) * GinFormTuple(). 
*/ static IndexTuple -addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack, - IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild) +addItemPointersToTuple(Relation index, GinState *ginstate, + GinBtreeStack *stack, IndexTuple old, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats) { Datum key = gin_index_getattr(ginstate, old); OffsetNumber attnum = gintuple_get_attrnum(ginstate, old); @@ -128,11 +131,15 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack, GinSetPostingTree(res, postingRoot); gdi = prepareScanPostingTree(index, postingRoot, FALSE); - gdi->btree.isBuild = isBuild; + gdi->btree.isBuild = (buildStats != NULL); - insertItemPointer(gdi, items, nitem); + ginInsertItemPointer(gdi, items, nitem, buildStats); pfree(gdi); + + /* During index build, count the newly-added data page */ + if (buildStats) + buildStats->nDataPages++; } return res; @@ -140,18 +147,25 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack, /* * Inserts only one entry to the index, but it can add more than 1 ItemPointer. + * + * During an index build, buildStats is non-null and the counters + * it contains should be incremented as needed. 
*/ void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, ItemPointerData *items, uint32 nitem, - bool isBuild) + GinStatsData *buildStats) { GinBtreeData btree; GinBtreeStack *stack; IndexTuple itup; Page page; + /* During index build, count the to-be-inserted entry */ + if (buildStats) + buildStats->nEntries++; + prepareEntryScan(&btree, index, attnum, value, ginstate); stack = ginFindLeafPage(&btree, NULL); @@ -174,14 +188,15 @@ ginEntryInsert(Relation index, GinState *ginstate, /* insert into posting tree */ gdi = prepareScanPostingTree(index, rootPostingTree, FALSE); - gdi->btree.isBuild = isBuild; - insertItemPointer(gdi, items, nitem); + gdi->btree.isBuild = (buildStats != NULL); + ginInsertItemPointer(gdi, items, nitem, buildStats); pfree(gdi); return; } - itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild); + itup = addItemPointersToTuple(index, ginstate, stack, itup, + items, nitem, buildStats); btree.isDelete = TRUE; } @@ -195,13 +210,14 @@ ginEntryInsert(Relation index, GinState *ginstate, /* Add the rest, making a posting tree if necessary */ IndexTuple previtup = itup; - itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild); + itup = addItemPointersToTuple(index, ginstate, stack, previtup, + items + 1, nitem - 1, buildStats); pfree(previtup); } } btree.entry = itup; - ginInsertValue(&btree, stack); + ginInsertValue(&btree, stack, buildStats); pfree(itup); } @@ -260,7 +276,8 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values, { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); - ginEntryInsert(index, &buildstate->ginstate, attnum, entry, list, nlist, TRUE); + ginEntryInsert(index, &buildstate->ginstate, attnum, entry, + list, nlist, &buildstate->buildStats); } MemoryContextReset(buildstate->tmpCtx); @@ -292,6 +309,8 @@ ginbuild(PG_FUNCTION_ARGS) RelationGetRelationName(index)); 
initGinState(&buildstate.ginstate, index); + buildstate.indtuples = 0; + memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); /* initialize the meta page */ MetaBuffer = GinNewBuffer(index); @@ -331,8 +350,8 @@ ginbuild(PG_FUNCTION_ARGS) UnlockReleaseBuffer(RootBuffer); END_CRIT_SECTION(); - /* build the index */ - buildstate.indtuples = 0; + /* count the root as first entry page */ + buildstate.buildStats.nEntryPages++; /* * create a temporary memory context that is reset once for each tuple @@ -367,12 +386,19 @@ ginbuild(PG_FUNCTION_ARGS) { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); - ginEntryInsert(index, &buildstate.ginstate, attnum, entry, list, nlist, TRUE); + ginEntryInsert(index, &buildstate.ginstate, attnum, entry, + list, nlist, &buildstate.buildStats); } MemoryContextSwitchTo(oldCtx); MemoryContextDelete(buildstate.tmpCtx); + /* + * Update metapage stats + */ + buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index); + ginUpdateStats(index, &buildstate.buildStats); + /* * Return statistics */ @@ -401,7 +427,7 @@ ginHeapTupleInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datu return 0; for (i = 0; i < nentries; i++) - ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, FALSE); + ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, NULL); return nentries; } diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index c128e5b330..52bca8cee3 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -13,10 +13,12 @@ */ #include "postgres.h" + #include "access/genam.h" #include "access/gin.h" #include "access/reloptions.h" #include "catalog/pg_type.h" +#include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/indexfsm.h" @@ -227,6 +229,10 @@ GinInitMetabuffer(Buffer b) metadata->tailFreeSize = 0; metadata->nPendingPages = 0; metadata->nPendingHeapTuples = 0; + 
metadata->nTotalPages = 0; + metadata->nEntryPages = 0; + metadata->nDataPages = 0; + metadata->nEntries = 0; } int @@ -354,3 +360,82 @@ ginoptions(PG_FUNCTION_ARGS) PG_RETURN_BYTEA_P(rdopts); } + +/* + * Fetch index's statistical data into *stats + * + * Note: in the result, nPendingPages can be trusted to be up-to-date, + * but the other fields are as of the last VACUUM. + */ +void +ginGetStats(Relation index, GinStatsData *stats) +{ + Buffer metabuffer; + Page metapage; + GinMetaPageData *metadata; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + LockBuffer(metabuffer, GIN_SHARE); + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + + stats->nPendingPages = metadata->nPendingPages; + stats->nTotalPages = metadata->nTotalPages; + stats->nEntryPages = metadata->nEntryPages; + stats->nDataPages = metadata->nDataPages; + stats->nEntries = metadata->nEntries; + + UnlockReleaseBuffer(metabuffer); +} + +/* + * Write the given statistics to the index's metapage + * + * Note: nPendingPages is *not* copied over + */ +void +ginUpdateStats(Relation index, const GinStatsData *stats) +{ + Buffer metabuffer; + Page metapage; + GinMetaPageData *metadata; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + LockBuffer(metabuffer, GIN_EXCLUSIVE); + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + + START_CRIT_SECTION(); + + metadata->nTotalPages = stats->nTotalPages; + metadata->nEntryPages = stats->nEntryPages; + metadata->nDataPages = stats->nDataPages; + metadata->nEntries = stats->nEntries; + + MarkBufferDirty(metabuffer); + + if (!index->rd_istemp) + { + XLogRecPtr recptr; + ginxlogUpdateMeta data; + XLogRecData rdata; + + data.node = index->rd_node; + data.ntuples = 0; + data.newRightlink = data.prevTail = InvalidBlockNumber; + memcpy(&data.metadata, metadata, sizeof(GinMetaPageData)); + + rdata.buffer = InvalidBuffer; + rdata.data = (char *) &data; + rdata.len = sizeof(ginxlogUpdateMeta); + rdata.next 
= NULL; + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata); + PageSetLSN(metapage, recptr); + PageSetTLI(metapage, ThisTimeLineID); + } + + UnlockReleaseBuffer(metabuffer); + + END_CRIT_SECTION(); +} diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index f074299622..c7f9a72d2e 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -707,9 +707,8 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) BlockNumber npages, blkno; BlockNumber totFreePages; - BlockNumber lastBlock = GIN_ROOT_BLKNO, - lastFilledBlock = GIN_ROOT_BLKNO; GinState ginstate; + GinStatsData idxStat; /* * In an autovacuum analyze, we want to clean up pending insertions. @@ -736,6 +735,8 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) ginInsertCleanup(index, &ginstate, true, stats); } + memset(&idxStat, 0, sizeof(idxStat)); + /* * XXX we always report the heap tuple count as the number of index * entries. This is bogus if the index is partial, but it's real hard to @@ -757,7 +758,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) totFreePages = 0; - for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++) + for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++) { Buffer buffer; Page page; @@ -771,15 +772,28 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) if (GinPageIsDeleted(page)) { + Assert(blkno != GIN_ROOT_BLKNO); RecordFreeIndexPage(index, blkno); totFreePages++; } - else - lastFilledBlock = blkno; + else if (GinPageIsData(page)) + { + idxStat.nDataPages++; + } + else if (!GinPageIsList(page)) + { + idxStat.nEntryPages++; + + if ( GinPageIsLeaf(page) ) + idxStat.nEntries += PageGetMaxOffsetNumber(page); + } UnlockReleaseBuffer(buffer); } - lastBlock = npages - 1; + + /* Update the metapage with accurate page and entry counts */ + idxStat.nTotalPages = npages; + ginUpdateStats(info->index, &idxStat); /* Finally, vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c 
index 75997d9534..18b5908d05 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -839,7 +839,7 @@ ginContinueSplit(ginIncompleteSplit *split) stack.parent = NULL; findParents(&btree, &stack, split->rootBlkno); - ginInsertValue(&btree, stack.parent); + ginInsertValue(&btree, stack.parent, NULL); FreeFakeRelcacheEntry(reln); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 34369e5aae..ce6d4e2a79 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -91,6 +91,7 @@ #include #include +#include "access/gin.h" #include "access/sysattr.h" #include "catalog/index.h" #include "catalog/pg_opfamily.h" @@ -6235,6 +6236,24 @@ gistcostestimate(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +/* Find the index column matching "op"; return its index, or -1 if no match */ +static int +find_index_column(Node *op, IndexOptInfo *index) +{ + int i; + + for (i = 0; i < index->ncolumns; i++) + { + if (match_index_to_operand(op, i, index)) + return i; + } + + return -1; +} + +/* + * GIN has search behavior completely different from other index types + */ Datum gincostestimate(PG_FUNCTION_ARGS) { @@ -6246,10 +6265,329 @@ gincostestimate(PG_FUNCTION_ARGS) Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + ListCell *l; + int32 nfullscan = 0; + List *selectivityQuals; + double numPages = index->pages, + numTuples = index->tuples; + double numEntryPages, + numDataPages, + numPendingPages, + numEntries; + double partialEntriesInQuals = 0.0; + double searchEntriesInQuals = 0.0; + double exactEntriesInQuals = 0.0; + double entryPagesFetched, + dataPagesFetched, + dataPagesFetchedBySel; + double qual_op_cost, + qual_arg_cost, + spc_random_page_cost, + num_scans; + QualCost index_qual_cost; + Relation indexRel; + GinStatsData ginStats; - genericcostestimate(root, index, 
indexQuals, outer_rel, 0.0, - indexStartupCost, indexTotalCost, - indexSelectivity, indexCorrelation); + /* + * Obtain statistic information from the meta page + */ + indexRel = index_open(index->indexoid, AccessShareLock); + ginGetStats(indexRel, &ginStats); + index_close(indexRel, AccessShareLock); + + numEntryPages = ginStats.nEntryPages; + numDataPages = ginStats.nDataPages; + numPendingPages = ginStats.nPendingPages; + numEntries = ginStats.nEntries; + + /* + * nPendingPages can be trusted, but the other fields are as of the last + * VACUUM. Scale them by the ratio numPages / nTotalPages to account for + * growth since then. If the fields are zero (implying no VACUUM at all, + * and an index created pre-9.1), assume all pages are entry pages. + */ + if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0) + { + numEntryPages = numPages; + numDataPages = 0; + numEntries = Max(numTuples, 1.0); /* bogus, but no other info available; keep >= 1 to avoid divide-by-zero */ + } + else + { + double scale = numPages / ginStats.nTotalPages; + + numEntryPages = ceil(numEntryPages * scale); + numDataPages = ceil(numDataPages * scale); + numEntries = Max(ceil(numEntries * scale), 1.0); /* an empty index has zero entries; keep >= 1 to avoid divide-by-zero */ + /* ensure we didn't round up too much */ + numEntryPages = Min(numEntryPages, numPages); + numDataPages = Min(numDataPages, numPages - numEntryPages); + } + + /* + * Include predicate in selectivityQuals (should match genericcostestimate) + */ + if (index->indpred != NIL) + { + List *predExtraQuals = NIL; + + foreach(l, index->indpred) + { + Node *predQual = (Node *) lfirst(l); + List *oneQual = list_make1(predQual); + + if (!predicate_implied_by(oneQual, indexQuals)) + predExtraQuals = list_concat(predExtraQuals, oneQual); + } + /* list_concat avoids modifying the passed-in indexQuals list */ + selectivityQuals = list_concat(predExtraQuals, indexQuals); + } + else + selectivityQuals = indexQuals; + + /* Estimate the fraction of main-table tuples that will be visited */ + *indexSelectivity = clauselist_selectivity(root, selectivityQuals, +
index->rel->relid, + JOIN_INNER, + NULL); + + /* fetch estimated page cost for tablespace containing index */ + get_tablespace_page_costs(index->reltablespace, + &spc_random_page_cost, + NULL); + + /* + * Generic assumption about index correlation: there isn't any. + */ + *indexCorrelation = 0.0; + + /* + * Examine quals to estimate number of search entries & partial matches + */ + foreach(l, indexQuals) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Expr *clause; + Node *leftop, + *rightop, + *operand; + Oid extractProcOid; + Oid clause_op; + int strategy_op; + Oid lefttype, + righttype; + int32 nentries = 0; + bool *partial_matches = NULL; + Pointer *extra_data = NULL; + int indexcol; + + Assert(IsA(rinfo, RestrictInfo)); + clause = rinfo->clause; + Assert(IsA(clause, OpExpr)); + leftop = get_leftop(clause); + rightop = get_rightop(clause); + clause_op = ((OpExpr *) clause)->opno; + + if ((indexcol = find_index_column(leftop, index)) >= 0) + { + operand = rightop; + } + else if ((indexcol = find_index_column(rightop, index)) >= 0) + { + operand = leftop; + clause_op = get_commutator(clause_op); + } + else + { + elog(ERROR, "Could not match index to operand"); + operand = NULL; /* keep compiler quiet */ + } + + if (IsA(operand, RelabelType)) + operand = (Node *) ((RelabelType *) operand)->arg; + + /* + * It's impossible to call extractQuery method for unknown operand. + * So unless operand is a Const we can't do much; just assume there + * will be one ordinary search entry from the operand at runtime. + */ + if (!IsA(operand, Const)) + { + searchEntriesInQuals++; + continue; + } + + /* If Const is null, there can be no matches */ + if (((Const*) operand)->constisnull) + { + *indexStartupCost = 0; + *indexTotalCost = 0; + *indexSelectivity = 0; + PG_RETURN_VOID(); + } + + /* + * Get the operator's strategy number and declared input data types + * within the index opfamily.
+ */ + get_op_opfamily_properties(clause_op, index->opfamily[indexcol], + &strategy_op, &lefttype, &righttype); + + /* + * GIN (like GiST) always has lefttype == righttype in pg_amproc + * and they are equal to type Oid on which index was created/designed + */ + extractProcOid = get_opfamily_proc(index->opfamily[indexcol], + lefttype, lefttype, + GIN_EXTRACTQUERY_PROC); + + if (!OidIsValid(extractProcOid)) + { + /* probably shouldn't happen, but cope sanely if so */ + searchEntriesInQuals++; + continue; + } + + OidFunctionCall5(extractProcOid, + ((Const*)operand)->constvalue, + PointerGetDatum(&nentries), + UInt16GetDatum(strategy_op), + PointerGetDatum(&partial_matches), + PointerGetDatum(&extra_data)); + + if (nentries == 0) + { + nfullscan++; + } + else if (nentries < 0) + { + /* + * GIN_EXTRACTQUERY_PROC guarantees that nothing will be found + */ + *indexStartupCost = 0; + *indexTotalCost = 0; + *indexSelectivity = 0; + PG_RETURN_VOID(); + } + else + { + int i; + + for (i=0; irows > 1) + num_scans = outer_rel->rows; + else + num_scans = 1; + + /* + * cost to begin scan, first of all, pay attention to + * pending list. + */ + entryPagesFetched = numPendingPages; + + /* + * Estimate number of entry pages read. We need to do + * searchEntriesInQuals searches. Use a power function as it should be, + * but tuples on leaf pages usually is much greater. + * Here we include all searches in entry tree, including + * search of first entry in partial match algorithm + */ + entryPagesFetched += ceil(searchEntriesInQuals * rint(pow(numEntryPages, 0.15))); + + /* + * Add an estimate of entry pages read by partial match algorithm. + * It's a scan over leaf pages in entry tree. We haven't any useful stats + * here, so estimate it as proportion. + */ + entryPagesFetched += ceil(numEntryPages * partialEntriesInQuals / numEntries); + + /* + * Partial match algorithm reads all data pages before + * doing actual scan, so it's a startup cost. 
Again, + * we haven't any useful stats here, so estimate it as a + * proportion + */ + dataPagesFetched = ceil(numDataPages * partialEntriesInQuals / numEntries); + + /* calculate cache effects */ + if (num_scans > 1 || searchEntriesInQuals > 1) + { + entryPagesFetched = index_pages_fetched(entryPagesFetched, + (BlockNumber) numEntryPages, + numEntryPages, root); + dataPagesFetched = index_pages_fetched(dataPagesFetched, + (BlockNumber) numDataPages, + numDataPages, root); + } + + /* + * Here we use random page cost because logically-close pages could be + * far apart on disk. + */ + *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost; + + /* cost to scan data pages for each exact (non-partial) matched entry */ + dataPagesFetched = ceil(numDataPages * exactEntriesInQuals / numEntries); + + /* + * Estimate number of data pages read, using selectivity estimation and + * capacity of data page. + */ + dataPagesFetchedBySel = ceil(*indexSelectivity * + (numTuples / (BLCKSZ/SizeOfIptrData))); + + if (dataPagesFetchedBySel > dataPagesFetched) + { + /* + * At least one of the entries is very frequent and, unfortunately, + * we couldn't get statistics about entries (only tsvector has + * such statistics). So we obviously have too small an estimate of + * pages fetched from the data tree. Re-estimate it from the known + * capacity of data pages. + */ + dataPagesFetched = dataPagesFetchedBySel; + } + + if (num_scans > 1) + dataPagesFetched = index_pages_fetched(dataPagesFetched, + (BlockNumber) numDataPages, + numDataPages, root); + *indexTotalCost = *indexStartupCost + + dataPagesFetched * spc_random_page_cost; + + /* + * Add on index qual eval costs, much as in genericcostestimate + */ + cost_qual_eval(&index_qual_cost, indexQuals, root); + qual_op_cost = cpu_operator_cost * list_length(indexQuals); + qual_arg_cost = index_qual_cost.startup + + index_qual_cost.per_tuple - qual_op_cost; + if (qual_arg_cost < 0) /* just in case... 
*/ + qual_arg_cost = 0; + + *indexStartupCost += qual_arg_cost; + *indexTotalCost += qual_arg_cost; + *indexTotalCost += ( numTuples * *indexSelectivity ) * (cpu_index_tuple_cost + qual_op_cost); PG_RETURN_VOID(); } diff --git a/src/include/access/gin.h b/src/include/access/gin.h index c67d4182c4..e6db073a05 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -79,6 +79,14 @@ typedef struct GinMetaPageData */ BlockNumber nPendingPages; int64 nPendingHeapTuples; + + /* + * Statistics for planner use (accurate as of last VACUUM) + */ + BlockNumber nTotalPages; + BlockNumber nEntryPages; + BlockNumber nDataPages; + int64 nEntries; } GinMetaPageData; #define GinPageGetMeta(p) \ @@ -94,6 +102,8 @@ typedef struct GinMetaPageData #define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF ) #define GinPageIsData(page) ( GinPageGetOpaque(page)->flags & GIN_DATA ) #define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA ) +#define GinPageIsList(page) ( GinPageGetOpaque(page)->flags & GIN_LIST ) +#define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST ) #define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW ) #define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW ) @@ -362,13 +372,28 @@ extern Datum *extractEntriesSU(GinState *ginstate, OffsetNumber attnum, Datum va extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple); extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple); +/* + * GinStatsData represents stats data for planner use + */ +typedef struct GinStatsData +{ + BlockNumber nPendingPages; + BlockNumber nTotalPages; + BlockNumber nEntryPages; + BlockNumber nDataPages; + int64 nEntries; +} GinStatsData; + +extern void ginGetStats(Relation index, GinStatsData *stats); +extern void ginUpdateStats(Relation index, const GinStatsData *stats); + /* gininsert.c */ extern Datum ginbuild(PG_FUNCTION_ARGS); extern 
Datum gininsert(PG_FUNCTION_ARGS); extern void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, ItemPointerData *items, uint32 nitem, - bool isBuild); + GinStatsData *buildStats); /* ginxlog.c */ extern void gin_redo(XLogRecPtr lsn, XLogRecord *record); @@ -406,6 +431,7 @@ typedef struct GinBtreeData Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData **); void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer); + bool isData; bool searchMode; Relation index; @@ -432,7 +458,8 @@ typedef struct GinBtreeData extern GinBtreeStack *ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno); extern GinBtreeStack *ginFindLeafPage(GinBtree btree, GinBtreeStack *stack); extern void freeGinBtreeStack(GinBtreeStack *stack); -extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack); +extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack, + GinStatsData *buildStats); extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno); /* ginentrypage.c */ @@ -462,8 +489,9 @@ typedef struct extern GinPostingTreeScan *prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode); -extern void insertItemPointer(GinPostingTreeScan *gdi, - ItemPointerData *items, uint32 nitem); +extern void ginInsertItemPointer(GinPostingTreeScan *gdi, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats); extern Buffer scanBeginPostingTree(GinPostingTreeScan *gdi); extern void dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern void prepareDataScan(GinBtree btree, Relation index);