diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 6035b4dfd4..b885f2a3a6 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -39,6 +39,7 @@
 #include "access/relscan.h"
 #include "access/transam.h"
+#include "access/visibilitymap.h"
 #include "executor/execdebug.h"
 #include "executor/nodeBitmapHeapscan.h"
 #include "miscadmin.h"
@@ -225,9 +226,31 @@ BitmapHeapNext(BitmapHeapScanState *node)
 		}
 
 		/*
-		 * Fetch the current heap page and identify candidate tuples.
+		 * We can skip fetching the heap page if we don't need any fields
+		 * from the heap, and the bitmap entries don't need rechecking,
+		 * and all tuples on the page are visible to our transaction.
 		 */
-		bitgetpage(scan, tbmres);
+		node->skip_fetch = (node->can_skip_fetch &&
+							!tbmres->recheck &&
+							VM_ALL_VISIBLE(node->ss.ss_currentRelation,
+										   tbmres->blockno,
+										   &node->vmbuffer));
+
+		if (node->skip_fetch)
+		{
+			/*
+			 * The number of tuples on this page is put into
+			 * scan->rs_ntuples; note we don't fill scan->rs_vistuples.
+			 */
+			scan->rs_ntuples = tbmres->ntuples;
+		}
+		else
+		{
+			/*
+			 * Fetch the current heap page and identify candidate tuples.
+			 */
+			bitgetpage(scan, tbmres);
+		}
 
 		if (tbmres->ntuples >= 0)
 			node->exact_pages++;
@@ -289,45 +312,55 @@ BitmapHeapNext(BitmapHeapScanState *node)
 		 */
 		BitmapPrefetch(node, scan);
 
-		/*
-		 * Okay to fetch the tuple
-		 */
-		targoffset = scan->rs_vistuples[scan->rs_cindex];
-		dp = (Page) BufferGetPage(scan->rs_cbuf);
-		lp = PageGetItemId(dp, targoffset);
-		Assert(ItemIdIsNormal(lp));
-
-		scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
-		scan->rs_ctup.t_len = ItemIdGetLength(lp);
-		scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
-		ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
-
-		pgstat_count_heap_fetch(scan->rs_rd);
-
-		/*
-		 * Set up the result slot to point to this tuple. Note that the slot
-		 * acquires a pin on the buffer.
-		 */
-		ExecStoreTuple(&scan->rs_ctup,
-					   slot,
-					   scan->rs_cbuf,
-					   false);
-
-		/*
-		 * If we are using lossy info, we have to recheck the qual conditions
-		 * at every tuple.
-		 */
-		if (tbmres->recheck)
+		if (node->skip_fetch)
 		{
-			econtext->ecxt_scantuple = slot;
-			ResetExprContext(econtext);
+			/*
+			 * If we don't have to fetch the tuple, just return nulls.
+			 */
+			ExecStoreAllNullTuple(slot);
+		}
+		else
+		{
+			/*
+			 * Okay to fetch the tuple.
+			 */
+			targoffset = scan->rs_vistuples[scan->rs_cindex];
+			dp = (Page) BufferGetPage(scan->rs_cbuf);
+			lp = PageGetItemId(dp, targoffset);
+			Assert(ItemIdIsNormal(lp));
 
-			if (!ExecQual(node->bitmapqualorig, econtext))
+			scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+			scan->rs_ctup.t_len = ItemIdGetLength(lp);
+			scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
+			ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
+
+			pgstat_count_heap_fetch(scan->rs_rd);
+
+			/*
+			 * Set up the result slot to point to this tuple. Note that the
+			 * slot acquires a pin on the buffer.
+			 */
+			ExecStoreTuple(&scan->rs_ctup,
+						   slot,
+						   scan->rs_cbuf,
+						   false);
+
+			/*
+			 * If we are using lossy info, we have to recheck the qual
+			 * conditions at every tuple.
+			 */
+			if (tbmres->recheck)
 			{
-				/* Fails recheck, so drop it and loop back for another */
-				InstrCountFiltered2(node, 1);
-				ExecClearTuple(slot);
-				continue;
+				econtext->ecxt_scantuple = slot;
+				ResetExprContext(econtext);
+
+				if (!ExecQual(node->bitmapqualorig, econtext))
+				{
+					/* Fails recheck, so drop it and loop back for another */
+					InstrCountFiltered2(node, 1);
+					ExecClearTuple(slot);
+					continue;
+				}
 			}
 		}
 
@@ -582,6 +615,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
 			while (node->prefetch_pages < node->prefetch_target)
 			{
 				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
+				bool		skip_fetch;
 
 				if (tbmpre == NULL)
 				{
@@ -591,7 +625,26 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
 					break;
 				}
 				node->prefetch_pages++;
-				PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+
+				/*
+				 * If we expect not to have to actually read this heap page,
+				 * skip this prefetch call, but continue to run the prefetch
+				 * logic normally.  (Would it be better not to increment
+				 * prefetch_pages?)
+				 *
+				 * This depends on the assumption that the index AM will
+				 * report the same recheck flag for this future heap page as
+				 * it did for the current heap page; which is not a certainty
+				 * but is true in many cases.
+				 */
+				skip_fetch = (node->can_skip_fetch &&
+							  (node->tbmres ? !node->tbmres->recheck : false) &&
+							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
+											 tbmpre->blockno,
+											 &node->pvmbuffer));
+
+				if (!skip_fetch)
+					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
 			}
 		}
 
@@ -608,6 +661,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
 		{
 			TBMIterateResult *tbmpre;
 			bool		do_prefetch = false;
+			bool		skip_fetch;
 
 			/*
 			 * Recheck under the mutex. If some other process has already
@@ -633,7 +687,15 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
 				break;
 			}
 
-			PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+			/* As above, skip prefetch if we expect not to need page */
+			skip_fetch = (node->can_skip_fetch &&
+						  (node->tbmres ? !node->tbmres->recheck : false) &&
+						  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
+										 tbmpre->blockno,
+										 &node->pvmbuffer));
+
+			if (!skip_fetch)
+				PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
 		}
 	}
 }
@@ -687,6 +749,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 	/* rescan to release any page pin */
 	heap_rescan(node->ss.ss_currentScanDesc, NULL);
 
+	/* release bitmaps and buffers if any */
 	if (node->tbmiterator)
 		tbm_end_iterate(node->tbmiterator);
 	if (node->prefetch_iterator)
@@ -697,6 +760,10 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
+	if (node->vmbuffer != InvalidBuffer)
+		ReleaseBuffer(node->vmbuffer);
+	if (node->pvmbuffer != InvalidBuffer)
+		ReleaseBuffer(node->pvmbuffer);
 	node->tbm = NULL;
 	node->tbmiterator = NULL;
 	node->tbmres = NULL;
@@ -704,6 +771,8 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 	node->initialized = false;
 	node->shared_tbmiterator = NULL;
 	node->shared_prefetch_iterator = NULL;
+	node->vmbuffer = InvalidBuffer;
+	node->pvmbuffer = InvalidBuffer;
 
 	ExecScanReScan(&node->ss);
 
@@ -748,7 +817,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 	ExecEndNode(outerPlanState(node));
 
 	/*
-	 * release bitmap if any
+	 * release bitmaps and buffers if any
 	 */
 	if (node->tbmiterator)
 		tbm_end_iterate(node->tbmiterator);
@@ -760,6 +829,10 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 		tbm_end_shared_iterate(node->shared_tbmiterator);
 	if (node->shared_prefetch_iterator)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
+	if (node->vmbuffer != InvalidBuffer)
+		ReleaseBuffer(node->vmbuffer);
+	if (node->pvmbuffer != InvalidBuffer)
+		ReleaseBuffer(node->pvmbuffer);
 
 	/*
 	 * close heap scan
@@ -805,6 +878,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->tbm = NULL;
 	scanstate->tbmiterator = NULL;
 	scanstate->tbmres = NULL;
+	scanstate->skip_fetch = false;
+	scanstate->vmbuffer = InvalidBuffer;
+	scanstate->pvmbuffer = InvalidBuffer;
 	scanstate->exact_pages = 0;
 	scanstate->lossy_pages = 0;
 	scanstate->prefetch_iterator = NULL;
@@ -815,8 +891,19 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->pscan_len = 0;
 	scanstate->initialized = false;
 	scanstate->shared_tbmiterator = NULL;
+	scanstate->shared_prefetch_iterator = NULL;
 	scanstate->pstate = NULL;
 
+	/*
+	 * We can potentially skip fetching heap pages if we do not need any
+	 * columns of the table, either for checking non-indexable quals or for
+	 * returning data.  This test is a bit simplistic, as it checks the
+	 * stronger condition that there's no qual or return tlist at all.  But
+	 * in most cases it's probably not worth working harder than that.
+	 */
+	scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
+								 node->scan.plan.targetlist == NIL);
+
 	/*
 	 * Miscellaneous initialization
 	 *
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c802d61c39..4b497486a0 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -807,6 +807,15 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
 	if (IsA(path, CustomPath))
 		return false;
 
+	/*
+	 * If a bitmap scan's tlist is empty, keep it as-is.  This may allow the
+	 * executor to skip heap page fetches, and in any case, the benefit of
+	 * using a physical tlist instead would be minimal.
+	 */
+	if (IsA(path, BitmapHeapPath) &&
+		path->pathtarget->exprs == NIL)
+		return false;
+
 	/*
 	 * Can't do it if any system columns or whole-row Vars are requested.
 	 * (This could possibly be fixed but would take some fragile assumptions
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 8698c8a50c..d209ec012c 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -507,7 +507,7 @@ typedef struct EState
 	bool	   *es_epqTupleSet; /* true if EPQ tuple is provided */
 	bool	   *es_epqScanDone; /* true if EPQ tuple has been fetched */
 
-	bool		es_use_parallel_mode; /* can we use parallel workers? */
+	bool		es_use_parallel_mode;	/* can we use parallel workers? */
 
 	/* The per-query shared memory area to use for parallel execution. */
 	struct dsa_area *es_query_dsa;
@@ -1331,6 +1331,10 @@ typedef struct ParallelBitmapHeapState
 *		tbm				   bitmap obtained from child index scan(s)
 *		tbmiterator		   iterator for scanning current pages
 *		tbmres			   current-page data
+*		can_skip_fetch	   can we potentially skip tuple fetches in this scan?
+*		skip_fetch		   are we skipping tuple fetches on this page?
+*		vmbuffer		   buffer for visibility-map lookups
+*		pvmbuffer		   ditto, for prefetched pages
 *		exact_pages		   total number of exact pages retrieved
 *		lossy_pages		   total number of lossy pages retrieved
 *		prefetch_iterator  iterator for prefetching ahead of current page
@@ -1351,6 +1355,10 @@ typedef struct BitmapHeapScanState
 	TIDBitmap  *tbm;
 	TBMIterator *tbmiterator;
 	TBMIterateResult *tbmres;
+	bool		can_skip_fetch;
+	bool		skip_fetch;
+	Buffer		vmbuffer;
+	Buffer		pvmbuffer;
 	long		exact_pages;
 	long		lossy_pages;
 	TBMIterator *prefetch_iterator;
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index fc91f3ce36..991c287b11 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -136,12 +136,15 @@ SELECT count(*) FROM tenk2;
 (1 row)
 
 -- do an indexscan
+-- make sure it is not a bitmap scan, which might skip fetching heap tuples
+SET enable_bitmapscan TO off;
 SELECT count(*) FROM tenk2 WHERE unique1 = 1;
  count 
 -------
      1
 (1 row)
 
+RESET enable_bitmapscan;
 -- We can't just call wait_for_stats() at this point, because we only
 -- transmit stats when the session goes idle, and we probably didn't
 -- transmit the last couple of counts yet thanks to the rate-limiting logic
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index 6e882bf3ac..2be7dde834 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -138,7 +138,10 @@ ROLLBACK;
 -- do a seqscan
 SELECT count(*) FROM tenk2;
 -- do an indexscan
+-- make sure it is not a bitmap scan, which might skip fetching heap tuples
+SET enable_bitmapscan TO off;
 SELECT count(*) FROM tenk2 WHERE unique1 = 1;
+RESET enable_bitmapscan;
 -- We can't just call wait_for_stats() at this point, because we only
 -- transmit stats when the session goes idle, and we probably didn't
 -- transmit the last couple of counts yet thanks to the rate-limiting logic
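
For review convenience, here is one way to exercise the new code path against the
regression database.  This is a sketch, not part of the patch: tenk2 and its index
on unique1 come from the regression tests, and the enable_* settings merely coax
the planner into a bitmap plan it might not otherwise choose.

-- Make sure visibility-map bits are set, so VM_ALL_VISIBLE() can succeed.
VACUUM (ANALYZE) tenk2;

-- Force a bitmap scan; the planner would otherwise tend to prefer a
-- sequential or index-only scan for this query.
SET enable_seqscan = off;
SET enable_indexscan = off;
SET enable_indexonlyscan = off;

-- count(*) leaves the scan node with no qual and an empty targetlist,
-- so can_skip_fetch is true; the btree bitmap for unique1 < 1000 is
-- exact (recheck is false), so with every page all-visible the executor
-- returns all-null virtual tuples instead of fetching from the heap.
EXPLAIN (ANALYZE, BUFFERS)
SELECT count(*) FROM tenk2 WHERE unique1 < 1000;

RESET enable_seqscan;
RESET enable_indexscan;
RESET enable_indexonlyscan;

With the patch applied, the BUFFERS numbers for the Bitmap Heap Scan node should
drop to little more than the index and visibility-map pages, and EXPLAIN (VERBOSE)
should show no Output: line for the scan node, reflecting the use_physical_tlist()
change that keeps the empty tlist.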