diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index dada2ecd1e..01bb2f4cc1 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -967,6 +967,8 @@ heap_beginscan(Relation relation, Snapshot snapshot, scan->rs_base.rs_flags = flags; scan->rs_base.rs_parallel = parallel_scan; scan->rs_strategy = NULL; /* set in initscan */ + scan->rs_vmbuffer = InvalidBuffer; + scan->rs_empty_tuples_pending = 0; /* * Disable page-at-a-time mode if it's not a MVCC-safe snapshot. @@ -1055,6 +1057,14 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); + if (BufferIsValid(scan->rs_vmbuffer)) + { + ReleaseBuffer(scan->rs_vmbuffer); + scan->rs_vmbuffer = InvalidBuffer; + } + + Assert(scan->rs_empty_tuples_pending == 0); + /* * reinitialize scan descriptor */ @@ -1074,6 +1084,11 @@ heap_endscan(TableScanDesc sscan) if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); + if (BufferIsValid(scan->rs_vmbuffer)) + ReleaseBuffer(scan->rs_vmbuffer); + + Assert(scan->rs_empty_tuples_pending == 0); + /* * decrement relation reference count and free scan descriptor storage */ diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 3e7a6b5548..58de2c82a7 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -27,6 +27,7 @@ #include "access/syncscan.h" #include "access/tableam.h" #include "access/tsmapi.h" +#include "access/visibilitymap.h" #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/index.h" @@ -2198,6 +2199,24 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, hscan->rs_cindex = 0; hscan->rs_ntuples = 0; + /* + * We can skip fetching the heap page if we don't need any fields from the + * heap, the bitmap entries don't need rechecking, and all tuples on the + * page are visible to our transaction. + */ + if (!(scan->rs_flags & SO_NEED_TUPLES) && + !tbmres->recheck && + VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer)) + { + /* can't be lossy in the skip_fetch case */ + Assert(tbmres->ntuples >= 0); + Assert(hscan->rs_empty_tuples_pending >= 0); + + hscan->rs_empty_tuples_pending += tbmres->ntuples; + + return true; + } + /* * Ignore any claimed entries past what we think is the end of the * relation. It may have been extended after the start of our scan (we @@ -2310,6 +2329,16 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan, Page page; ItemId lp; + if (hscan->rs_empty_tuples_pending > 0) + { + /* + * If we don't have to fetch the tuple, just return nulls. + */ + ExecStoreAllNullTuple(slot); + hscan->rs_empty_tuples_pending--; + return true; + } + /* * Out of range? If so, nothing more to look at on this page */ diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 2148a21531..5f768701a5 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -105,16 +105,6 @@ BitmapHeapNext(BitmapHeapScanState *node) */ if (!node->initialized) { - /* - * We can potentially skip fetching heap pages if we do not need any - * columns of the table, either for checking non-indexable quals or - * for returning data. This test is a bit simplistic, as it checks - * the stronger condition that there's no qual or return tlist at all. - * But in most cases it's probably not worth working harder than that. - */ - node->can_skip_fetch = (node->ss.ps.plan->qual == NIL && - node->ss.ps.plan->targetlist == NIL); - if (!pstate) { tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node)); @@ -195,10 +185,24 @@ BitmapHeapNext(BitmapHeapScanState *node) */ if (!scan) { + bool need_tuples = false; + + /* + * We can potentially skip fetching heap pages if we do not need + * any columns of the table, either for checking non-indexable + * quals or for returning data. This test is a bit simplistic, as + * it checks the stronger condition that there's no qual or return + * tlist at all. But in most cases it's probably not worth working + * harder than that. + */ + need_tuples = (node->ss.ps.plan->qual != NIL || + node->ss.ps.plan->targetlist != NIL); + scan = table_beginscan_bm(node->ss.ss_currentRelation, node->ss.ps.state->es_snapshot, 0, - NULL); + NULL, + need_tuples); node->ss.ss_currentScanDesc = scan; } @@ -208,7 +212,7 @@ BitmapHeapNext(BitmapHeapScanState *node) for (;;) { - bool skip_fetch; + bool valid_block; CHECK_FOR_INTERRUPTS(); @@ -229,37 +233,14 @@ BitmapHeapNext(BitmapHeapScanState *node) BitmapAdjustPrefetchIterator(node, tbmres); + valid_block = table_scan_bitmap_next_block(scan, tbmres); + if (tbmres->ntuples >= 0) node->exact_pages++; else node->lossy_pages++; - /* - * We can skip fetching the heap page if we don't need any fields - * from the heap, and the bitmap entries don't need rechecking, - * and all tuples on the page are visible to our transaction. - * - * XXX: It's a layering violation that we do these checks above - * tableam, they should probably moved below it at some point. - */ - skip_fetch = (node->can_skip_fetch && - !tbmres->recheck && - VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmres->blockno, - &node->vmbuffer)); - - if (skip_fetch) - { - /* can't be lossy in the skip_fetch case */ - Assert(tbmres->ntuples >= 0); - - /* - * The number of tuples on this page is put into - * node->return_empty_tuples. - */ - node->return_empty_tuples = tbmres->ntuples; - } - else if (!table_scan_bitmap_next_block(scan, tbmres)) + if (!valid_block) { /* AM doesn't think this block is valid, skip */ continue; @@ -302,51 +283,32 @@ BitmapHeapNext(BitmapHeapScanState *node) * should happen only when we have determined there is still something * to do on the current page, else we may uselessly prefetch the same * page we are just about to request for real. - * - * XXX: It's a layering violation that we do these checks above - * tableam, they should probably moved below it at some point. */ BitmapPrefetch(node, scan); - if (node->return_empty_tuples > 0) + /* + * Attempt to fetch tuple from AM. + */ + if (!table_scan_bitmap_next_tuple(scan, tbmres, slot)) { - /* - * If we don't have to fetch the tuple, just return nulls. - */ - ExecStoreAllNullTuple(slot); - - if (--node->return_empty_tuples == 0) - { - /* no more tuples to return in the next round */ - node->tbmres = tbmres = NULL; - } + /* nothing more to look at on this page */ + node->tbmres = tbmres = NULL; + continue; } - else - { - /* - * Attempt to fetch tuple from AM. - */ - if (!table_scan_bitmap_next_tuple(scan, tbmres, slot)) - { - /* nothing more to look at on this page */ - node->tbmres = tbmres = NULL; - continue; - } - /* - * If we are using lossy info, we have to recheck the qual - * conditions at every tuple. - */ - if (tbmres->recheck) + /* + * If we are using lossy info, we have to recheck the qual conditions + * at every tuple. + */ + if (tbmres->recheck) + { + econtext->ecxt_scantuple = slot; + if (!ExecQualAndReset(node->bitmapqualorig, econtext)) { - econtext->ecxt_scantuple = slot; - if (!ExecQualAndReset(node->bitmapqualorig, econtext)) - { - /* Fails recheck, so drop it and loop back for another */ - InstrCountFiltered2(node, 1); - ExecClearTuple(slot); - continue; - } + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + ExecClearTuple(slot); + continue; } } @@ -519,7 +481,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) * it did for the current heap page; which is not a certainty * but is true in many cases. */ - skip_fetch = (node->can_skip_fetch && + skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) && (node->tbmres ? !node->tbmres->recheck : false) && VM_ALL_VISIBLE(node->ss.ss_currentRelation, tbmpre->blockno, @@ -570,7 +532,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) } /* As above, skip prefetch if we expect not to need page */ - skip_fetch = (node->can_skip_fetch && + skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) && (node->tbmres ? !node->tbmres->recheck : false) && VM_ALL_VISIBLE(node->ss.ss_currentRelation, tbmpre->blockno, @@ -640,8 +602,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) tbm_end_shared_iterate(node->shared_prefetch_iterator); if (node->tbm) tbm_free(node->tbm); - if (node->vmbuffer != InvalidBuffer) - ReleaseBuffer(node->vmbuffer); if (node->pvmbuffer != InvalidBuffer) ReleaseBuffer(node->pvmbuffer); node->tbm = NULL; @@ -651,7 +611,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) node->initialized = false; node->shared_tbmiterator = NULL; node->shared_prefetch_iterator = NULL; - node->vmbuffer = InvalidBuffer; node->pvmbuffer = InvalidBuffer; ExecScanReScan(&node->ss); @@ -696,8 +655,6 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node) tbm_end_shared_iterate(node->shared_tbmiterator); if (node->shared_prefetch_iterator) tbm_end_shared_iterate(node->shared_prefetch_iterator); - if (node->vmbuffer != InvalidBuffer) - ReleaseBuffer(node->vmbuffer); if (node->pvmbuffer != InvalidBuffer) ReleaseBuffer(node->pvmbuffer); @@ -741,8 +698,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) scanstate->tbm = NULL; scanstate->tbmiterator = NULL; scanstate->tbmres = NULL; - scanstate->return_empty_tuples = 0; - scanstate->vmbuffer = InvalidBuffer; scanstate->pvmbuffer = InvalidBuffer; scanstate->exact_pages = 0; scanstate->lossy_pages = 0; @@ -753,7 +708,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) scanstate->shared_tbmiterator = NULL; scanstate->shared_prefetch_iterator = NULL; scanstate->pstate = NULL; - scanstate->can_skip_fetch = false; /* * Miscellaneous initialization diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 2765efc4e5..750ea30852 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -76,6 +76,16 @@ typedef struct HeapScanDescData */ ParallelBlockTableScanWorkerData *rs_parallelworkerdata; + /* + * These fields are only used for bitmap scans for the "skip fetch" + * optimization. Bitmap scans needing no fields from the heap may skip + * fetching an all visible block, instead using the number of tuples per + * block reported by the bitmap to determine how many NULL-filled tuples + * to return. + */ + Buffer rs_vmbuffer; + int rs_empty_tuples_pending; + /* these fields only used in page-at-a-time mode and for bitmap scans */ int rs_cindex; /* current tuple's index in vistuples */ int rs_ntuples; /* number of visible tuples on page */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index e7eeb75409..be198fa315 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -63,6 +63,13 @@ typedef enum ScanOptions /* unregister snapshot at scan end? */ SO_TEMP_SNAPSHOT = 1 << 9, + + /* + * At the discretion of the table AM, bitmap table scans may be able to + * skip fetching a block from the table if none of the table data is + * needed. If table data may be needed, set SO_NEED_TUPLES. + */ + SO_NEED_TUPLES = 1 << 10, } ScanOptions; /* @@ -937,10 +944,13 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, */ static inline TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, - int nkeys, struct ScanKeyData *key) + int nkeys, struct ScanKeyData *key, bool need_tuple) { uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; + if (need_tuple) + flags |= SO_NEED_TUPLES; + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index e57ebd7288..fa2f70b7a4 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1794,10 +1794,7 @@ typedef struct ParallelBitmapHeapState * tbm bitmap obtained from child index scan(s) * tbmiterator iterator for scanning current pages * tbmres current-page data - * can_skip_fetch can we potentially skip tuple fetches in this scan? - * return_empty_tuples number of empty tuples to return - * vmbuffer buffer for visibility-map lookups - * pvmbuffer ditto, for prefetched pages + * pvmbuffer buffer for visibility-map lookups of prefetched pages * exact_pages total number of exact pages retrieved * lossy_pages total number of lossy pages retrieved * prefetch_iterator iterator for prefetching ahead of current page @@ -1817,9 +1814,6 @@ typedef struct BitmapHeapScanState TIDBitmap *tbm; TBMIterator *tbmiterator; TBMIterateResult *tbmres; - bool can_skip_fetch; - int return_empty_tuples; - Buffer vmbuffer; Buffer pvmbuffer; long exact_pages; long lossy_pages;