diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index d8d4f3b1f5..19d2c529d8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -245,8 +245,8 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 	if (!RelationUsesLocalBuffers(scan->rs_base.rs_rd) &&
 		scan->rs_nblocks > NBuffers / 4)
 	{
-		allow_strat = scan->rs_base.rs_allow_strat;
-		allow_sync = scan->rs_base.rs_allow_sync;
+		allow_strat = (scan->rs_base.rs_flags & SO_ALLOW_STRAT) != 0;
+		allow_sync = (scan->rs_base.rs_flags & SO_ALLOW_SYNC) != 0;
 	}
 	else
 		allow_strat = allow_sync = false;
@@ -267,7 +267,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 	if (scan->rs_base.rs_parallel != NULL)
 	{
 		/* For parallel scan, believe whatever ParallelTableScanDesc says. */
-		scan->rs_base.rs_syncscan = scan->rs_base.rs_parallel->phs_syncscan;
+		if (scan->rs_base.rs_parallel->phs_syncscan)
+			scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 	}
 	else if (keep_startblock)
 	{
@@ -276,16 +279,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 		 * so that rewinding a cursor doesn't generate surprising results.
 		 * Reset the active syncscan setting, though.
 		 */
-		scan->rs_base.rs_syncscan = (allow_sync && synchronize_seqscans);
+		if (allow_sync && synchronize_seqscans)
+			scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 	}
 	else if (allow_sync && synchronize_seqscans)
 	{
-		scan->rs_base.rs_syncscan = true;
+		scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
 		scan->rs_startblock = ss_get_location(scan->rs_base.rs_rd, scan->rs_nblocks);
 	}
 	else
 	{
-		scan->rs_base.rs_syncscan = false;
+		scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 		scan->rs_startblock = 0;
 	}
@@ -305,11 +311,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 		memcpy(scan->rs_base.rs_key, key, scan->rs_base.rs_nkeys * sizeof(ScanKeyData));
 
 	/*
-	 * Currently, we don't have a stats counter for bitmap heap scans (but the
-	 * underlying bitmap index scans will be counted) or sample scans (we only
-	 * update stats for tuple fetches there)
+	 * Currently, we only have a stats counter for sequential heap scans (but
+	 * e.g. for bitmap scans the underlying bitmap index scans will be counted,
+	 * and for sample scans we update stats for tuple fetches).
 	 */
-	if (!scan->rs_base.rs_bitmapscan && !scan->rs_base.rs_samplescan)
+	if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN)
 		pgstat_count_heap_scan(scan->rs_base.rs_rd);
 }
@@ -325,7 +331,8 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk
 	HeapScanDesc scan = (HeapScanDesc) sscan;
 
 	Assert(!scan->rs_inited);	/* else too late to change */
-	Assert(!scan->rs_base.rs_syncscan); /* else rs_startblock is significant */
+	/* else rs_startblock is significant */
+	Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
 
 	/* Check startBlk is valid (but allow case of zero blocks...) */
 	Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
@@ -375,7 +382,7 @@ heapgetpage(TableScanDesc sscan, BlockNumber page)
 									   RBM_NORMAL, scan->rs_strategy);
 	scan->rs_cblock = page;
 
-	if (!scan->rs_base.rs_pageatatime)
+	if (!(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE))
 		return;
 
 	buffer = scan->rs_cbuf;
@@ -574,7 +581,7 @@ heapgettup(HeapScanDesc scan,
 			 * time, and much more likely that we'll just bollix things for
 			 * forward scanners.
 			 */
-			scan->rs_base.rs_syncscan = false;
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 			/* start from last page of the scan */
 			if (scan->rs_startblock > 0)
 				page = scan->rs_startblock - 1;
@@ -738,7 +745,7 @@ heapgettup(HeapScanDesc scan,
 			 * a little bit backwards on every invocation, which is confusing.
 			 * We don't guarantee any specific ordering in general, though.
 			 */
-			if (scan->rs_base.rs_syncscan)
+			if (scan->rs_base.rs_flags & SO_ALLOW_SYNC)
 				ss_report_location(scan->rs_base.rs_rd, page);
 		}
@@ -885,7 +892,7 @@ heapgettup_pagemode(HeapScanDesc scan,
 			 * time, and much more likely that we'll just bollix things for
 			 * forward scanners.
 			 */
-			scan->rs_base.rs_syncscan = false;
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 			/* start from last page of the scan */
 			if (scan->rs_startblock > 0)
 				page = scan->rs_startblock - 1;
@@ -1037,7 +1044,7 @@ heapgettup_pagemode(HeapScanDesc scan,
 			 * a little bit backwards on every invocation, which is confusing.
 			 * We don't guarantee any specific ordering in general, though.
 			 */
-			if (scan->rs_base.rs_syncscan)
+			if (scan->rs_base.rs_flags & SO_ALLOW_SYNC)
 				ss_report_location(scan->rs_base.rs_rd, page);
 		}
@@ -1125,12 +1132,7 @@ TableScanDesc
 heap_beginscan(Relation relation, Snapshot snapshot,
 			   int nkeys, ScanKey key,
 			   ParallelTableScanDesc parallel_scan,
-			   bool allow_strat,
-			   bool allow_sync,
-			   bool allow_pagemode,
-			   bool is_bitmapscan,
-			   bool is_samplescan,
-			   bool temp_snap)
+			   uint32 flags)
 {
 	HeapScanDesc scan;
@@ -1151,33 +1153,39 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_base.rs_rd = relation;
 	scan->rs_base.rs_snapshot = snapshot;
 	scan->rs_base.rs_nkeys = nkeys;
-	scan->rs_base.rs_bitmapscan = is_bitmapscan;
-	scan->rs_base.rs_samplescan = is_samplescan;
-	scan->rs_strategy = NULL;	/* set in initscan */
-	scan->rs_base.rs_allow_strat = allow_strat;
-	scan->rs_base.rs_allow_sync = allow_sync;
-	scan->rs_base.rs_temp_snap = temp_snap;
+	scan->rs_base.rs_flags = flags;
 	scan->rs_base.rs_parallel = parallel_scan;
+	scan->rs_strategy = NULL;	/* set in initscan */
 
 	/*
-	 * we can use page-at-a-time mode if it's an MVCC-safe snapshot
+	 * Disable page-at-a-time mode if it's not an MVCC-safe snapshot.
 	 */
-	scan->rs_base.rs_pageatatime =
-		allow_pagemode && snapshot && IsMVCCSnapshot(snapshot);
+	if (!(snapshot && IsMVCCSnapshot(snapshot)))
+		scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
 
 	/*
-	 * For a seqscan in a serializable transaction, acquire a predicate lock
-	 * on the entire relation. This is required not only to lock all the
-	 * matching tuples, but also to conflict with new insertions into the
-	 * table. In an indexscan, we take page locks on the index pages covering
-	 * the range specified in the scan qual, but in a heap scan there is
-	 * nothing more fine-grained to lock. A bitmap scan is a different story,
-	 * there we have already scanned the index and locked the index pages
-	 * covering the predicate. But in that case we still have to lock any
-	 * matching heap tuples.
+	 * For seqscan and sample scans in a serializable transaction, acquire a
+	 * predicate lock on the entire relation. This is required not only to
+	 * lock all the matching tuples, but also to conflict with new insertions
+	 * into the table. In an indexscan, we take page locks on the index pages
+	 * covering the range specified in the scan qual, but in a heap scan there
+	 * is nothing more fine-grained to lock. A bitmap scan is a different
+	 * story, there we have already scanned the index and locked the index
+	 * pages covering the predicate. But in that case we still have to lock
+	 * any matching heap tuples. For sample scans we could optimize the
+	 * locking to be at least page-level granularity, but we'd need to add
+	 * per-tuple locking for that.
 	 */
-	if (!is_bitmapscan)
+	if (scan->rs_base.rs_flags & (SO_TYPE_SEQSCAN | SO_TYPE_SAMPLESCAN))
+	{
+		/*
+		 * Ensure a missing snapshot is noticed reliably, even if the
+		 * isolation mode means predicate locking isn't performed (and
+		 * therefore the snapshot isn't used here).
+		 */
+		Assert(snapshot);
 		PredicateLockRelation(relation, snapshot);
+	}
 
 	/* we only need to set this up once */
 	scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
@@ -1204,10 +1212,21 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 
 	if (set_params)
 	{
-		scan->rs_base.rs_allow_strat = allow_strat;
-		scan->rs_base.rs_allow_sync = allow_sync;
-		scan->rs_base.rs_pageatatime =
-			allow_pagemode && IsMVCCSnapshot(scan->rs_base.rs_snapshot);
+		if (allow_strat)
+			scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
+
+		if (allow_sync)
+			scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
+
+		if (allow_pagemode && scan->rs_base.rs_snapshot &&
+			IsMVCCSnapshot(scan->rs_base.rs_snapshot))
+			scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
 	}
 
 	/*
@@ -1246,7 +1265,7 @@ heap_endscan(TableScanDesc sscan)
 	if (scan->rs_strategy != NULL)
 		FreeAccessStrategy(scan->rs_strategy);
 
-	if (scan->rs_base.rs_temp_snap)
+	if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
 		UnregisterSnapshot(scan->rs_base.rs_snapshot);
 
 	pfree(scan);
@@ -1288,7 +1307,7 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction)
 
 	HEAPDEBUG_1;				/* heap_getnext( info ) */
 
-	if (scan->rs_base.rs_pageatatime)
+	if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
 		heapgettup_pagemode(scan, direction,
 							scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
 	else
@@ -1335,11 +1354,10 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s
 
 	HEAPAMSLOTDEBUG_1;			/* heap_getnextslot( info ) */
 
-	if (scan->rs_base.rs_pageatatime)
-		heapgettup_pagemode(scan, direction,
-							scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
+	if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
+		heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
 	else
-		heapgettup(scan, direction, scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
+		heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
 
 	if (scan->rs_ctup.t_data == NULL)
 	{
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 35553c7c92..8d8161fd97 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2323,7 +2323,7 @@ heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
 	 * a little bit backwards on every invocation, which is confusing.
 	 * We don't guarantee any specific ordering in general, though.
 	 */
-	if (scan->rs_syncscan)
+	if (scan->rs_flags & SO_ALLOW_SYNC)
 		ss_report_location(scan->rs_rd, blockno);
 
 	if (blockno == hscan->rs_startblock)
@@ -2357,7 +2357,7 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
 	HeapScanDesc hscan = (HeapScanDesc) scan;
 	TsmRoutine *tsm = scanstate->tsmroutine;
 	BlockNumber blockno = hscan->rs_cblock;
-	bool		pagemode = scan->rs_pageatatime;
+	bool		pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
 
 	Page		page;
 	bool		all_visible;
@@ -2504,7 +2504,7 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 {
 	HeapScanDesc hscan = (HeapScanDesc) scan;
 
-	if (scan->rs_pageatatime)
+	if (scan->rs_flags & SO_ALLOW_PAGEMODE)
 	{
 		/*
 		 * In pageatatime mode, heapgetpage() already did visibility checks,
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 73025b8ead..c3455bc48b 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -93,12 +93,13 @@ table_slot_create(Relation relation, List **reglist)
 TableScanDesc
 table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
 {
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE | SO_TEMP_SNAPSHOT;
 	Oid			relid = RelationGetRelid(relation);
 	Snapshot	snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
 
 	return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key,
-											NULL, true, true, true, false,
-											false, true);
+											NULL, flags);
 }
 
 void
@@ -108,7 +109,7 @@ table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
 
 	RegisterSnapshot(snapshot);
 	scan->rs_snapshot = snapshot;
-	scan->rs_temp_snap = true;
+	scan->rs_flags |= SO_TEMP_SNAPSHOT;
 }
 
@@ -156,6 +157,8 @@ TableScanDesc
 table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
 {
 	Snapshot	snapshot;
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
 
 	Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
 
@@ -165,6 +168,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
 		snapshot = RestoreSnapshot((char *) parallel_scan +
 								   parallel_scan->phs_snapshot_off);
 		RegisterSnapshot(snapshot);
+		flags |= SO_TEMP_SNAPSHOT;
 	}
 	else
 	{
@@ -173,9 +177,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
 	}
 
 	return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
-											parallel_scan, true, true, true,
-											false, false,
-											!parallel_scan->phs_snapshot_any);
+											parallel_scan, flags);
 }
 
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 6b8c7020c8..62aaa08eff 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -110,12 +110,7 @@ typedef enum
 extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
 									int nkeys, ScanKey key,
 									ParallelTableScanDesc parallel_scan,
-									bool allow_strat,
-									bool allow_sync,
-									bool allow_pagemode,
-									bool is_bitmapscan,
-									bool is_samplescan,
-									bool temp_snap);
+									uint32 flags);
 extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk,
 							   BlockNumber endBlk);
 extern void heapgetpage(TableScanDesc scan, BlockNumber page);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 82de4cdcf2..8bb480d322 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -35,13 +35,12 @@ typedef struct TableScanDescData
 	struct SnapshotData *rs_snapshot;	/* snapshot to see */
 	int			rs_nkeys;		/* number of scan keys */
 	struct ScanKeyData *rs_key; /* array of scan key descriptors */
-	bool		rs_bitmapscan;	/* true if this is really a bitmap scan */
-	bool		rs_samplescan;	/* true if this is really a sample scan */
-	bool		rs_pageatatime; /* verify visibility page-at-a-time? */
-	bool		rs_allow_strat; /* allow or disallow use of access strategy */
-	bool		rs_allow_sync;	/* allow or disallow use of syncscan */
-	bool		rs_temp_snap;	/* unregister snapshot at scan end? */
-	bool		rs_syncscan;	/* report location to syncscan logic? */
+
+	/*
+	 * Information about type and behaviour of the scan, a bitmask of members
+	 * of the ScanOptions enum (see tableam.h).
+	 */
+	uint32		rs_flags;
 
 	struct ParallelTableScanDescData *rs_parallel;	/* parallel scan
 													 * information */
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 8fbeb02033..06eae2337a 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -39,6 +39,28 @@ struct TBMIterateResult;
 struct VacuumParams;
 struct ValidateIndexState;
 
+/*
+ * Bitmask values for the flags argument to the scan_begin callback.
+ */
+typedef enum ScanOptions
+{
+	/* one of SO_TYPE_* may be specified */
+	SO_TYPE_SEQSCAN = 1 << 0,
+	SO_TYPE_BITMAPSCAN = 1 << 1,
+	SO_TYPE_SAMPLESCAN = 1 << 2,
+	SO_TYPE_ANALYZE = 1 << 3,
+
+	/* several of SO_ALLOW_* may be specified */
+	/* allow or disallow use of access strategy */
+	SO_ALLOW_STRAT = 1 << 4,
+	/* report location to syncscan logic? */
+	SO_ALLOW_SYNC = 1 << 5,
+	/* verify visibility page-at-a-time? */
+	SO_ALLOW_PAGEMODE = 1 << 6,
+
+	/* unregister snapshot at scan end? */
+	SO_TEMP_SNAPSHOT = 1 << 7
+} ScanOptions;
 
 /*
  * Result codes for table_{update,delete,lock_tuple}, and for visibility
@@ -78,7 +100,6 @@ typedef enum TM_Result
 	TM_WouldBlock
 } TM_Result;
 
-
 /*
  * When table_update, table_delete, or table_lock_tuple fail because the target
  * tuple is already outdated, they fill in this struct to provide information
@@ -170,26 +191,17 @@ typedef struct TableAmRoutine
 	 * parallelscan_initialize(), and has to be for the same relation. Will
	 * only be set coming from table_beginscan_parallel().
 	 *
-	 * allow_{strat, sync, pagemode} specify whether a scan strategy,
-	 * synchronized scans, or page mode may be used (although not every AM
-	 * will support those).
-	 *
-	 * is_{bitmapscan, samplescan} specify whether the scan is intended to
-	 * support those types of scans.
-	 *
-	 * if temp_snap is true, the snapshot will need to be deallocated at
-	 * scan_end.
+	 * `flags` is a bitmask indicating the type of scan (ScanOptions's
+	 * SO_TYPE_*, currently only one may be specified), options controlling
+	 * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
+	 * specified, an AM may ignore unsupported ones) and whether the snapshot
+	 * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
 	 */
 	TableScanDesc (*scan_begin) (Relation rel,
 								 Snapshot snapshot,
 								 int nkeys, struct ScanKeyData *key,
 								 ParallelTableScanDesc pscan,
-								 bool allow_strat,
-								 bool allow_sync,
-								 bool allow_pagemode,
-								 bool is_bitmapscan,
-								 bool is_samplescan,
-								 bool temp_snap);
+								 uint32 flags);
 
 	/*
 	 * Release resources and deallocate scan. If TableScanDesc.temp_snap,
@@ -715,8 +727,10 @@ static inline TableScanDesc
 table_beginscan(Relation rel, Snapshot snapshot,
 				int nkeys, struct ScanKeyData *key)
 {
-	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL,
-									   true, true, true, false, false, false);
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
 /*
@@ -738,9 +752,14 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
 					  int nkeys, struct ScanKeyData *key,
 					  bool allow_strat, bool allow_sync)
 {
-	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL,
-									   allow_strat, allow_sync, true,
-									   false, false, false);
+	uint32		flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;
+
+	if (allow_strat)
+		flags |= SO_ALLOW_STRAT;
+	if (allow_sync)
+		flags |= SO_ALLOW_SYNC;
+
+	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
 /*
@@ -753,8 +772,9 @@ static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot,
 				   int nkeys, struct ScanKeyData *key)
 {
-	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL,
-									   false, false, true, true, false, false);
+	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
+
+	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
 /*
@@ -770,9 +790,16 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot,
 						 bool allow_strat, bool allow_sync,
 						 bool allow_pagemode)
 {
-	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL,
-									   allow_strat, allow_sync, allow_pagemode,
-									   false, true, false);
+	uint32		flags = SO_TYPE_SAMPLESCAN;
+
+	if (allow_strat)
+		flags |= SO_ALLOW_STRAT;
+	if (allow_sync)
+		flags |= SO_ALLOW_SYNC;
+	if (allow_pagemode)
+		flags |= SO_ALLOW_PAGEMODE;
+
+	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
 /*
@@ -783,9 +810,9 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot,
 static inline TableScanDesc
 table_beginscan_analyze(Relation rel)
 {
-	return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL,
-									   true, false, true,
-									   false, true, false);
+	uint32		flags = SO_TYPE_ANALYZE;
+
+	return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
 }
 
 /*
diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out
index e6657a675e..8353d84a55 100644
--- a/src/test/regress/expected/vacuum.out
+++ b/src/test/regress/expected/vacuum.out
@@ -163,6 +163,14 @@ ERROR:  relation "does_not_exist" does not exist
 VACUUM (SKIP_LOCKED) vactst;
 VACUUM (SKIP_LOCKED, FULL) vactst;
 ANALYZE (SKIP_LOCKED) vactst;
+-- ensure VACUUM and ANALYZE don't have a problem with serializable
+SET default_transaction_isolation = serializable;
+VACUUM vactst;
+ANALYZE vactst;
+RESET default_transaction_isolation;
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+ANALYZE vactst;
+COMMIT;
 DROP TABLE vaccluster;
 DROP TABLE vactst;
 DROP TABLE vacparted;
diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql
index 4fa90940dc..a558580fea 100644
--- a/src/test/regress/sql/vacuum.sql
+++ b/src/test/regress/sql/vacuum.sql
@@ -124,6 +124,15 @@ VACUUM (SKIP_LOCKED) vactst;
 VACUUM (SKIP_LOCKED, FULL) vactst;
 ANALYZE (SKIP_LOCKED) vactst;
 
+-- ensure VACUUM and ANALYZE don't have a problem with serializable
+SET default_transaction_isolation = serializable;
+VACUUM vactst;
+ANALYZE vactst;
+RESET default_transaction_isolation;
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+ANALYZE vactst;
+COMMIT;
+
 DROP TABLE vaccluster;
 DROP TABLE vactst;
 DROP TABLE vacparted;
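
Reviewer note, not part of the patch: after this change, a caller expresses scan setup as a single ScanOptions bitmask instead of six booleans, and the active settings live in TableScanDescData.rs_flags. Below is a minimal sketch of how a caller might now start and interrogate a scan under these assumptions; the variables "rel" and "snap" are placeholders for a valid Relation and Snapshot, not identifiers from this patch.

	/*
	 * Hypothetical caller: begin a sample scan that permits a buffer access
	 * strategy but requests neither synchronized scanning nor a temporary
	 * snapshot.
	 */
	uint32		flags = SO_TYPE_SAMPLESCAN | SO_ALLOW_STRAT;
	TableScanDesc sscan = rel->rd_tableam->scan_begin(rel, snap, 0, NULL,
													  NULL, flags);

	/* Behaviour bits are tested individually on the shared descriptor. */
	if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
	{
		/* page-at-a-time visibility checks are in effect */
	}

Note that the AM may adjust the bits after the fact, e.g. heap_beginscan() clears SO_ALLOW_PAGEMODE when the snapshot is not MVCC-safe, so code should test rs_flags rather than remember what was originally passed to scan_begin().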