diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index f9da24c491..d2a006671a 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -84,6 +84,15 @@ #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ +/* + * When a table has no indexes, vacuum the FSM after every 8GB, approximately + * (it won't be exact because we only vacuum FSM after processing a heap page + * that has some removable tuples). When there are indexes, this is ignored, + * and we vacuum FSM after each index/heap cleaning pass. + */ +#define VACUUM_FSM_EVERY_PAGES \ + ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ)) + /* * Guesstimation of number of dead tuples per page. This is used to * provide an upper limit to memory allocated when vacuuming small @@ -285,9 +294,6 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params, pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP); - /* Vacuum the Free Space Map */ - FreeSpaceMapVacuum(onerel); - /* * Update statistics in pg_class. * @@ -465,7 +471,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid; TransactionId relminmxid = onerel->rd_rel->relminmxid; BlockNumber empty_pages, - vacuumed_pages; + vacuumed_pages, + next_fsm_block_to_vacuum; double num_tuples, /* total number of nonremovable tuples */ live_tuples, /* live tuples (reltuples estimate) */ tups_vacuumed, /* tuples cleaned up by vacuum */ @@ -501,6 +508,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, relname))); empty_pages = vacuumed_pages = 0; + next_fsm_block_to_vacuum = (BlockNumber) 0; num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0; indstats = (IndexBulkDeleteResult **) @@ -752,6 +760,13 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vacrelstats->num_dead_tuples = 0; vacrelstats->num_index_scans++; + /* + * Vacuum the Free Space Map to make newly-freed space visible on + * upper-level FSM pages. Note we have not yet processed blkno. + */ + FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno); + next_fsm_block_to_vacuum = blkno; + /* Report that we are once again scanning the heap */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_SCAN_HEAP); @@ -1200,6 +1215,19 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, */ vacrelstats->num_dead_tuples = 0; vacuumed_pages++; + + /* + * Periodically do incremental FSM vacuuming to make newly-freed + * space visible on upper FSM pages. Note: although we've cleaned + * the current block, we haven't yet updated its FSM entry (that + * happens further down), so passing end == blkno is correct. + */ + if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES) + { + FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, + blkno); + next_fsm_block_to_vacuum = blkno; + } } freespace = PageGetHeapFreeSpace(page); @@ -1368,6 +1396,13 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vacrelstats->num_index_scans++; } + /* + * Vacuum the remainder of the Free Space Map. We must do this whether or + * not there were indexes. + */ + if (blkno > next_fsm_block_to_vacuum) + FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno); + /* report all blocks vacuumed; and that we're cleaning up */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, diff --git a/src/backend/storage/freespace/README b/src/backend/storage/freespace/README index bbd1b93fac..e7ff23b76f 100644 --- a/src/backend/storage/freespace/README +++ b/src/backend/storage/freespace/README @@ -180,13 +180,13 @@ have a corrupted page, with a parent somewhere with too small a value. Secondly, if we detect corrupted pages while we search, traversing down the tree. That check will notice if a parent node is set to too high a value. In both cases, the upper nodes on the page are immediately rebuilt, fixing -the corruption. +the corruption so far as that page is concerned. -Vacuum updates all the bottom level pages with correct amount of free space -on the heap pages, fixing any outdated values there. After the heap and -index passes are done, FreeSpaceMapVacuum is called, and the FSM tree is -scanned in depth-first order. This fixes any discrepancies between upper -and lower level FSM pages. +VACUUM updates all the bottom-level FSM pages with the correct amount of free +space on corresponding heap pages, as it proceeds through the heap. This +goes through fsm_set_avail(), so that the upper nodes on those pages are +immediately updated. Periodically, VACUUM calls FreeSpaceMapVacuum[Range] +to propagate the new free-space info into the upper pages of the FSM tree. TODO ---- diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index dd8ae52644..7eb4f3ee93 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -108,7 +108,9 @@ static Size fsm_space_cat_to_avail(uint8 cat); static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot, uint8 newValue, uint8 minValue); static BlockNumber fsm_search(Relation rel, uint8 min_cat); -static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof); +static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, + BlockNumber start, BlockNumber end, + bool *eof); static BlockNumber fsm_get_lastblckno(Relation rel, FSMAddress addr); static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat); @@ -370,21 +372,48 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks) */ if (rel->rd_smgr) rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks; + + /* + * Update upper-level FSM pages to account for the truncation. This is + * important because the just-truncated pages were likely marked as + * all-free, and would be preferentially selected. + */ + FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber); } /* - * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM + * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM + * + * We assume that the bottom-level pages have already been updated with + * new free-space information. */ void FreeSpaceMapVacuum(Relation rel) { bool dummy; - /* - * Traverse the tree in depth-first order. The tree is stored physically - * in depth-first order, so this should be pretty I/O efficient. - */ - fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy); + /* Recursively scan the tree, starting at the root */ + (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, + (BlockNumber) 0, InvalidBlockNumber, + &dummy); +} + +/* + * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM + * + * As above, but assume that only heap pages between start and end-1 inclusive + * have new free-space information, so update only the upper-level slots + * covering that block range. end == InvalidBlockNumber is equivalent to + * "all the rest of the relation". + */ +void +FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end) +{ + bool dummy; + + /* Recursively scan the tree, starting at the root */ + if (end > start) + (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy); } /******** Internal routines ********/ @@ -783,9 +812,21 @@ fsm_search(Relation rel, uint8 min_cat) /* * Recursive guts of FreeSpaceMapVacuum + * + * Examine the FSM page indicated by addr, as well as its children, updating + * upper-level nodes that cover the heap block range from start to end-1. + * (It's okay if end is beyond the actual end of the map.) + * Return the maximum freespace value on this page. + * + * If addr is past the end of the FSM, set *eof_p to true and return 0. + * + * This traverses the tree in depth-first order. The tree is stored + * physically in depth-first order, so this should be pretty I/O efficient. */ static uint8 -fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) +fsm_vacuum_page(Relation rel, FSMAddress addr, + BlockNumber start, BlockNumber end, + bool *eof_p) { Buffer buf; Page page; @@ -804,15 +845,52 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) page = BufferGetPage(buf); /* - * Recurse into children, and fix the information stored about them at - * this level. + * If we're above the bottom level, recurse into children, and fix the + * information stored about them at this level. */ if (addr.level > FSM_BOTTOM_LEVEL) { - int slot; + FSMAddress fsm_start, + fsm_end; + uint16 fsm_start_slot, + fsm_end_slot; + int slot, + start_slot, + end_slot; bool eof = false; - for (slot = 0; slot < SlotsPerFSMPage; slot++) + /* + * Compute the range of slots we need to update on this page, given + * the requested range of heap blocks to consider. The first slot to + * update is the one covering the "start" block, and the last slot is + * the one covering "end - 1". (Some of this work will be duplicated + * in each recursive call, but it's cheap enough to not worry about.) + */ + fsm_start = fsm_get_location(start, &fsm_start_slot); + fsm_end = fsm_get_location(end - 1, &fsm_end_slot); + + while (fsm_start.level < addr.level) + { + fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot); + fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot); + } + Assert(fsm_start.level == addr.level); + + if (fsm_start.logpageno == addr.logpageno) + start_slot = fsm_start_slot; + else if (fsm_start.logpageno > addr.logpageno) + start_slot = SlotsPerFSMPage; /* shouldn't get here... */ + else + start_slot = 0; + + if (fsm_end.logpageno == addr.logpageno) + end_slot = fsm_end_slot; + else if (fsm_end.logpageno > addr.logpageno) + end_slot = SlotsPerFSMPage - 1; + else + end_slot = -1; /* shouldn't get here... */ + + for (slot = start_slot; slot <= end_slot; slot++) { int child_avail; @@ -820,7 +898,9 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) /* After we hit end-of-file, just clear the rest of the slots */ if (!eof) - child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof); + child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), + start, end, + &eof); else child_avail = 0; @@ -835,6 +915,7 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) } } + /* Now get the maximum value on the page, to return to caller */ max_avail = fsm_get_max_avail(BufferGetPage(buf)); /* diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h index a517d7ec43..a2032518ac 100644 --- a/src/include/storage/freespace.h +++ b/src/include/storage/freespace.h @@ -32,6 +32,8 @@ extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks); extern void FreeSpaceMapVacuum(Relation rel); +extern void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, + BlockNumber end); extern void UpdateFreeSpaceMap(Relation rel, BlockNumber startBlkNum, BlockNumber endBlkNum,