Set PD_ALL_VISIBLE and visibility map bits in COPY FREEZE
Make sure COPY FREEZE marks the pages as PD_ALL_VISIBLE and updates the visibility map. Until now we only marked individual tuples as frozen, but page-level flags were not updated, so the first VACUUM after the COPY FREEZE had to rewrite the whole table. This is a fairly old patch, and multiple people worked on it. The first version was written by Jeff Janes, and then reworked by Pavan Deolasee and Anastasia Lubennikova. Author: Anastasia Lubennikova, Pavan Deolasee, Jeff Janes Reviewed-by: Kuntal Ghosh, Jeff Janes, Tomas Vondra, Masahiko Sawada, Andres Freund, Ibrar Ahmed, Robert Haas, Tatsuro Ishii, Darafei Praliaskouski Discussion: https://postgr.es/m/CABOikdN-ptGv0mZntrK2Q8OtfUuAjqaYMGmkdU1dCKFtUxVLrg@mail.gmail.com Discussion: https://postgr.es/m/CAMkU%3D1w3osJJ2FneELhhNRLxfZitDgp9FPHee08NT2FQFmz_pQ%40mail.gmail.com
This commit is contained in:
parent
0c7d3bb99f
commit
7db0cd2145
|
@ -179,6 +179,69 @@ select pg_truncate_visibility_map('test_partition');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- test copy freeze
|
||||||
|
create table copyfreeze (a int, b char(1500));
|
||||||
|
-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
|
||||||
|
-- and all-frozen.
|
||||||
|
begin;
|
||||||
|
truncate copyfreeze;
|
||||||
|
copy copyfreeze from stdin freeze;
|
||||||
|
commit;
|
||||||
|
select * from pg_visibility_map('copyfreeze');
|
||||||
|
blkno | all_visible | all_frozen
|
||||||
|
-------+-------------+------------
|
||||||
|
0 | t | t
|
||||||
|
1 | t | t
|
||||||
|
2 | t | t
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
select * from pg_check_frozen('copyfreeze');
|
||||||
|
t_ctid
|
||||||
|
--------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
|
||||||
|
-- which are touched by regular COPY must not be set all-visible/all-frozen. On
|
||||||
|
-- the other hand, pages allocated by COPY FREEZE should be marked
|
||||||
|
-- all-frozen/all-visible.
|
||||||
|
begin;
|
||||||
|
truncate copyfreeze;
|
||||||
|
copy copyfreeze from stdin;
|
||||||
|
copy copyfreeze from stdin freeze;
|
||||||
|
commit;
|
||||||
|
select * from pg_visibility_map('copyfreeze');
|
||||||
|
blkno | all_visible | all_frozen
|
||||||
|
-------+-------------+------------
|
||||||
|
0 | f | f
|
||||||
|
1 | f | f
|
||||||
|
2 | t | t
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
select * from pg_check_frozen('copyfreeze');
|
||||||
|
t_ctid
|
||||||
|
--------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
-- Try a mix of regular COPY and COPY FREEZE.
|
||||||
|
begin;
|
||||||
|
truncate copyfreeze;
|
||||||
|
copy copyfreeze from stdin freeze;
|
||||||
|
copy copyfreeze from stdin;
|
||||||
|
copy copyfreeze from stdin freeze;
|
||||||
|
commit;
|
||||||
|
select * from pg_visibility_map('copyfreeze');
|
||||||
|
blkno | all_visible | all_frozen
|
||||||
|
-------+-------------+------------
|
||||||
|
0 | t | t
|
||||||
|
1 | f | f
|
||||||
|
2 | t | t
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
select * from pg_check_frozen('copyfreeze');
|
||||||
|
t_ctid
|
||||||
|
--------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
-- cleanup
|
-- cleanup
|
||||||
drop table test_partitioned;
|
drop table test_partitioned;
|
||||||
drop view test_view;
|
drop view test_view;
|
||||||
|
@ -188,3 +251,4 @@ drop server dummy_server;
|
||||||
drop foreign data wrapper dummy;
|
drop foreign data wrapper dummy;
|
||||||
drop materialized view matview_visibility_test;
|
drop materialized view matview_visibility_test;
|
||||||
drop table regular_table;
|
drop table regular_table;
|
||||||
|
drop table copyfreeze;
|
||||||
|
|
|
@ -94,6 +94,82 @@ select count(*) > 0 from pg_visibility_map_summary('test_partition');
|
||||||
select * from pg_check_frozen('test_partition'); -- hopefully none
|
select * from pg_check_frozen('test_partition'); -- hopefully none
|
||||||
select pg_truncate_visibility_map('test_partition');
|
select pg_truncate_visibility_map('test_partition');
|
||||||
|
|
||||||
|
-- test copy freeze
|
||||||
|
create table copyfreeze (a int, b char(1500));
|
||||||
|
|
||||||
|
-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
|
||||||
|
-- and all-frozen.
|
||||||
|
begin;
|
||||||
|
truncate copyfreeze;
|
||||||
|
copy copyfreeze from stdin freeze;
|
||||||
|
1 '1'
|
||||||
|
2 '2'
|
||||||
|
3 '3'
|
||||||
|
4 '4'
|
||||||
|
5 '5'
|
||||||
|
6 '6'
|
||||||
|
7 '7'
|
||||||
|
8 '8'
|
||||||
|
9 '9'
|
||||||
|
10 '10'
|
||||||
|
11 '11'
|
||||||
|
12 '12'
|
||||||
|
\.
|
||||||
|
commit;
|
||||||
|
select * from pg_visibility_map('copyfreeze');
|
||||||
|
select * from pg_check_frozen('copyfreeze');
|
||||||
|
|
||||||
|
-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
|
||||||
|
-- which are touched by regular COPY must not be set all-visible/all-frozen. On
|
||||||
|
-- the other hand, pages allocated by COPY FREEZE should be marked
|
||||||
|
-- all-frozen/all-visible.
|
||||||
|
begin;
|
||||||
|
truncate copyfreeze;
|
||||||
|
copy copyfreeze from stdin;
|
||||||
|
1 '1'
|
||||||
|
2 '2'
|
||||||
|
3 '3'
|
||||||
|
4 '4'
|
||||||
|
5 '5'
|
||||||
|
6 '6'
|
||||||
|
\.
|
||||||
|
copy copyfreeze from stdin freeze;
|
||||||
|
7 '7'
|
||||||
|
8 '8'
|
||||||
|
9 '9'
|
||||||
|
10 '10'
|
||||||
|
11 '11'
|
||||||
|
12 '12'
|
||||||
|
\.
|
||||||
|
commit;
|
||||||
|
select * from pg_visibility_map('copyfreeze');
|
||||||
|
select * from pg_check_frozen('copyfreeze');
|
||||||
|
|
||||||
|
-- Try a mix of regular COPY and COPY FREEZE.
|
||||||
|
begin;
|
||||||
|
truncate copyfreeze;
|
||||||
|
copy copyfreeze from stdin freeze;
|
||||||
|
1 '1'
|
||||||
|
2 '2'
|
||||||
|
3 '3'
|
||||||
|
4 '4'
|
||||||
|
5 '5'
|
||||||
|
\.
|
||||||
|
copy copyfreeze from stdin;
|
||||||
|
6 '6'
|
||||||
|
\.
|
||||||
|
copy copyfreeze from stdin freeze;
|
||||||
|
7 '7'
|
||||||
|
8 '8'
|
||||||
|
9 '9'
|
||||||
|
10 '10'
|
||||||
|
11 '11'
|
||||||
|
12 '12'
|
||||||
|
\.
|
||||||
|
commit;
|
||||||
|
select * from pg_visibility_map('copyfreeze');
|
||||||
|
select * from pg_check_frozen('copyfreeze');
|
||||||
|
|
||||||
-- cleanup
|
-- cleanup
|
||||||
drop table test_partitioned;
|
drop table test_partitioned;
|
||||||
drop view test_view;
|
drop view test_view;
|
||||||
|
@ -103,3 +179,4 @@ drop server dummy_server;
|
||||||
drop foreign data wrapper dummy;
|
drop foreign data wrapper dummy;
|
||||||
drop materialized view matview_visibility_test;
|
drop materialized view matview_visibility_test;
|
||||||
drop table regular_table;
|
drop table regular_table;
|
||||||
|
drop table copyfreeze;
|
||||||
|
|
|
@ -2121,6 +2121,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||||
int ndone;
|
int ndone;
|
||||||
PGAlignedBlock scratch;
|
PGAlignedBlock scratch;
|
||||||
Page page;
|
Page page;
|
||||||
|
Buffer vmbuffer = InvalidBuffer;
|
||||||
bool needwal;
|
bool needwal;
|
||||||
Size saveFreeSpace;
|
Size saveFreeSpace;
|
||||||
bool need_tuple_data = RelationIsLogicallyLogged(relation);
|
bool need_tuple_data = RelationIsLogicallyLogged(relation);
|
||||||
|
@ -2175,8 +2176,9 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||||
while (ndone < ntuples)
|
while (ndone < ntuples)
|
||||||
{
|
{
|
||||||
Buffer buffer;
|
Buffer buffer;
|
||||||
Buffer vmbuffer = InvalidBuffer;
|
bool starting_with_empty_page;
|
||||||
bool all_visible_cleared = false;
|
bool all_visible_cleared = false;
|
||||||
|
bool all_frozen_set = false;
|
||||||
int nthispage;
|
int nthispage;
|
||||||
|
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
@ -2184,12 +2186,20 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||||
/*
|
/*
|
||||||
* Find buffer where at least the next tuple will fit. If the page is
|
* Find buffer where at least the next tuple will fit. If the page is
|
||||||
* all-visible, this will also pin the requisite visibility map page.
|
* all-visible, this will also pin the requisite visibility map page.
|
||||||
|
*
|
||||||
|
* Also pin visibility map page if COPY FREEZE inserts tuples into an
|
||||||
|
* empty page. See all_frozen_set below.
|
||||||
*/
|
*/
|
||||||
buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
|
buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
|
||||||
InvalidBuffer, options, bistate,
|
InvalidBuffer, options, bistate,
|
||||||
&vmbuffer, NULL);
|
&vmbuffer, NULL);
|
||||||
page = BufferGetPage(buffer);
|
page = BufferGetPage(buffer);
|
||||||
|
|
||||||
|
starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
|
||||||
|
|
||||||
|
if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
|
||||||
|
all_frozen_set = true;
|
||||||
|
|
||||||
/* NO EREPORT(ERROR) from here till changes are logged */
|
/* NO EREPORT(ERROR) from here till changes are logged */
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
|
@ -2223,7 +2233,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||||
log_heap_new_cid(relation, heaptup);
|
log_heap_new_cid(relation, heaptup);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PageIsAllVisible(page))
|
/*
|
||||||
|
* If the page is all visible, need to clear that, unless we're only
|
||||||
|
* going to add further frozen rows to it.
|
||||||
|
*
|
||||||
|
* If we're only adding already frozen rows to a previously empty
|
||||||
|
* page, mark it as all-visible.
|
||||||
|
*/
|
||||||
|
if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
|
||||||
{
|
{
|
||||||
all_visible_cleared = true;
|
all_visible_cleared = true;
|
||||||
PageClearAllVisible(page);
|
PageClearAllVisible(page);
|
||||||
|
@ -2231,6 +2248,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||||
BufferGetBlockNumber(buffer),
|
BufferGetBlockNumber(buffer),
|
||||||
vmbuffer, VISIBILITYMAP_VALID_BITS);
|
vmbuffer, VISIBILITYMAP_VALID_BITS);
|
||||||
}
|
}
|
||||||
|
else if (all_frozen_set)
|
||||||
|
PageSetAllVisible(page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX Should we set PageSetPrunable on this page ? See heap_insert()
|
* XXX Should we set PageSetPrunable on this page ? See heap_insert()
|
||||||
|
@ -2254,8 +2273,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||||
* If the page was previously empty, we can reinit the page
|
* If the page was previously empty, we can reinit the page
|
||||||
* instead of restoring the whole thing.
|
* instead of restoring the whole thing.
|
||||||
*/
|
*/
|
||||||
init = (ItemPointerGetOffsetNumber(&(heaptuples[ndone]->t_self)) == FirstOffsetNumber &&
|
init = starting_with_empty_page;
|
||||||
PageGetMaxOffsetNumber(page) == FirstOffsetNumber + nthispage - 1);
|
|
||||||
|
|
||||||
/* allocate xl_heap_multi_insert struct from the scratch area */
|
/* allocate xl_heap_multi_insert struct from the scratch area */
|
||||||
xlrec = (xl_heap_multi_insert *) scratchptr;
|
xlrec = (xl_heap_multi_insert *) scratchptr;
|
||||||
|
@ -2273,7 +2291,15 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||||
/* the rest of the scratch space is used for tuple data */
|
/* the rest of the scratch space is used for tuple data */
|
||||||
tupledata = scratchptr;
|
tupledata = scratchptr;
|
||||||
|
|
||||||
xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0;
|
/* check that the mutually exclusive flags are not both set */
|
||||||
|
Assert (!(all_visible_cleared && all_frozen_set));
|
||||||
|
|
||||||
|
xlrec->flags = 0;
|
||||||
|
if (all_visible_cleared)
|
||||||
|
xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
|
||||||
|
if (all_frozen_set)
|
||||||
|
xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
|
||||||
|
|
||||||
xlrec->ntuples = nthispage;
|
xlrec->ntuples = nthispage;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2347,13 +2373,40 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||||
|
|
||||||
END_CRIT_SECTION();
|
END_CRIT_SECTION();
|
||||||
|
|
||||||
UnlockReleaseBuffer(buffer);
|
/*
|
||||||
if (vmbuffer != InvalidBuffer)
|
* If we've frozen everything on the page, update the visibilitymap.
|
||||||
ReleaseBuffer(vmbuffer);
|
* We're already holding a pin on the vmbuffer.
|
||||||
|
*/
|
||||||
|
if (all_frozen_set)
|
||||||
|
{
|
||||||
|
Assert(PageIsAllVisible(page));
|
||||||
|
Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It's fine to use InvalidTransactionId here - this is only used
|
||||||
|
* when HEAP_INSERT_FROZEN is specified, which intentionally
|
||||||
|
* violates visibility rules.
|
||||||
|
*/
|
||||||
|
visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
|
||||||
|
InvalidXLogRecPtr, vmbuffer,
|
||||||
|
InvalidTransactionId,
|
||||||
|
VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
ndone += nthispage;
|
ndone += nthispage;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NB: Only release vmbuffer after inserting all tuples - it's fairly
|
||||||
|
* likely that we'll insert into subsequent heap pages that use the
|
||||||
|
* same vm page.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We're done with inserting all tuples, so release the last vmbuffer. */
|
||||||
|
if (vmbuffer != InvalidBuffer)
|
||||||
|
ReleaseBuffer(vmbuffer);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We're done with the actual inserts. Check for conflicts again, to
|
* We're done with the actual inserts. Check for conflicts again, to
|
||||||
* ensure that all rw-conflicts in to these inserts are detected. Without
|
* ensure that all rw-conflicts in to these inserts are detected. Without
|
||||||
|
@ -8725,6 +8778,10 @@ heap_xlog_insert(XLogReaderState *record)
|
||||||
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
|
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
|
||||||
PageClearAllVisible(page);
|
PageClearAllVisible(page);
|
||||||
|
|
||||||
|
/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
|
||||||
|
if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
|
||||||
|
PageSetAllVisible(page);
|
||||||
|
|
||||||
MarkBufferDirty(buffer);
|
MarkBufferDirty(buffer);
|
||||||
}
|
}
|
||||||
if (BufferIsValid(buffer))
|
if (BufferIsValid(buffer))
|
||||||
|
@ -8775,6 +8832,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
|
||||||
|
|
||||||
XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
|
XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
|
||||||
|
|
||||||
|
/* check that the mutually exclusive flags are not both set */
|
||||||
|
Assert (!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
|
||||||
|
(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The visibility map may need to be fixed even if the heap page is
|
* The visibility map may need to be fixed even if the heap page is
|
||||||
* already up-to-date.
|
* already up-to-date.
|
||||||
|
|
|
@ -433,6 +433,14 @@ loop:
|
||||||
buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
|
buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
|
||||||
if (PageIsAllVisible(BufferGetPage(buffer)))
|
if (PageIsAllVisible(BufferGetPage(buffer)))
|
||||||
visibilitymap_pin(relation, targetBlock, vmbuffer);
|
visibilitymap_pin(relation, targetBlock, vmbuffer);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the page is empty, pin vmbuffer to set all_frozen bit later.
|
||||||
|
*/
|
||||||
|
if ((options & HEAP_INSERT_FROZEN) &&
|
||||||
|
(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0))
|
||||||
|
visibilitymap_pin(relation, targetBlock, vmbuffer);
|
||||||
|
|
||||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
}
|
}
|
||||||
else if (otherBlock == targetBlock)
|
else if (otherBlock == targetBlock)
|
||||||
|
@ -619,6 +627,15 @@ loop:
|
||||||
PageInit(page, BufferGetPageSize(buffer), 0);
|
PageInit(page, BufferGetPageSize(buffer), 0);
|
||||||
MarkBufferDirty(buffer);
|
MarkBufferDirty(buffer);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The page is empty, pin vmbuffer to set all_frozen bit.
|
||||||
|
*/
|
||||||
|
if (options & HEAP_INSERT_FROZEN)
|
||||||
|
{
|
||||||
|
Assert(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0);
|
||||||
|
visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Release the file-extension lock; it's now OK for someone else to extend
|
* Release the file-extension lock; it's now OK for someone else to extend
|
||||||
* the relation some more.
|
* the relation some more.
|
||||||
|
|
|
@ -69,6 +69,9 @@
|
||||||
#define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3)
|
#define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3)
|
||||||
#define XLH_INSERT_ON_TOAST_RELATION (1<<4)
|
#define XLH_INSERT_ON_TOAST_RELATION (1<<4)
|
||||||
|
|
||||||
|
/* all_frozen_set always implies all_visible_set */
|
||||||
|
#define XLH_INSERT_ALL_FROZEN_SET (1<<5)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* xl_heap_update flag values, 8 bits are available.
|
* xl_heap_update flag values, 8 bits are available.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue