Remove _hash_wrtbuf() in favor of calling MarkBufferDirty().

The whole concept of _hash_wrtbuf() is that we need to know at the
time we're releasing the buffer lock (and pin) whether we dirtied the
buffer, but this is easy to get wrong.  This patch actually fixes one
non-obvious bug of that form: hashbucketcleanup forgot to signal
_hash_squeezebucket, which gets the primary bucket page already
locked, as to whether it had already dirtied the page.  Calling
MarkBufferDirty() at the places where we dirty the buffer is more
intuitive and lets us simplify the code in various places as well.
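
To make the contrast concrete, here is a condensed sketch of the two
idioms (the helpers _hash_wrtbuf, _hash_relbuf, MarkBufferDirty, and
PageIndexMultiDelete are the real 9.6-era routines; the two wrapper
functions and their names are invented for illustration and assume the
surrounding PostgreSQL headers):

    #include "postgres.h"
    #include "access/hash.h"
    #include "storage/bufmgr.h"
    #include "storage/bufpage.h"

    /*
     * Old idiom: the modification site must thread a "dirty" flag all the
     * way to release time, then pick the right release call.
     */
    static void
    release_old_way(Relation rel, Buffer buf, bool page_dirty)
    {
        if (page_dirty)
            _hash_wrtbuf(rel, buf);     /* MarkBufferDirty + unlock + unpin */
        else
            _hash_relbuf(rel, buf);     /* unlock + unpin only */
    }

    /*
     * New idiom: dirty the buffer at the point where the change happens;
     * releasing is then identical on every path.
     */
    static void
    delete_and_release_new_way(Relation rel, Buffer buf,
                               OffsetNumber *deletable, int ndeletable)
    {
        Page        page = BufferGetPage(buf);

        if (ndeletable > 0)
        {
            PageIndexMultiDelete(page, deletable, ndeletable);
            MarkBufferDirty(buf);       /* recorded where the change is made */
        }
        _hash_relbuf(rel, buf);         /* no flag needed here */
    }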

On top of all that, the ultimate goal here is to make hash indexes
WAL-logged, and as the comments to _hash_wrtbuf() note, it should
go away when that happens.  Making it go away a little earlier than
that seems like a good preparatory step.
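
To spell out the ordering constraint from that comment, here is a hedged
sketch of what a WAL-logged page modification will need to look like.
The function and the XLOG_HASH_DUMMY record id are invented for
illustration; the buffer, critical-section, and WAL-insertion calls are
the standard 9.6-era APIs:

    #include "postgres.h"
    #include "access/xloginsert.h"
    #include "miscadmin.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    #define XLOG_HASH_DUMMY 0x00        /* hypothetical record id */

    static void
    modify_page_wal_logged(Relation rel, Buffer buf)
    {
        START_CRIT_SECTION();

        /* ... apply the page modification here ... */
        MarkBufferDirty(buf);           /* step 1: mark the buffer dirty */

        if (RelationNeedsWAL(rel))
        {
            XLogRecPtr  recptr;

            XLogBeginInsert();
            XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
            recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DUMMY);   /* step 2 */
            PageSetLSN(BufferGetPage(buf), recptr);
        }

        END_CRIT_SECTION();

        UnlockReleaseBuffer(buf);       /* step 3: only now drop lock and pin */
    }

Because the WAL insert must happen between marking dirty and releasing,
a helper that fuses those two steps cannot survive WAL-logging.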

Report by Jeff Janes.  Diagnosis by Amit Kapila, Kuntal Ghosh,
and Dilip Kumar.  Patch by me, after studying an alternative patch
submitted by Amit Kapila.

Discussion: http://postgr.es/m/CAA4eK1Kf6tOY0oVz_SEdngiNFkeXrA3xUSDPPORQvsWVPdKqnA@mail.gmail.com
Author: Robert Haas
Date:   2016-12-16 09:29:21 -05:00
Commit: 25216c9893
Parent: 4f5182e18d

5 changed files with 41 additions and 63 deletions

src/backend/access/hash/hash.c

@@ -635,7 +635,8 @@ loop_top:
 		num_index_tuples = metap->hashm_ntuples;
 	}
 
-	_hash_wrtbuf(rel, metabuf);
+	MarkBufferDirty(metabuf);
+	_hash_relbuf(rel, metabuf);
 
 	/* return statistics */
 	if (stats == NULL)
@@ -724,7 +725,6 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 	OffsetNumber deletable[MaxOffsetNumber];
 	int			ndeletable = 0;
 	bool		retain_pin = false;
-	bool		curr_page_dirty = false;
 
 	vacuum_delay_point();
 
@@ -805,7 +805,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		{
 			PageIndexMultiDelete(page, deletable, ndeletable);
 			bucket_dirty = true;
-			curr_page_dirty = true;
+			MarkBufferDirty(buf);
 		}
 
 		/* bail out if there are no more pages to scan. */
@@ -820,15 +820,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		 * release the lock on previous page after acquiring the lock on next
 		 * page
 		 */
-		if (curr_page_dirty)
-		{
-			if (retain_pin)
-				_hash_chgbufaccess(rel, buf, HASH_WRITE, HASH_NOLOCK);
-			else
-				_hash_wrtbuf(rel, buf);
-			curr_page_dirty = false;
-		}
-		else if (retain_pin)
+		if (retain_pin)
 			_hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);
 		else
 			_hash_relbuf(rel, buf);
@@ -862,6 +854,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 		bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
+		MarkBufferDirty(bucket_buf);
 	}
 
 	/*
@@ -873,7 +866,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		_hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
 							bstrategy);
 	else
-		_hash_chgbufaccess(rel, bucket_buf, HASH_WRITE, HASH_NOLOCK);
+		_hash_chgbufaccess(rel, bucket_buf, HASH_READ, HASH_NOLOCK);
 }
 
 void

src/backend/access/hash/hashinsert.c

@@ -208,11 +208,12 @@ restart_insert:
 	(void) _hash_pgaddtup(rel, buf, itemsz, itup);
 
 	/*
-	 * write and release the modified page.  if the page we modified was an
+	 * dirty and release the modified page.  if the page we modified was an
 	 * overflow page, we also need to separately drop the pin we retained on
 	 * the primary bucket page.
 	 */
-	_hash_wrtbuf(rel, buf);
+	MarkBufferDirty(buf);
+	_hash_relbuf(rel, buf);
 
 	if (buf != bucket_buf)
 		_hash_dropbuf(rel, bucket_buf);

src/backend/access/hash/hashovfl.c

@@ -149,10 +149,11 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
 	/* logically chain overflow page to previous page */
 	pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
+	MarkBufferDirty(buf);
 	if ((pageopaque->hasho_flag & LH_BUCKET_PAGE) && retain_pin)
-		_hash_chgbufaccess(rel, buf, HASH_WRITE, HASH_NOLOCK);
+		_hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);
 	else
-		_hash_wrtbuf(rel, buf);
+		_hash_relbuf(rel, buf);
 
 	return ovflbuf;
 }
@@ -304,7 +305,8 @@ found:
 	/* mark page "in use" in the bitmap */
 	SETBIT(freep, bit);
-	_hash_wrtbuf(rel, mapbuf);
+	MarkBufferDirty(mapbuf);
+	_hash_relbuf(rel, mapbuf);
 
 	/* Reacquire exclusive lock on the meta page */
 	_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
@@ -416,7 +418,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	 * in _hash_pageinit() when the page is reused.)
 	 */
 	MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
-	_hash_wrtbuf(rel, ovflbuf);
+	MarkBufferDirty(ovflbuf);
+	_hash_relbuf(rel, ovflbuf);
 
 	/*
 	 * Fix up the bucket chain.  this is a doubly-linked list, so we must fix
@@ -445,7 +448,10 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 		prevopaque->hasho_nextblkno = nextblkno;
 
 		if (prevblkno != writeblkno)
-			_hash_wrtbuf(rel, prevbuf);
+		{
+			MarkBufferDirty(prevbuf);
+			_hash_relbuf(rel, prevbuf);
+		}
 	}
 
 	/* write and unlock the write buffer */
@@ -466,7 +472,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 		Assert(nextopaque->hasho_bucket == bucket);
 		nextopaque->hasho_prevblkno = prevblkno;
-		_hash_wrtbuf(rel, nextbuf);
+		MarkBufferDirty(nextbuf);
+		_hash_relbuf(rel, nextbuf);
 	}
 
 	/* Note: bstrategy is intentionally not used for metapage and bitmap */
@@ -494,7 +501,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	freep = HashPageGetBitmap(mappage);
 	Assert(ISSET(freep, bitmapbit));
 	CLRBIT(freep, bitmapbit);
-	_hash_wrtbuf(rel, mapbuf);
+	MarkBufferDirty(mapbuf);
+	_hash_relbuf(rel, mapbuf);
 
 	/* Get write-lock on metapage to update firstfree */
 	_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
@@ -503,13 +511,9 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	if (ovflbitno < metap->hashm_firstfree)
 	{
 		metap->hashm_firstfree = ovflbitno;
-		_hash_wrtbuf(rel, metabuf);
-	}
-	else
-	{
-		/* no need to change metapage */
-		_hash_relbuf(rel, metabuf);
+		MarkBufferDirty(metabuf);
 	}
+	_hash_relbuf(rel, metabuf);
 
 	return nextblkno;
 }
@@ -559,8 +563,9 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
 	freep = HashPageGetBitmap(pg);
 	MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));
 
-	/* write out the new bitmap page (releasing write lock and pin) */
-	_hash_wrtbuf(rel, buf);
+	/* dirty the new bitmap page, and release write lock and pin */
+	MarkBufferDirty(buf);
+	_hash_relbuf(rel, buf);
 
 	/* add the new bitmap page to the metapage's list of bitmaps */
 	/* metapage already has a write lock */
@@ -724,13 +729,8 @@ _hash_squeezebucket(Relation rel,
 			 * on next page
 			 */
 			if (wbuf_dirty)
-			{
-				if (retain_pin)
-					_hash_chgbufaccess(rel, wbuf, HASH_WRITE, HASH_NOLOCK);
-				else
-					_hash_wrtbuf(rel, wbuf);
-			}
-			else if (retain_pin)
+				MarkBufferDirty(wbuf);
+			if (retain_pin)
 				_hash_chgbufaccess(rel, wbuf, HASH_READ, HASH_NOLOCK);
 			else
 				_hash_relbuf(rel, wbuf);
@@ -742,10 +742,9 @@ _hash_squeezebucket(Relation rel,
 		{
 			/* Delete tuples we already moved off read page */
 			PageIndexMultiDelete(rpage, deletable, ndeletable);
-			_hash_wrtbuf(rel, rbuf);
+			MarkBufferDirty(rbuf);
 		}
-		else
-			_hash_relbuf(rel, rbuf);
+		_hash_relbuf(rel, rbuf);
 		return;
 	}

src/backend/access/hash/hashpage.c

@@ -289,25 +289,6 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
 	so->hashso_buc_split = false;
 }
 
-/*
- *	_hash_wrtbuf() -- write a hash page to disk.
- *
- *		This routine releases the lock held on the buffer and our refcount
- *		for it.  It is an error to call _hash_wrtbuf() without a write lock
- *		and a pin on the buffer.
- *
- *		NOTE: this routine should go away when/if hash indexes are WAL-ified.
- *		The correct sequence of operations is to mark the buffer dirty, then
- *		write the WAL record, then release the lock and pin; so marking dirty
- *		can't be combined with releasing.
- */
-void
-_hash_wrtbuf(Relation rel, Buffer buf)
-{
-	MarkBufferDirty(buf);
-	UnlockReleaseBuffer(buf);
-}
-
 /*
  *	_hash_chgbufaccess() -- Change the lock type on a buffer, without
  *			dropping our pin on it.
@@ -483,7 +464,8 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 		pageopaque->hasho_bucket = i;
 		pageopaque->hasho_flag = LH_BUCKET_PAGE;
 		pageopaque->hasho_page_id = HASHO_PAGE_ID;
-		_hash_wrtbuf(rel, buf);
+		MarkBufferDirty(buf);
+		_hash_relbuf(rel, buf);
 	}
 
 	/* Now reacquire buffer lock on metapage */
@@ -495,7 +477,8 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 	_hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
 
 	/* all done */
-	_hash_wrtbuf(rel, metabuf);
+	MarkBufferDirty(metabuf);
+	_hash_relbuf(rel, metabuf);
 
 	return num_buckets;
 }
@@ -1075,7 +1058,10 @@ _hash_splitbucket_guts(Relation rel,
 	if (nbuf == bucket_nbuf)
 		_hash_chgbufaccess(rel, bucket_nbuf, HASH_WRITE, HASH_NOLOCK);
 	else
-		_hash_wrtbuf(rel, nbuf);
+	{
+		MarkBufferDirty(nbuf);
+		_hash_relbuf(rel, nbuf);
+	}
 
 	_hash_chgbufaccess(rel, bucket_obuf, HASH_NOLOCK, HASH_WRITE);
 	opage = BufferGetPage(bucket_obuf);

src/include/access/hash.h

@@ -336,7 +336,6 @@ extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
 extern void _hash_relbuf(Relation rel, Buffer buf);
 extern void _hash_dropbuf(Relation rel, Buffer buf);
 extern void _hash_dropscanbuf(Relation rel, HashScanOpaque so);
-extern void _hash_wrtbuf(Relation rel, Buffer buf);
 extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
 				   int to_access);
 extern uint32 _hash_metapinit(Relation rel, double num_tuples,