hash: Refactor hash index creation.

The primary goal here is to move all of the related page modifications
to a single section of code, in preparation for adding write-ahead
logging.  In passing, rename _hash_metapinit to _hash_init, since it
initializes more than just the metapage.

Amit Kapila.  The larger patch series of which this is a part has been
reviewed and tested by Álvaro Herrera, Ashutosh Sharma, Mark Kirkwood,
Jeff Janes, and Jesper Pedersen.
This commit is contained in:
Robert Haas 2017-03-07 17:03:51 -05:00
parent 2b87dd8d7a
commit 38305398cd
4 changed files with 144 additions and 135 deletions

View File

@ -120,7 +120,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac); estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
/* Initialize the hash index metadata page and initial buckets */ /* Initialize the hash index metadata page and initial buckets */
num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM); num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
/* /*
* If we just insert the tuples into the index in scan order, then * If we just insert the tuples into the index in scan order, then
@ -182,7 +182,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
void void
hashbuildempty(Relation index) hashbuildempty(Relation index)
{ {
_hash_metapinit(index, 0, INIT_FORKNUM); _hash_init(index, 0, INIT_FORKNUM);
} }
/* /*

View File

@ -570,68 +570,6 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
} }
/*
 *	_hash_initbitmap()
 *
 *	Set up a brand-new bitmap page.  The caller must hold a write lock on
 *	the metapage when entering, and is responsible for writing the metapage
 *	out after we return.
 *
 * 'blkno' gives the block number at which the bitmap page is created.
 *
 * Every bit on the new page is set to "1", i.e. marked "in use".
 */
void
_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
				 ForkNumber forkNum)
{
	Buffer		mapbuf;
	Page		mappage;
	HashPageOpaque mapopaque;
	uint32	   *bitmap;

	/*
	 * Write-locking the new bitmap page while the metapage lock is held is
	 * safe: nobody else can possibly be contending for this page.  Holding
	 * the metapage lock also makes it legitimate to extend the index with
	 * _hash_getnewbuf.
	 *
	 * Doing I/O for the new page under the metapage lock costs some
	 * concurrency, but this path is taken rarely enough that it isn't worth
	 * worrying about.
	 */
	mapbuf = _hash_getnewbuf(rel, blkno, forkNum);
	mappage = BufferGetPage(mapbuf);

	/* fill in the page's special space */
	mapopaque = (HashPageOpaque) PageGetSpecialPointer(mappage);
	mapopaque->hasho_prevblkno = InvalidBlockNumber;
	mapopaque->hasho_nextblkno = InvalidBlockNumber;
	mapopaque->hasho_bucket = -1;
	mapopaque->hasho_flag = LH_BITMAP_PAGE;
	mapopaque->hasho_page_id = HASHO_PAGE_ID;

	/* mark every bit as "in use" */
	bitmap = HashPageGetBitmap(mappage);
	MemSet(bitmap, 0xFF, BMPGSZ_BYTE(metap));

	/* dirty the new bitmap page, then drop its lock and pin */
	MarkBufferDirty(mapbuf);
	_hash_relbuf(rel, mapbuf);

	/*
	 * Record the new bitmap page in the metapage's list of bitmap pages;
	 * the metapage is already write-locked by the caller.
	 */
	if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("out of overflow pages in hash index \"%s\"",
						RelationGetRelationName(rel))));

	metap->hashm_mapp[metap->hashm_nmaps] = blkno;
	metap->hashm_nmaps++;
}
/* /*
* _hash_initbitmapbuffer() * _hash_initbitmapbuffer()
* *

View File

@ -156,6 +156,36 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
return buf; return buf;
} }
/*
 * _hash_initbuf() -- Initialize the hash-index page held in the given buffer.
 */
void
_hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag,
			  bool initpage)
{
	Page		pg = BufferGetPage(buf);
	HashPageOpaque opaque;

	/* reset the raw page contents if the caller asked for it */
	if (initpage)
		_hash_pageinit(pg, BufferGetPageSize(buf));

	opaque = (HashPageOpaque) PageGetSpecialPointer(pg);

	/*
	 * hasho_prevblkno is loaded with the current hashm_maxbucket; it is
	 * later compared against cached HashMetaPageData to detect staleness.
	 * See _hash_getbucketbuf_from_hashkey().
	 */
	opaque->hasho_prevblkno = max_bucket;
	opaque->hasho_nextblkno = InvalidBlockNumber;
	opaque->hasho_bucket = num_bucket;
	opaque->hasho_flag = flag;
	opaque->hasho_page_id = HASHO_PAGE_ID;
}
/* /*
* _hash_getnewbuf() -- Get a new page at the end of the index. * _hash_getnewbuf() -- Get a new page at the end of the index.
* *
@ -288,7 +318,7 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
/* /*
* _hash_metapinit() -- Initialize the metadata page of a hash index, * _hash_init() -- Initialize the metadata page of a hash index,
* the initial buckets, and the initial bitmap page. * the initial buckets, and the initial bitmap page.
* *
* The initial number of buckets is dependent on num_tuples, an estimate * The initial number of buckets is dependent on num_tuples, an estimate
@ -300,19 +330,18 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
* multiple buffer locks is ignored. * multiple buffer locks is ignored.
*/ */
uint32 uint32
_hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum) _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
{ {
HashMetaPage metap;
HashPageOpaque pageopaque;
Buffer metabuf; Buffer metabuf;
Buffer buf; Buffer buf;
Buffer bitmapbuf;
Page pg; Page pg;
HashMetaPage metap;
RegProcedure procid;
int32 data_width; int32 data_width;
int32 item_width; int32 item_width;
int32 ffactor; int32 ffactor;
double dnumbuckets;
uint32 num_buckets; uint32 num_buckets;
uint32 log2_num_buckets;
uint32 i; uint32 i;
/* safety check */ /* safety check */
@ -334,6 +363,96 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
if (ffactor < 10) if (ffactor < 10)
ffactor = 10; ffactor = 10;
procid = index_getprocid(rel, 1, HASHPROC);
/*
* We initialize the metapage, the first N bucket pages, and the first
* bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
* calls to occur. This ensures that the smgr level has the right idea of
* the physical index length.
*
* Critical section not required, because on error the creation of the
* whole relation will be rolled back.
*/
metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
_hash_init_metabuffer(metabuf, num_tuples, procid, ffactor, false);
MarkBufferDirty(metabuf);
pg = BufferGetPage(metabuf);
metap = HashPageGetMeta(pg);
num_buckets = metap->hashm_maxbucket + 1;
/*
* Release buffer lock on the metapage while we initialize buckets.
* Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
* won't accomplish anything. It's a bad idea to hold buffer locks for
* long intervals in any case, since that can block the bgwriter.
*/
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
/*
* Initialize and WAL Log the first N buckets
*/
for (i = 0; i < num_buckets; i++)
{
BlockNumber blkno;
/* Allow interrupts, in case N is huge */
CHECK_FOR_INTERRUPTS();
blkno = BUCKET_TO_BLKNO(metap, i);
buf = _hash_getnewbuf(rel, blkno, forkNum);
_hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
MarkBufferDirty(buf);
_hash_relbuf(rel, buf);
}
/* Now reacquire buffer lock on metapage */
LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
/*
* Initialize bitmap page
*/
bitmapbuf = _hash_getnewbuf(rel, num_buckets + 1, forkNum);
_hash_initbitmapbuffer(bitmapbuf, metap->hashm_bmsize, false);
MarkBufferDirty(bitmapbuf);
/* add the new bitmap page to the metapage's list of bitmaps */
/* metapage already has a write lock */
if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("out of overflow pages in hash index \"%s\"",
RelationGetRelationName(rel))));
metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
metap->hashm_nmaps++;
MarkBufferDirty(metabuf);
/* all done */
_hash_relbuf(rel, bitmapbuf);
_hash_relbuf(rel, metabuf);
return num_buckets;
}
/*
* _hash_init_metabuffer() -- Initialize the metadata page of a hash index.
*/
void
_hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
uint16 ffactor, bool initpage)
{
HashMetaPage metap;
HashPageOpaque pageopaque;
Page page;
double dnumbuckets;
uint32 num_buckets;
uint32 log2_num_buckets;
uint32 i;
/* /*
* Choose the number of initial bucket pages to match the fill factor * Choose the number of initial bucket pages to match the fill factor
* given the estimated number of tuples. We round up the result to the * given the estimated number of tuples. We round up the result to the
@ -353,30 +472,25 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
Assert(num_buckets == (((uint32) 1) << log2_num_buckets)); Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS); Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);
/* page = BufferGetPage(buf);
* We initialize the metapage, the first N bucket pages, and the first if (initpage)
* bitmap page in sequence, using _hash_getnewbuf to cause smgrextend() _hash_pageinit(page, BufferGetPageSize(buf));
* calls to occur. This ensures that the smgr level has the right idea of
* the physical index length.
*/
metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
pg = BufferGetPage(metabuf);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
pageopaque->hasho_prevblkno = InvalidBlockNumber; pageopaque->hasho_prevblkno = InvalidBlockNumber;
pageopaque->hasho_nextblkno = InvalidBlockNumber; pageopaque->hasho_nextblkno = InvalidBlockNumber;
pageopaque->hasho_bucket = -1; pageopaque->hasho_bucket = -1;
pageopaque->hasho_flag = LH_META_PAGE; pageopaque->hasho_flag = LH_META_PAGE;
pageopaque->hasho_page_id = HASHO_PAGE_ID; pageopaque->hasho_page_id = HASHO_PAGE_ID;
metap = HashPageGetMeta(pg); metap = HashPageGetMeta(page);
metap->hashm_magic = HASH_MAGIC; metap->hashm_magic = HASH_MAGIC;
metap->hashm_version = HASH_VERSION; metap->hashm_version = HASH_VERSION;
metap->hashm_ntuples = 0; metap->hashm_ntuples = 0;
metap->hashm_nmaps = 0; metap->hashm_nmaps = 0;
metap->hashm_ffactor = ffactor; metap->hashm_ffactor = ffactor;
metap->hashm_bsize = HashGetMaxBitmapSize(pg); metap->hashm_bsize = HashGetMaxBitmapSize(page);
/* find largest bitmap array size that will fit in page size */ /* find largest bitmap array size that will fit in page size */
for (i = _hash_log2(metap->hashm_bsize); i > 0; --i) for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
{ {
@ -393,7 +507,7 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
* pretty useless for normal operation (in fact, hashm_procid is not used * pretty useless for normal operation (in fact, hashm_procid is not used
* anywhere), but it might be handy for forensic purposes so we keep it. * anywhere), but it might be handy for forensic purposes so we keep it.
*/ */
metap->hashm_procid = index_getprocid(rel, 1, HASHPROC); metap->hashm_procid = procid;
/* /*
* We initialize the index with N buckets, 0 .. N-1, occupying physical * We initialize the index with N buckets, 0 .. N-1, occupying physical
@ -411,54 +525,9 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
metap->hashm_ovflpoint = log2_num_buckets; metap->hashm_ovflpoint = log2_num_buckets;
metap->hashm_firstfree = 0; metap->hashm_firstfree = 0;
/* /* Set pd_lower just past the end of the metadata. */
* Release buffer lock on the metapage while we initialize buckets. ((PageHeader) page)->pd_lower =
* Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS ((char *) metap + sizeof(HashMetaPageData)) - (char *) page;
* won't accomplish anything. It's a bad idea to hold buffer locks for
* long intervals in any case, since that can block the bgwriter.
*/
MarkBufferDirty(metabuf);
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
/*
* Initialize the first N buckets
*/
for (i = 0; i < num_buckets; i++)
{
/* Allow interrupts, in case N is huge */
CHECK_FOR_INTERRUPTS();
buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
pg = BufferGetPage(buf);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
/*
* Set hasho_prevblkno with current hashm_maxbucket. This value will
* be used to validate cached HashMetaPageData. See
* _hash_getbucketbuf_from_hashkey().
*/
pageopaque->hasho_prevblkno = metap->hashm_maxbucket;
pageopaque->hasho_nextblkno = InvalidBlockNumber;
pageopaque->hasho_bucket = i;
pageopaque->hasho_flag = LH_BUCKET_PAGE;
pageopaque->hasho_page_id = HASHO_PAGE_ID;
MarkBufferDirty(buf);
_hash_relbuf(rel, buf);
}
/* Now reacquire buffer lock on metapage */
LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
/*
* Initialize first bitmap page
*/
_hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
/* all done */
MarkBufferDirty(metabuf);
_hash_relbuf(rel, metabuf);
return num_buckets;
} }
/* /*
@ -535,7 +604,7 @@ restart_expand:
* than a disk block then this would be an independent constraint. * than a disk block then this would be an independent constraint.
* *
* If you change this, see also the maximum initial number of buckets in * If you change this, see also the maximum initial number of buckets in
* _hash_metapinit(). * _hash_init().
*/ */
if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE) if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
goto fail; goto fail;

View File

@ -311,8 +311,6 @@ extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool r
extern BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, extern BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy); Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy);
extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
BlockNumber blkno, ForkNumber forkNum);
extern void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage); extern void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage);
extern void _hash_squeezebucket(Relation rel, extern void _hash_squeezebucket(Relation rel,
Bucket bucket, BlockNumber bucket_blkno, Bucket bucket, BlockNumber bucket_blkno,
@ -331,6 +329,8 @@ extern Buffer _hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey,
int access, int access,
HashMetaPage *cachedmetap); HashMetaPage *cachedmetap);
extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno); extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
extern void _hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket,
uint32 flag, bool initpage);
extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno,
ForkNumber forkNum); ForkNumber forkNum);
extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
@ -339,8 +339,10 @@ extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
extern void _hash_relbuf(Relation rel, Buffer buf); extern void _hash_relbuf(Relation rel, Buffer buf);
extern void _hash_dropbuf(Relation rel, Buffer buf); extern void _hash_dropbuf(Relation rel, Buffer buf);
extern void _hash_dropscanbuf(Relation rel, HashScanOpaque so); extern void _hash_dropscanbuf(Relation rel, HashScanOpaque so);
extern uint32 _hash_metapinit(Relation rel, double num_tuples, extern uint32 _hash_init(Relation rel, double num_tuples,
ForkNumber forkNum); ForkNumber forkNum);
extern void _hash_init_metabuffer(Buffer buf, double num_tuples,
RegProcedure procid, uint16 ffactor, bool initpage);
extern void _hash_pageinit(Page page, Size size); extern void _hash_pageinit(Page page, Size size);
extern void _hash_expandtable(Relation rel, Buffer metabuf); extern void _hash_expandtable(Relation rel, Buffer metabuf);
extern void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, extern void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf,