1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* hashinsert.c
|
1997-09-07 07:04:48 +02:00
|
|
|
* Item insertion in hash tables for Postgres.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
1999-07-16 01:04:24 +02:00
|
|
|
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.17 1999/07/15 23:02:54 momjian Exp $
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "postgres.h"
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "access/hash.h"
|
1996-10-20 08:34:30 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, ScanKey scankey, HashItem hitem, Buffer metabuf);
|
|
|
|
static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem);
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* _hash_doinsert() -- Handle insertion of a single HashItem in the table.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* This routine is called by the public interface routines, hashbuild
|
|
|
|
* and hashinsert. By here, hashitem is filled in, and has a unique
|
|
|
|
* (xid, seqno) pair. The datum to be used as a "key" is in the
|
|
|
|
* hashitem.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
InsertIndexResult
|
|
|
|
_hash_doinsert(Relation rel, HashItem hitem)
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Buffer buf;
|
|
|
|
Buffer metabuf;
|
|
|
|
BlockNumber blkno;
|
|
|
|
HashMetaPage metap;
|
|
|
|
IndexTuple itup;
|
1997-09-07 07:04:48 +02:00
|
|
|
InsertIndexResult res;
|
1997-09-08 04:41:22 +02:00
|
|
|
ScanKey itup_scankey;
|
|
|
|
int natts;
|
|
|
|
Page page;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
|
|
|
|
metap = (HashMetaPage) BufferGetPage(metabuf);
|
|
|
|
_hash_checkpage((Page) metap, LH_META_PAGE);
|
|
|
|
|
|
|
|
/* we need a scan key to do our search, so build one */
|
|
|
|
itup = &(hitem->hash_itup);
|
|
|
|
if ((natts = rel->rd_rel->relnatts) != 1)
|
1998-01-07 22:07:04 +01:00
|
|
|
elog(ERROR, "Hash indices valid for only one index key.");
|
1997-09-07 07:04:48 +02:00
|
|
|
itup_scankey = _hash_mkscankey(rel, itup, metap);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* find the first page in the bucket chain containing this key and
|
|
|
|
* place it in buf. _hash_search obtains a read lock for us.
|
|
|
|
*/
|
|
|
|
_hash_search(rel, natts, itup_scankey, &buf, metap);
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
_hash_checkpage(page, LH_BUCKET_PAGE);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* trade in our read lock for a write lock so that we can do the
|
|
|
|
* insertion.
|
|
|
|
*/
|
|
|
|
blkno = BufferGetBlockNumber(buf);
|
|
|
|
_hash_relbuf(rel, buf, HASH_READ);
|
|
|
|
buf = _hash_getbuf(rel, blkno, HASH_WRITE);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX btree comment (haven't decided what to do in hash): don't think
|
|
|
|
* the bucket can be split while we're reading the metapage.
|
|
|
|
*
|
|
|
|
* If the page was split between the time that we surrendered our read
|
|
|
|
* lock and acquired our write lock, then this page may no longer be
|
|
|
|
* the right place for the key we want to insert.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* do the insertion */
|
|
|
|
res = _hash_insertonpg(rel, buf, natts, itup_scankey,
|
|
|
|
hitem, metabuf);
|
|
|
|
|
|
|
|
/* be tidy */
|
|
|
|
_hash_freeskey(itup_scankey);
|
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return res;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* _hash_insertonpg() -- Insert a tuple on a particular page in the table.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* This recursive procedure does the following things:
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* + if necessary, splits the target page.
|
|
|
|
* + inserts the tuple.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* On entry, we must have the right buffer on which to do the
|
|
|
|
* insertion, and the buffer must be pinned and locked. On return,
|
|
|
|
* we will have dropped both the pin and the write lock on the buffer.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
static InsertIndexResult
|
1996-07-09 08:22:35 +02:00
|
|
|
_hash_insertonpg(Relation rel,
|
1997-09-07 07:04:48 +02:00
|
|
|
Buffer buf,
|
|
|
|
int keysz,
|
|
|
|
ScanKey scankey,
|
|
|
|
HashItem hitem,
|
|
|
|
Buffer metabuf)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-07 07:04:48 +02:00
|
|
|
InsertIndexResult res;
|
1997-09-08 04:41:22 +02:00
|
|
|
Page page;
|
|
|
|
BlockNumber itup_blkno;
|
|
|
|
OffsetNumber itup_off;
|
|
|
|
int itemsz;
|
|
|
|
HashPageOpaque pageopaque;
|
|
|
|
bool do_expand = false;
|
|
|
|
Buffer ovflbuf;
|
|
|
|
HashMetaPage metap;
|
|
|
|
Bucket bucket;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
metap = (HashMetaPage) BufferGetPage(metabuf);
|
|
|
|
_hash_checkpage((Page) metap, LH_META_PAGE);
|
|
|
|
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
_hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
|
1996-07-09 08:22:35 +02:00
|
|
|
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
|
1997-09-07 07:04:48 +02:00
|
|
|
bucket = pageopaque->hasho_bucket;
|
|
|
|
|
|
|
|
itemsz = IndexTupleDSize(hitem->hash_itup)
|
|
|
|
+ (sizeof(HashItemData) - sizeof(IndexTupleData));
|
|
|
|
itemsz = DOUBLEALIGN(itemsz);
|
|
|
|
|
|
|
|
while (PageGetFreeSpace(page) < itemsz)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* no space on this page; check for an overflow page
|
|
|
|
*/
|
|
|
|
if (BlockNumberIsValid(pageopaque->hasho_nextblkno))
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ovfl page exists; go get it. if it doesn't have room,
|
|
|
|
* we'll find out next pass through the loop test above.
|
|
|
|
*/
|
|
|
|
ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno,
|
|
|
|
HASH_WRITE);
|
|
|
|
_hash_relbuf(rel, buf, HASH_WRITE);
|
|
|
|
buf = ovflbuf;
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* we're at the end of the bucket chain and we haven't found a
|
|
|
|
* page with enough room. allocate a new overflow page.
|
|
|
|
*/
|
|
|
|
do_expand = true;
|
|
|
|
ovflbuf = _hash_addovflpage(rel, &metabuf, buf);
|
|
|
|
_hash_relbuf(rel, buf, HASH_WRITE);
|
|
|
|
buf = ovflbuf;
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
|
|
|
|
if (PageGetFreeSpace(page) < itemsz)
|
|
|
|
{
|
|
|
|
/* it doesn't fit on an empty page -- give up */
|
1998-01-07 22:07:04 +01:00
|
|
|
elog(ERROR, "hash item too large");
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
_hash_checkpage(page, LH_OVERFLOW_PAGE);
|
|
|
|
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
Assert(pageopaque->hasho_bucket == bucket);
|
|
|
|
}
|
|
|
|
|
|
|
|
itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem);
|
|
|
|
itup_blkno = BufferGetBlockNumber(buf);
|
|
|
|
|
|
|
|
/* by here, the new tuple is inserted */
|
|
|
|
res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
|
|
|
|
|
|
|
|
ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
|
|
|
|
|
|
|
|
if (res != NULL)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Increment the number of keys in the table. We switch lock
|
|
|
|
* access type just for a moment to allow greater accessibility to
|
|
|
|
* the metapage.
|
|
|
|
*/
|
|
|
|
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
|
|
|
|
HASH_READ, HASH_WRITE);
|
|
|
|
metap->hashm_nkeys += 1;
|
|
|
|
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
|
|
|
|
HASH_WRITE, HASH_READ);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
_hash_wrtbuf(rel, buf);
|
|
|
|
|
|
|
|
if (do_expand ||
|
|
|
|
(metap->hashm_nkeys / (metap->hashm_maxbucket + 1))
|
|
|
|
> metap->hashm_ffactor)
|
|
|
|
_hash_expandtable(rel, metabuf);
|
|
|
|
_hash_relbuf(rel, metabuf, HASH_READ);
|
1998-09-01 05:29:17 +02:00
|
|
|
return res;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* _hash_pgaddtup() -- add a tuple to a particular page in the index.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* This routine adds the tuple to the page as requested, and keeps the
|
|
|
|
* write lock and reference associated with the page's buffer. It is
|
|
|
|
* an error to call pgaddtup() without a write lock and reference.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
static OffsetNumber
|
1996-07-09 08:22:35 +02:00
|
|
|
_hash_pgaddtup(Relation rel,
|
1997-09-07 07:04:48 +02:00
|
|
|
Buffer buf,
|
|
|
|
int keysz,
|
|
|
|
ScanKey itup_scankey,
|
|
|
|
Size itemsize,
|
|
|
|
HashItem hitem)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
OffsetNumber itup_off;
|
|
|
|
Page page;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
_hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
|
|
|
|
|
|
|
|
itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
|
|
|
PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED);
|
|
|
|
|
|
|
|
/* write the buffer, but hold our lock */
|
|
|
|
_hash_wrtnorelbuf(rel, buf);
|
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return itup_off;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|