/*-------------------------------------------------------------------------
 *
 * hash.c
 *	  Implementation of Margo Seltzer's Hashing package for postgres.
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hash.c
 *
 * NOTES
 *	  This file contains only the public interface routines.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/hash.h"
#include "access/relscan.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "optimizer/cost.h"
#include "optimizer/plancat.h"
#include "storage/bufmgr.h"

/* Working state for hashbuild and its callback */
typedef struct
{
    HSpool     *spool;          /* NULL if not using spooling */
    double      indtuples;      /* # tuples accepted into index */
} HashBuildState;

static void hashbuildCallback(Relation index,
                  HeapTuple htup,
                  Datum *values,
                  bool *isnull,
                  bool tupleIsAlive,
                  void *state);

/*
 * hashbuild() -- build a new hash index.
 */
Datum
hashbuild(PG_FUNCTION_ARGS)
{
    Relation    heap = (Relation) PG_GETARG_POINTER(0);
    Relation    index = (Relation) PG_GETARG_POINTER(1);
    IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
    IndexBuildResult *result;
    BlockNumber relpages;
    double      reltuples;
    uint32      num_buckets;
    HashBuildState buildstate;

    /*
     * We expect to be called exactly once for any index relation. If that's
     * not the case, big trouble's what we have.
     */
    if (RelationGetNumberOfBlocks(index) != 0)
        elog(ERROR, "index \"%s\" already contains data",
             RelationGetRelationName(index));

    /* Estimate the number of rows currently present in the table */
    estimate_rel_size(heap, NULL, &relpages, &reltuples);

    /* Initialize the hash index metadata page and initial buckets */
    num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);

    /*
     * If we just insert the tuples into the index in scan order, then
     * (assuming their hash codes are pretty random) there will be no locality
     * of access to the index, and if the index is bigger than available RAM
     * then we'll thrash horribly.  To prevent that scenario, we can sort the
     * tuples by (expected) bucket number.  However, such a sort is useless
     * overhead when the index does fit in RAM.  We choose to sort if the
     * initial index size exceeds NBuffers.
     *
     * NOTE: this test will need adjustment if a bucket is ever different from
     * one page.
     */
    if (num_buckets >= (uint32) NBuffers)
        buildstate.spool = _h_spoolinit(index, num_buckets);
    else
        buildstate.spool = NULL;

    /* prepare to build the index */
    buildstate.indtuples = 0;

    /* do the heap scan */
    reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
                                   hashbuildCallback, (void *) &buildstate);

    if (buildstate.spool)
    {
        /* sort the tuples and insert them into the index */
        _h_indexbuild(buildstate.spool);
        _h_spooldestroy(buildstate.spool);
    }

    /*
     * Return statistics
     */
    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));

    result->heap_tuples = reltuples;
    result->index_tuples = buildstate.indtuples;

    PG_RETURN_POINTER(result);
}

/*
 * hashbuildempty() -- build an empty hash index in the initialization fork
 */
Datum
hashbuildempty(PG_FUNCTION_ARGS)
{
    Relation    index = (Relation) PG_GETARG_POINTER(0);

    _hash_metapinit(index, 0, INIT_FORKNUM);

    PG_RETURN_VOID();
}

/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
hashbuildCallback(Relation index,
                  HeapTuple htup,
                  Datum *values,
                  bool *isnull,
                  bool tupleIsAlive,
                  void *state)
{
    HashBuildState *buildstate = (HashBuildState *) state;
    IndexTuple  itup;

    /* form an index tuple and point it at the heap tuple */
    itup = _hash_form_tuple(index, values, isnull);
    itup->t_tid = htup->t_self;

    /* Hash indexes don't index nulls, see notes in hashinsert */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        return;
    }

    /* Either spool the tuple for sorting, or just put it into the index */
    if (buildstate->spool)
        _h_spool(itup, buildstate->spool);
    else
        _hash_doinsert(index, itup);

    buildstate->indtuples += 1;

    pfree(itup);
}

/*
 * hashinsert() -- insert an index tuple into a hash table.
 *
 * Hash on the heap tuple's key, form an index tuple with hash code.
 * Find the appropriate location for the new tuple, and put it there.
 */
Datum
hashinsert(PG_FUNCTION_ARGS)
{
    Relation    rel = (Relation) PG_GETARG_POINTER(0);
    Datum      *values = (Datum *) PG_GETARG_POINTER(1);
    bool       *isnull = (bool *) PG_GETARG_POINTER(2);
    ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);

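    /*
     * The remaining aminsert arguments are accepted but unused: hash indexes
     * do not support unique-index checks, so heapRel and checkUnique have
     * nothing to do here.
     */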
#ifdef NOT_USED
    Relation    heapRel = (Relation) PG_GETARG_POINTER(4);
    IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
#endif
    IndexTuple  itup;

    /* generate an index tuple */
    itup = _hash_form_tuple(rel, values, isnull);
    itup->t_tid = *ht_ctid;

    /*
     * If the single index key is null, we don't insert it into the index.
     * Hash tables support scans on '='.  Relational algebra says that A = B
     * returns null if either A or B is null.  This means that no
     * qualification used in an index scan could ever return true on a null
     * attribute.  It also means that indices can't be used by ISNULL or
     * NOTNULL scans, but that's an artifact of the strategy map architecture
     * chosen in 1986, not of the way nulls are handled here.
     */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        PG_RETURN_BOOL(false);
    }

    _hash_doinsert(rel, itup);

    pfree(itup);

    PG_RETURN_BOOL(false);
}

/*
 * hashgettuple() -- Get the next tuple in the scan.
 */
Datum
hashgettuple(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Relation    rel = scan->indexRelation;
    Buffer      buf;
    Page        page;
    OffsetNumber offnum;
    ItemPointer current;
    bool        res;

    /* Hash indexes are always lossy since we store only the hash code */
    scan->xs_recheck = true;

    /*
     * We hold pin but not lock on current buffer while outside the hash AM.
     * Reacquire the read lock here.
     */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ);

    /*
     * If we've already initialized this scan, we can just advance it in the
     * appropriate direction.  If we haven't done so yet, we call a routine to
     * get the first item in the scan.
     */
    current = &(so->hashso_curpos);
    if (ItemPointerIsValid(current))
    {
        /*
         * An insertion into the current index page could have happened while
         * we didn't have read lock on it.  Re-find our position by looking
         * for the TID we previously returned.  (Because we hold share lock on
         * the bucket, no deletions or splits could have occurred; therefore
         * we can expect that the TID still exists in the current index page,
         * at an offset >= where we were.)
         */
        OffsetNumber maxoffnum;

        buf = so->hashso_curbuf;
        Assert(BufferIsValid(buf));
        page = BufferGetPage(buf);
        maxoffnum = PageGetMaxOffsetNumber(page);
        for (offnum = ItemPointerGetOffsetNumber(current);
             offnum <= maxoffnum;
             offnum = OffsetNumberNext(offnum))
        {
            IndexTuple  itup;

            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
            if (ItemPointerEquals(&(so->hashso_heappos), &(itup->t_tid)))
                break;
        }
        if (offnum > maxoffnum)
            elog(ERROR, "failed to re-find scan position within index \"%s\"",
                 RelationGetRelationName(rel));
        ItemPointerSetOffsetNumber(current, offnum);

        /*
         * Check to see if we should kill the previously-fetched tuple.
         */
        if (scan->kill_prior_tuple)
        {
            /*
             * Yes, so mark it by setting the LP_DEAD state in the item flags.
             */
            ItemIdMarkDead(PageGetItemId(page, offnum));

            /*
             * Since this can be redone later if needed, it's treated the same
             * as a commit-hint-bit status update for heap tuples: we mark the
             * buffer dirty but don't make a WAL log entry.
             */
            SetBufferCommitInfoNeedsSave(buf);
        }

        /*
         * Now continue the scan.
         */
        res = _hash_next(scan, dir);
    }
    else
        res = _hash_first(scan, dir);

    /*
     * Skip killed tuples if asked to.
     */
    if (scan->ignore_killed_tuples)
    {
        while (res)
        {
            offnum = ItemPointerGetOffsetNumber(current);
            page = BufferGetPage(so->hashso_curbuf);
            if (!ItemIdIsDead(PageGetItemId(page, offnum)))
                break;
            res = _hash_next(scan, dir);
        }
    }

    /* Release read lock on current buffer, but keep it pinned */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_READ, HASH_NOLOCK);

    /* Return current heap TID on success */
    scan->xs_ctup.t_self = so->hashso_heappos;

    PG_RETURN_BOOL(res);
}

/*
 * hashgetbitmap() -- get all tuples at once
 */
Datum
hashgetbitmap(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    bool        res;
    int64       ntids = 0;

    res = _hash_first(scan, ForwardScanDirection);

    while (res)
    {
        bool        add_tuple;

        /*
         * Skip killed tuples if asked to.
         */
        if (scan->ignore_killed_tuples)
        {
            Page        page;
            OffsetNumber offnum;

            offnum = ItemPointerGetOffsetNumber(&(so->hashso_curpos));
            page = BufferGetPage(so->hashso_curbuf);
            add_tuple = !ItemIdIsDead(PageGetItemId(page, offnum));
        }
        else
            add_tuple = true;

        /* Save tuple ID, and continue scanning */
        if (add_tuple)
        {
            /* Note we mark the tuple ID as requiring recheck */
            tbm_add_tuples(tbm, &(so->hashso_heappos), 1, true);
            ntids++;
        }

        res = _hash_next(scan, ForwardScanDirection);
    }

    PG_RETURN_INT64(ntids);
}

/*
 * hashbeginscan() -- start a scan on a hash index
 */
Datum
hashbeginscan(PG_FUNCTION_ARGS)
{
    Relation    rel = (Relation) PG_GETARG_POINTER(0);
    int         nkeys = PG_GETARG_INT32(1);
    int         norderbys = PG_GETARG_INT32(2);
    IndexScanDesc scan;
    HashScanOpaque so;

    /* no order by operators allowed */
    Assert(norderbys == 0);

    scan = RelationGetIndexScan(rel, nkeys, norderbys);

    so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
    so->hashso_bucket_valid = false;
    so->hashso_bucket_blkno = 0;
    so->hashso_curbuf = InvalidBuffer;
    /* set position invalid (this will cause _hash_first call) */
    ItemPointerSetInvalid(&(so->hashso_curpos));
    ItemPointerSetInvalid(&(so->hashso_heappos));

    scan->opaque = so;

    /* register scan in case we change pages it's using */
    _hash_regscan(scan);

    PG_RETURN_POINTER(scan);
}

/*
 * hashrescan() -- rescan an index relation
 */
Datum
hashrescan(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanKey     scankey = (ScanKey) PG_GETARG_POINTER(1);

    /* remaining arguments are ignored */
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Relation    rel = scan->indexRelation;

    /* release any pin we still hold */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_dropbuf(rel, so->hashso_curbuf);
    so->hashso_curbuf = InvalidBuffer;

    /* release lock on bucket, too */
    if (so->hashso_bucket_blkno)
        _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE);
    so->hashso_bucket_blkno = 0;

    /* set position invalid (this will cause _hash_first call) */
    ItemPointerSetInvalid(&(so->hashso_curpos));
    ItemPointerSetInvalid(&(so->hashso_heappos));

    /* Update scan key, if a new one is given */
    if (scankey && scan->numberOfKeys > 0)
    {
        memmove(scan->keyData,
                scankey,
                scan->numberOfKeys * sizeof(ScanKeyData));
        so->hashso_bucket_valid = false;
    }

    PG_RETURN_VOID();
}

/*
 * hashendscan() -- close down a scan
 */
Datum
hashendscan(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Relation    rel = scan->indexRelation;

    /* don't need scan registered anymore */
    _hash_dropscan(scan);

    /* release any pin we still hold */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_dropbuf(rel, so->hashso_curbuf);
    so->hashso_curbuf = InvalidBuffer;

    /* release lock on bucket, too */
    if (so->hashso_bucket_blkno)
        _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE);
    so->hashso_bucket_blkno = 0;

    pfree(so);
    scan->opaque = NULL;

    PG_RETURN_VOID();
}

/*
 * hashmarkpos() -- save current scan position
 */
Datum
hashmarkpos(PG_FUNCTION_ARGS)
{
    elog(ERROR, "hash does not support mark/restore");
    PG_RETURN_VOID();
}

/*
 * hashrestrpos() -- restore scan to last saved position
 */
Datum
hashrestrpos(PG_FUNCTION_ARGS)
{
    elog(ERROR, "hash does not support mark/restore");
    PG_RETURN_VOID();
}

/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
hashbulkdelete(PG_FUNCTION_ARGS)
{
    IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
    IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
    IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
    void       *callback_state = (void *) PG_GETARG_POINTER(3);
    Relation    rel = info->index;
    double      tuples_removed;
    double      num_index_tuples;
    double      orig_ntuples;
    Bucket      orig_maxbucket;
    Bucket      cur_maxbucket;
    Bucket      cur_bucket;
    Buffer      metabuf;
    HashMetaPage metap;
    HashMetaPageData local_metapage;

    tuples_removed = 0;
    num_index_tuples = 0;

    /*
     * Read the metapage to fetch original bucket and tuple counts.  Also, we
     * keep a copy of the last-seen metapage so that we can use its
     * hashm_spares[] values to compute bucket page addresses.  This is a bit
     * hokey but perfectly safe, since the interesting entries in the spares
     * array cannot change under us; and it beats rereading the metapage for
     * each bucket.
     */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));
    orig_maxbucket = metap->hashm_maxbucket;
    orig_ntuples = metap->hashm_ntuples;
    memcpy(&local_metapage, metap, sizeof(local_metapage));
    _hash_relbuf(rel, metabuf);

    /* Scan the buckets that we know exist */
    cur_bucket = 0;
    cur_maxbucket = orig_maxbucket;

loop_top:
    while (cur_bucket <= cur_maxbucket)
    {
        BlockNumber bucket_blkno;
        BlockNumber blkno;
        bool        bucket_dirty = false;

        /* Get address of bucket's start page */
        bucket_blkno = BUCKET_TO_BLKNO(&local_metapage, cur_bucket);

        /* Exclusive-lock the bucket so we can shrink it */
        _hash_getlock(rel, bucket_blkno, HASH_EXCLUSIVE);

        /* Shouldn't have any active scans locally, either */
        if (_hash_has_active_scan(rel, cur_bucket))
            elog(ERROR, "hash index has active scan during VACUUM");

        /* Scan each page in bucket */
        blkno = bucket_blkno;
        while (BlockNumberIsValid(blkno))
        {
            Buffer      buf;
            Page        page;
            HashPageOpaque opaque;
            OffsetNumber offno;
            OffsetNumber maxoffno;
            OffsetNumber deletable[MaxOffsetNumber];
            int         ndeletable = 0;

            vacuum_delay_point();

            buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
                                             LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
                                             info->strategy);
            page = BufferGetPage(buf);
            opaque = (HashPageOpaque) PageGetSpecialPointer(page);
            Assert(opaque->hasho_bucket == cur_bucket);

            /* Scan each tuple in page */
            maxoffno = PageGetMaxOffsetNumber(page);
            for (offno = FirstOffsetNumber;
                 offno <= maxoffno;
                 offno = OffsetNumberNext(offno))
            {
                IndexTuple  itup;
                ItemPointer htup;

                itup = (IndexTuple) PageGetItem(page,
                                                PageGetItemId(page, offno));
                htup = &(itup->t_tid);
                if (callback(htup, callback_state))
                {
                    /* mark the item for deletion */
                    deletable[ndeletable++] = offno;
                    tuples_removed += 1;
                }
                else
                    num_index_tuples += 1;
            }

            /*
             * Apply deletions and write page if needed, advance to next page.
             */
            blkno = opaque->hasho_nextblkno;

            if (ndeletable > 0)
            {
                PageIndexMultiDelete(page, deletable, ndeletable);
                _hash_wrtbuf(rel, buf);
                bucket_dirty = true;
            }
            else
                _hash_relbuf(rel, buf);
        }

        /* If we deleted anything, try to compact free space */
        if (bucket_dirty)
            _hash_squeezebucket(rel, cur_bucket, bucket_blkno,
                                info->strategy);

        /* Release bucket lock */
        _hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE);

        /* Advance to next bucket */
        cur_bucket++;
    }

    /* Write-lock metapage and check for split since we started */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));

    if (cur_maxbucket != metap->hashm_maxbucket)
    {
        /* There's been a split, so process the additional bucket(s) */
        cur_maxbucket = metap->hashm_maxbucket;
        memcpy(&local_metapage, metap, sizeof(local_metapage));
        _hash_relbuf(rel, metabuf);
        goto loop_top;
    }

    /* Okay, we're really done.  Update tuple count in metapage. */

    if (orig_maxbucket == metap->hashm_maxbucket &&
        orig_ntuples == metap->hashm_ntuples)
    {
        /*
         * No one has split or inserted anything since start of scan, so
         * believe our count as gospel.
         */
        metap->hashm_ntuples = num_index_tuples;
    }
    else
    {
        /*
         * Otherwise, our count is untrustworthy since we may have
         * double-scanned tuples in split buckets.  Proceed by dead-reckoning.
         * (Note: we still return estimated_count = false, because using this
         * count is better than not updating reltuples at all.)
         */
        if (metap->hashm_ntuples > tuples_removed)
            metap->hashm_ntuples -= tuples_removed;
        else
            metap->hashm_ntuples = 0;
        num_index_tuples = metap->hashm_ntuples;
    }

    _hash_wrtbuf(rel, metabuf);

    /* return statistics */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
    stats->estimated_count = false;
    stats->num_index_tuples = num_index_tuples;
    stats->tuples_removed += tuples_removed;
    /* hashvacuumcleanup will fill in num_pages */

    PG_RETURN_POINTER(stats);
}

/*
 * Post-VACUUM cleanup.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
hashvacuumcleanup(PG_FUNCTION_ARGS)
{
    IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
    IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
    Relation    rel = info->index;
    BlockNumber num_pages;

    /* If hashbulkdelete wasn't called, return NULL signifying no change */
    /* Note: this covers the analyze_only case too */
    if (stats == NULL)
        PG_RETURN_POINTER(NULL);

    /* update statistics */
    num_pages = RelationGetNumberOfBlocks(rel);
    stats->num_pages = num_pages;

    PG_RETURN_POINTER(stats);
}

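/*
 * WAL replay stubs.  Hash index operations are not WAL-logged in this
 * version, so there are no hash redo records to apply and nothing for the
 * descriptor routine to print; hash_redo should never actually be reached.
 */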
void
hash_redo(XLogRecPtr lsn, XLogRecord *record)
{
    elog(PANIC, "hash_redo: unimplemented");
}

void
hash_desc(StringInfo buf, uint8 xl_info, char *rec)
{
}