/*-------------------------------------------------------------------------
 *
 * tidbitmap.c
 *	  PostgreSQL tuple-id (TID) bitmap package
 *
 * This module provides bitmap data structures that are spiritually
 * similar to Bitmapsets, but are specially adapted to store sets of
 * tuple identifiers (TIDs), or ItemPointers.  In particular, the division
 * of an ItemPointer into BlockNumber and OffsetNumber is catered for.
 * Also, since we wish to be able to store very large tuple sets in
 * memory with this data structure, we support "lossy" storage, in which
 * we no longer remember individual tuple offsets on a page but only the
 * fact that a particular page needs to be visited.
 *
 * The "lossy" storage uses one bit per disk page, so at the standard 8K
 * BLCKSZ, we can represent all pages in 64Gb of disk space in about 1Mb
 * of memory.  People pushing around tables of that size should have a
 * couple of Mb to spare, so we don't worry about providing a second level
 * of lossiness.  In theory we could fall back to page ranges at some
 * point, but for now that seems useless complexity.
 *
 *
 * Copyright (c) 2003-2005, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.1 2005/04/17 22:24:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <limits.h>

#include "access/htup.h"
#include "nodes/tidbitmap.h"
#include "utils/hsearch.h"

/*
 * The maximum number of tuples per page is not large (typically 256 with
 * 8K pages, or 1024 with 32K pages).  So there's not much point in making
 * the per-page bitmaps variable size.  We just legislate that the size
 * is this:
 */
#define MAX_TUPLES_PER_PAGE \
    ((BLCKSZ - 1) / MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) + \
                             sizeof(ItemIdData)) + 1)

/*
 * When we have to switch over to lossy storage, we use a data structure
 * with one bit per page, where all pages having the same number DIV
 * PAGES_PER_CHUNK are aggregated into one chunk.  When a chunk is present
 * and has the bit set for a given page, there must not be a per-page entry
 * for that page in the page table.
 *
 * We actually store both exact pages and lossy chunks in the same hash
 * table, using identical data structures.  (This is because dynahash.c's
 * memory management doesn't allow space to be transferred easily from one
 * hashtable to another.)  Therefore it's best if PAGES_PER_CHUNK is the
 * same as MAX_TUPLES_PER_PAGE, or at least not too different.  But we
 * also want PAGES_PER_CHUNK to be a power of 2 to avoid expensive integer
 * remainder operations.  So, define it like this:
 */
#define PAGES_PER_CHUNK  (BLCKSZ / 32)

/* The bitmap unit size can be adjusted by changing these declarations: */
#define BITS_PER_BITMAPWORD 32
typedef uint32 bitmapword;      /* must be an unsigned type */

#define WORDNUM(x)  ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x)   ((x) % BITS_PER_BITMAPWORD)

/* number of active words for an exact page: */
#define WORDS_PER_PAGE  ((MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
/* number of active words for a lossy chunk: */
#define WORDS_PER_CHUNK  ((PAGES_PER_CHUNK - 1) / BITS_PER_BITMAPWORD + 1)
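/*
 * For illustration, assuming the default BLCKSZ of 8192: PAGES_PER_CHUNK
 * is 8192/32 = 256, so one lossy chunk covers 256 heap pages and needs
 * WORDS_PER_CHUNK = (256 - 1)/32 + 1 = 8 bitmapwords.  That is also where
 * the "64Gb in about 1Mb" figure in the header comes from: 64GB / 8KB =
 * 8M pages, and 8M lossy bits occupy 1MB.  MAX_TUPLES_PER_PAGE depends on
 * MAXALIGN, but at 8K pages it works out to a few hundred tuples, so
 * WORDS_PER_PAGE is of the same order as WORDS_PER_CHUNK.
 */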
/*
 * The hashtable entries are represented by this data structure.  For
 * an exact page, blockno is the page number and bit k of the bitmap
 * represents tuple offset k+1.  For a lossy chunk, blockno is the first
 * page in the chunk (this must be a multiple of PAGES_PER_CHUNK) and
 * bit k represents page blockno+k.  Note that it is not possible to
 * have exact storage for the first page of a chunk if we are using
 * lossy storage for any page in the chunk's range, since the same
 * hashtable entry has to serve both purposes.
 */
typedef struct PagetableEntry
{
    BlockNumber blockno;        /* page number (hashtable key) */
    bool        ischunk;        /* T = lossy storage, F = exact */
    bitmapword  words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)];
} PagetableEntry;

/*
 * Here is the representation for a whole TIDBitMap:
 */
struct TIDBitmap
{
    NodeTag     type;           /* to make it a valid Node */
    MemoryContext mcxt;         /* memory context containing me */
    HTAB       *pagetable;      /* hash table of PagetableEntry's */
    int         nentries;       /* number of entries in pagetable */
    int         maxentries;     /* limit on same to meet maxbytes */
    int         npages;         /* number of exact entries in pagetable */
    int         nchunks;        /* number of lossy entries in pagetable */
    bool        iterating;      /* tbm_begin_iterate called? */
    /* the remaining fields are used while producing sorted output: */
    TBMIterateResult *output;   /* NULL if not yet created */
    PagetableEntry **spages;    /* sorted exact-page list, or NULL */
    PagetableEntry **schunks;   /* sorted lossy-chunk list, or NULL */
    int         spageptr;       /* next spages index */
    int         schunkptr;      /* next schunks index */
    int         schunkbit;      /* next bit to check in current schunk */
};

/* Local function prototypes */
static PagetableEntry *tbm_find_pageentry(const TIDBitmap *tbm,
                    BlockNumber pageno);
static PagetableEntry *tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno);
static bool tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno);
static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno);
static void tbm_lossify(TIDBitmap *tbm);
static int  tbm_comparator(const void *left, const void *right);


/*
 * tbm_create - create an initially-empty bitmap
 *
 * The bitmap will live in the memory context that is CurrentMemoryContext
 * at the time of this call.  It will be limited to (approximately) maxbytes
 * total memory consumption.
 */
TIDBitmap *
tbm_create(long maxbytes)
{
    TIDBitmap  *tbm;
    HASHCTL     hash_ctl;
    long        nbuckets;

    tbm = makeNode(TIDBitmap);
    /* we rely on makeNode to have zeroed all the fields */

    tbm->mcxt = CurrentMemoryContext;

    /*
     * Estimate number of hashtable entries we can have within maxbytes.
     * This estimates the hash overhead at MAXALIGN(sizeof(HASHELEMENT))
     * plus a pointer per hash entry, which is crude but good enough for
     * our purpose.  (NOTE: this does not count the space for data
     * structures created during iteration readout.)
     */
    nbuckets = maxbytes /
        (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(PagetableEntry))
         + sizeof(Pointer));
    nbuckets = Min(nbuckets, INT_MAX - 1);      /* safety limit */
    tbm->maxentries = (int) nbuckets;

    MemSet(&hash_ctl, 0, sizeof(hash_ctl));
    hash_ctl.keysize = sizeof(BlockNumber);
    hash_ctl.entrysize = sizeof(PagetableEntry);
    hash_ctl.hash = tag_hash;
    hash_ctl.hcxt = CurrentMemoryContext;
    tbm->pagetable = hash_create("TIDBitmap",
                                 nbuckets,
                                 &hash_ctl,
                                 HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

    return tbm;
}
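/*
 * Illustrative sketch only (hypothetical, guarded out of the build): one
 * possible calling sequence for this module, for a caller that collects
 * TIDs and then reads them back in block-number order.  The entry points
 * used are just the public functions declared in nodes/tidbitmap.h.
 */
#ifdef NOT_USED
static void
tbm_example_usage(void)
{
    TIDBitmap  *tbm = tbm_create(1024L * 1024L);    /* ~1MB workspace */
    ItemPointerData tid;
    TBMIterateResult *res;

    /* remember heap tuple (block 42, offset 7) */
    ItemPointerSet(&tid, 42, 7);
    tbm_add_tuples(tbm, &tid, 1);

    /* freeze the bitmap and read it back in block-number order */
    tbm_begin_iterate(tbm);
    while ((res = tbm_iterate(tbm)) != NULL)
    {
        if (res->ntuples < 0)
        {
            /* lossy result: every tuple on res->blockno must be checked */
        }
        else
        {
            /* exact result: visit res->offsets[0 .. res->ntuples - 1] */
        }
    }

    tbm_free(tbm);
}
#endif   /* NOT_USED */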
/*
 * tbm_free - free a TIDBitmap
 */
void
tbm_free(TIDBitmap *tbm)
{
    hash_destroy(tbm->pagetable);
    if (tbm->output)
        pfree(tbm->output);
    if (tbm->spages)
        pfree(tbm->spages);
    if (tbm->schunks)
        pfree(tbm->schunks);
    pfree(tbm);
}

/*
 * tbm_add_tuples - add some tuple IDs to a TIDBitmap
 */
void
tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids)
{
    int         i;

    Assert(!tbm->iterating);
    for (i = 0; i < ntids; i++)
    {
        BlockNumber blk = ItemPointerGetBlockNumber(tids + i);
        OffsetNumber off = ItemPointerGetOffsetNumber(tids + i);
        PagetableEntry *page;
        int         wordnum,
                    bitnum;

        /* safety check to ensure we don't overrun bit array bounds */
        if (off < 1 || off > MAX_TUPLES_PER_PAGE)
            elog(ERROR, "tuple offset out of range: %u", off);

        if (tbm_page_is_lossy(tbm, blk))
            continue;           /* whole page is already marked */

        page = tbm_get_pageentry(tbm, blk);

        if (page->ischunk)
        {
            /* The page is a lossy chunk header, set bit for itself */
            wordnum = bitnum = 0;
        }
        else
        {
            /* Page is exact, so set bit for individual tuple */
            wordnum = WORDNUM(off - 1);
            bitnum = BITNUM(off - 1);
        }
        page->words[wordnum] |= ((bitmapword) 1 << bitnum);

        if (tbm->nentries > tbm->maxentries)
            tbm_lossify(tbm);
    }
}
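/*
 * For illustration, assuming an exact (non-chunk) page entry: since bit k
 * of an exact page's bitmap represents tuple offset k+1, adding offset 7
 * sets word 0, bit 6, while adding offset 33 sets word 1, bit 0
 * (WORDNUM(32) = 1, BITNUM(32) = 0).
 */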
/*
 * tbm_union - set union
 *
 * a is modified in-place, b is not changed
 */
void
tbm_union(TIDBitmap *a, const TIDBitmap *b)
{
    HASH_SEQ_STATUS status;
    PagetableEntry *apage;
    PagetableEntry *bpage;
    int         wordnum;

    Assert(!a->iterating);
    /* Scan through chunks and pages in b, merge into a */
    hash_seq_init(&status, b->pagetable);
    while ((bpage = (PagetableEntry *) hash_seq_search(&status)) != NULL)
    {
        if (bpage->ischunk)
        {
            /* Scan b's chunk, mark each indicated page lossy in a */
            for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
            {
                bitmapword  w = bpage->words[wordnum];

                if (w != 0)
                {
                    BlockNumber pg;

                    pg = bpage->blockno + (wordnum * BITS_PER_BITMAPWORD);
                    while (w != 0)
                    {
                        if (w & 1)
                            tbm_mark_page_lossy(a, pg);
                        pg++;
                        w >>= 1;
                    }
                }
            }
        }
        else if (tbm_page_is_lossy(a, bpage->blockno))
        {
            /* page is already lossy in a, nothing to do */
            continue;
        }
        else
        {
            apage = tbm_get_pageentry(a, bpage->blockno);
            if (apage->ischunk)
            {
                /* The page is a lossy chunk header, set bit for itself */
                apage->words[0] |= ((bitmapword) 1 << 0);
            }
            else
            {
                /* Both pages are exact, merge at the bit level */
                for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
                    apage->words[wordnum] |= bpage->words[wordnum];
            }
        }

        if (a->nentries > a->maxentries)
            tbm_lossify(a);
    }
}
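/*
 * Worked example of the chunk walk above, for illustration: if a chunk
 * with blockno 256 holds words[0] = 0x5 (bits 0 and 2 set), the inner
 * shift-and-test loop visits pages 256 and 258.  tbm_intersect below
 * scans chunk words with the same pattern, clearing bits instead.
 */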
/*
 * tbm_intersect - set intersection
 *
 * a is modified in-place, b is not changed
 */
void
tbm_intersect(TIDBitmap *a, const TIDBitmap *b)
{
    HASH_SEQ_STATUS status;
    PagetableEntry *apage;
    PagetableEntry *bpage;
    int         wordnum;

    Assert(!a->iterating);
    /* Scan through chunks and pages in a, try to match to b */
    hash_seq_init(&status, a->pagetable);
    while ((apage = (PagetableEntry *) hash_seq_search(&status)) != NULL)
    {
        if (apage->ischunk)
        {
            /* Scan each bit in chunk, try to clear */
            bool        candelete = true;

            for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
            {
                bitmapword  w = apage->words[wordnum];

                if (w != 0)
                {
                    bitmapword  neww = w;
                    BlockNumber pg;
                    int         bitnum;

                    pg = apage->blockno + (wordnum * BITS_PER_BITMAPWORD);
                    bitnum = 0;
                    while (w != 0)
                    {
                        if (w & 1)
                        {
                            if (!tbm_page_is_lossy(b, pg) &&
                                tbm_find_pageentry(b, pg) == NULL)
                            {
                                /* Page is not in b at all, lose lossy bit */
                                neww &= ~((bitmapword) 1 << bitnum);
                            }
                        }
                        pg++;
                        bitnum++;
                        w >>= 1;
                    }
                    apage->words[wordnum] = neww;
                    if (neww != 0)
                        candelete = false;
                }
            }
            if (candelete)
            {
                /* Chunk is now empty, remove it from a */
                if (hash_search(a->pagetable,
                                (void *) &apage->blockno,
                                HASH_REMOVE, NULL) == NULL)
                    elog(ERROR, "hash table corrupted");
                a->nentries--;
                a->nchunks--;
            }
        }
        else if (tbm_page_is_lossy(b, apage->blockno))
        {
            /* page is lossy in b, cannot clear any bits */
            continue;
        }
        else
        {
            bool        candelete = true;

            bpage = tbm_find_pageentry(b, apage->blockno);
            if (bpage != NULL)
            {
                /* Both pages are exact, merge at the bit level */
                Assert(!bpage->ischunk);
                for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
                {
                    apage->words[wordnum] &= bpage->words[wordnum];
                    if (apage->words[wordnum] != 0)
                        candelete = false;
                }
            }
            if (candelete)
            {
                /* Page is now empty, remove it from a */
                if (hash_search(a->pagetable,
                                (void *) &apage->blockno,
                                HASH_REMOVE, NULL) == NULL)
                    elog(ERROR, "hash table corrupted");
                a->nentries--;
                a->npages--;
            }
        }
    }
}

/*
 * tbm_begin_iterate - prepare to iterate through a TIDBitmap
 *
 * NB: after this is called, it is no longer allowed to modify the contents
 * of the bitmap.  However, you can call this multiple times to scan the
 * contents repeatedly.
 */
void
tbm_begin_iterate(TIDBitmap *tbm)
{
    HASH_SEQ_STATUS status;
    PagetableEntry *page;
    int         npages;
    int         nchunks;

    tbm->iterating = true;

    /*
     * Allocate the output data structure if we didn't already.
     * (We don't do this during tbm_create since it's entirely possible
     * that a TIDBitmap will live and die without ever being iterated.)
     */
    if (!tbm->output)
        tbm->output = (TBMIterateResult *)
            MemoryContextAllocZero(tbm->mcxt,
                                   sizeof(TBMIterateResult) +
                                   MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));

    /*
     * Create and fill the sorted page lists if we didn't already.
     */
    if (!tbm->spages && tbm->npages > 0)
        tbm->spages = (PagetableEntry **)
            MemoryContextAlloc(tbm->mcxt,
                               tbm->npages * sizeof(PagetableEntry *));
    if (!tbm->schunks && tbm->nchunks > 0)
        tbm->schunks = (PagetableEntry **)
            MemoryContextAlloc(tbm->mcxt,
                               tbm->nchunks * sizeof(PagetableEntry *));

    hash_seq_init(&status, tbm->pagetable);
    npages = nchunks = 0;
    while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL)
    {
        if (page->ischunk)
            tbm->schunks[nchunks++] = page;
        else
            tbm->spages[npages++] = page;
    }
    Assert(npages == tbm->npages);
    Assert(nchunks == tbm->nchunks);
    if (npages > 1)
        qsort(tbm->spages, npages, sizeof(PagetableEntry *), tbm_comparator);
    if (nchunks > 1)
        qsort(tbm->schunks, nchunks, sizeof(PagetableEntry *), tbm_comparator);

    /*
     * Reset iteration pointers.
     */
    tbm->spageptr = 0;
    tbm->schunkptr = 0;
    tbm->schunkbit = 0;
}

/*
 * tbm_iterate - scan through next page of a TIDBitmap
 *
 * Returns a TBMIterateResult representing one page, or NULL if there are
 * no more pages to scan.  Pages are guaranteed to be delivered in numerical
 * order.  If result->ntuples < 0, then the bitmap is "lossy" and failed to
 * remember the exact tuples to look at on this page --- the caller must
 * examine all tuples on the page and check if they meet the intended
 * condition.
 */
TBMIterateResult *
tbm_iterate(TIDBitmap *tbm)
{
    TBMIterateResult *output = tbm->output;

    Assert(tbm->iterating);

    /*
     * If lossy chunk pages remain, make sure we've advanced schunkptr/
     * schunkbit to the next set bit.
     */
    while (tbm->schunkptr < tbm->nchunks)
    {
        PagetableEntry *chunk = tbm->schunks[tbm->schunkptr];
        int         schunkbit = tbm->schunkbit;

        while (schunkbit < PAGES_PER_CHUNK)
        {
            int         wordnum = WORDNUM(schunkbit);
            int         bitnum = BITNUM(schunkbit);

            if ((chunk->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
                break;
            schunkbit++;
        }
        if (schunkbit < PAGES_PER_CHUNK)
        {
            tbm->schunkbit = schunkbit;
            break;
        }
        /* advance to next chunk */
        tbm->schunkptr++;
        tbm->schunkbit = 0;
    }

    /*
     * If both chunk and per-page data remain, must output the numerically
     * earlier page.
     */
    if (tbm->schunkptr < tbm->nchunks)
    {
        PagetableEntry *chunk = tbm->schunks[tbm->schunkptr];
        BlockNumber chunk_blockno;

        chunk_blockno = chunk->blockno + tbm->schunkbit;
        if (tbm->spageptr >= tbm->npages ||
            chunk_blockno < tbm->spages[tbm->spageptr]->blockno)
        {
            /* Return a lossy page indicator from the chunk */
            output->blockno = chunk_blockno;
            output->ntuples = -1;
            tbm->schunkbit++;
            return output;
        }
    }

    if (tbm->spageptr < tbm->npages)
    {
        PagetableEntry *page = tbm->spages[tbm->spageptr];
        int         ntuples;
        int         wordnum;

        /* scan bitmap to extract individual offset numbers */
        ntuples = 0;
        for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
        {
            bitmapword  w = page->words[wordnum];

            if (w != 0)
            {
                int         off = wordnum * BITS_PER_BITMAPWORD + 1;

                while (w != 0)
                {
                    if (w & 1)
                        output->offsets[ntuples++] = (OffsetNumber) off;
                    off++;
                    w >>= 1;
                }
            }
        }
        output->blockno = page->blockno;
        output->ntuples = ntuples;
        tbm->spageptr++;
        return output;
    }

    /* Nothing more in the bitmap */
    return NULL;
}
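/*
 * Illustration of the ordering guarantee above (hypothetical contents):
 * with exact entries for blocks 3 and 10 and a lossy chunk at blockno 0
 * whose bit 5 is set, successive tbm_iterate calls report block 3
 * (exact), block 5 (lossy, ntuples = -1), block 10 (exact), then NULL.
 */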
/*
 * tbm_find_pageentry - find a PagetableEntry for the pageno
 *
 * Returns NULL if there is no non-lossy entry for the pageno.
 */
static PagetableEntry *
tbm_find_pageentry(const TIDBitmap *tbm, BlockNumber pageno)
{
    PagetableEntry *page;

    page = (PagetableEntry *) hash_search(tbm->pagetable,
                                          (void *) &pageno,
                                          HASH_FIND, NULL);
    if (page == NULL)
        return NULL;
    if (page->ischunk)
        return NULL;            /* don't want a lossy chunk header */
    return page;
}

/*
 * tbm_get_pageentry - find or create a PagetableEntry for the pageno
 *
 * If new, the entry is marked as an exact (non-chunk) entry.
 *
 * This may cause the table to exceed the desired memory size.  It is
 * up to the caller to call tbm_lossify() at the next safe point if so.
 */
static PagetableEntry *
tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno)
{
    PagetableEntry *page;
    bool        found;

    /* Look up or create an entry */
    page = (PagetableEntry *) hash_search(tbm->pagetable,
                                          (void *) &pageno,
                                          HASH_ENTER, &found);
    if (page == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));

    /* Initialize it if not present before */
    if (!found)
    {
        MemSet(page, 0, sizeof(PagetableEntry));
        page->blockno = pageno;
        /* must count it too */
        tbm->nentries++;
        tbm->npages++;
    }

    return page;
}

/*
 * tbm_page_is_lossy - is the page marked as lossily stored?
 */
static bool
tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno)
{
    PagetableEntry *page;
    BlockNumber chunk_pageno;
    int         bitno;

    /* we can skip the lookup if there are no lossy chunks */
    if (tbm->nchunks == 0)
        return false;

    bitno = pageno % PAGES_PER_CHUNK;
    chunk_pageno = pageno - bitno;
    page = (PagetableEntry *) hash_search(tbm->pagetable,
                                          (void *) &chunk_pageno,
                                          HASH_FIND, NULL);
    if (page != NULL && page->ischunk)
    {
        int         wordnum = WORDNUM(bitno);
        int         bitnum = BITNUM(bitno);

        if ((page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
            return true;
    }
    return false;
}

/*
 * tbm_mark_page_lossy - mark the page number as lossily stored
 *
 * This may cause the table to exceed the desired memory size.  It is
 * up to the caller to call tbm_lossify() at the next safe point if so.
 */
static void
tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
{
    PagetableEntry *page;
    bool        found;
    BlockNumber chunk_pageno;
    int         bitno;
    int         wordnum;
    int         bitnum;

    bitno = pageno % PAGES_PER_CHUNK;
    chunk_pageno = pageno - bitno;

    /*
     * Remove any extant non-lossy entry for the page.  If the page is
     * its own chunk header, however, we skip this and handle the case
     * below.
     */
    if (bitno != 0)
    {
        if (hash_search(tbm->pagetable,
                        (void *) &pageno,
                        HASH_REMOVE, NULL) != NULL)
        {
            /* It was present, so adjust counts */
            tbm->nentries--;
            tbm->npages--;      /* assume it must have been non-lossy */
        }
    }

    /* Look up or create entry for chunk-header page */
    page = (PagetableEntry *) hash_search(tbm->pagetable,
                                          (void *) &chunk_pageno,
                                          HASH_ENTER, &found);
    if (page == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));

    /* Initialize it if not present before */
    if (!found)
    {
        MemSet(page, 0, sizeof(PagetableEntry));
        page->blockno = chunk_pageno;
        page->ischunk = true;
        /* must count it too */
        tbm->nentries++;
        tbm->nchunks++;
    }
    else if (!page->ischunk)
    {
        /* chunk header page was formerly non-lossy, make it lossy */
        MemSet(page, 0, sizeof(PagetableEntry));
        page->blockno = chunk_pageno;
        page->ischunk = true;
        /* we assume it had some tuple bit(s) set, so mark it lossy */
        page->words[0] = ((bitmapword) 1 << 0);
        /* adjust counts */
        tbm->nchunks++;
        tbm->npages--;
    }

    /* Now set the original target page's bit */
    wordnum = WORDNUM(bitno);
    bitnum = BITNUM(bitno);
    page->words[wordnum] |= ((bitmapword) 1 << bitnum);
}
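/*
 * For illustration, assuming 8K pages (PAGES_PER_CHUNK = 256): marking
 * page 300 lossy removes any exact entry for 300, then sets bit 44
 * (word 1, bit 12) in the chunk whose header page is 256.  Marking page
 * 256 itself lossy takes the bitno == 0 path: its exact entry, if any,
 * is converted in place into the chunk header with bit 0 set.
 */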
/*
 * tbm_lossify - lose some information to get back under the memory limit
 */
static void
tbm_lossify(TIDBitmap *tbm)
{
    HASH_SEQ_STATUS status;
    PagetableEntry *page;

    /*
     * XXX Really stupid implementation: this just lossifies pages in
     * essentially random order.  We should be paying some attention
     * to the number of bits set in each page, instead.  Also it might
     * be a good idea to lossify more than the minimum number of pages
     * during each call.
     */
    Assert(!tbm->iterating);
    hash_seq_init(&status, tbm->pagetable);
    while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL)
    {
        if (page->ischunk)
            continue;           /* already a chunk header */

        /*
         * If the page would become a chunk header, we won't save anything
         * by converting it to lossy, so skip it.
         */
        if ((page->blockno % PAGES_PER_CHUNK) == 0)
            continue;

        /* This does the dirty work ... */
        tbm_mark_page_lossy(tbm, page->blockno);

        if (tbm->nentries <= tbm->maxentries)
            return;             /* we have done enough */

        /*
         * Note: tbm_mark_page_lossy may have inserted a lossy chunk into
         * the hashtable.  We can continue the same seq_search scan since
         * we do not care whether we visit lossy chunks or not.
         */
    }
}

/*
 * qsort comparator to handle PagetableEntry pointers.
 */
static int
tbm_comparator(const void *left, const void *right)
{
    BlockNumber l = (*((const PagetableEntry **) left))->blockno;
    BlockNumber r = (*((const PagetableEntry **) right))->blockno;

    if (l < r)
        return -1;
    else if (l > r)
        return 1;
    return 0;
}