Revise _bt_binsrch() so that its binary search loop takes

care of equal-key cases, eliminating bt_firsteq().  The linear search
formerly done by bt_firsteq() took a lot of time in the case where many
equal keys appear on the same page.
This commit is contained in:
Tom Lane 1999-07-16 22:17:06 +00:00
parent 9679cb3910
commit bc9236bc01
1 changed files with 85 additions and 123 deletions

View File

@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.50 1999/07/16 04:58:30 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.51 1999/07/16 22:17:06 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -26,8 +26,6 @@
static BTStack _bt_searchr(Relation rel, int keysz, ScanKey scankey, static BTStack _bt_searchr(Relation rel, int keysz, ScanKey scankey,
Buffer *bufP, BTStack stack_in); Buffer *bufP, BTStack stack_in);
static OffsetNumber _bt_firsteq(Relation rel, TupleDesc itupdesc, Page page,
Size keysz, ScanKey scankey, OffsetNumber offnum);
static int _bt_compare(Relation rel, TupleDesc itupdesc, Page page, static int _bt_compare(Relation rel, TupleDesc itupdesc, Page page,
int keysz, ScanKey scankey, OffsetNumber offnum); int keysz, ScanKey scankey, OffsetNumber offnum);
static bool static bool
@ -368,7 +366,9 @@ _bt_skeycmp(Relation rel,
* comparison for every key in the scankey. _bt_binsrch() returns * comparison for every key in the scankey. _bt_binsrch() returns
* the OffsetNumber of the first matching key on the page, or the * the OffsetNumber of the first matching key on the page, or the
* OffsetNumber at which the matching key would appear if it were * OffsetNumber at which the matching key would appear if it were
* on this page. * on this page. (NOTE: in particular, this means it is possible to
* return a value 1 greater than the number of keys on the page, if
* the scankey is > all keys on the page.)
* *
* By the time this procedure is called, we're sure we're looking * By the time this procedure is called, we're sure we're looking
* at the right page -- don't need to walk right. _bt_binsrch() has * at the right page -- don't need to walk right. _bt_binsrch() has
@ -385,8 +385,8 @@ _bt_binsrch(Relation rel,
Page page; Page page;
BTPageOpaque opaque; BTPageOpaque opaque;
OffsetNumber low, OffsetNumber low,
mid,
high; high;
bool haveEq;
int natts = rel->rd_rel->relnatts; int natts = rel->rd_rel->relnatts;
int result; int result;
@ -395,148 +395,112 @@ _bt_binsrch(Relation rel,
opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/* by convention, item 1 on any non-rightmost page is the high key */ /* by convention, item 1 on any non-rightmost page is the high key */
low = mid = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
high = PageGetMaxOffsetNumber(page); high = PageGetMaxOffsetNumber(page);
/* /*
* Since for non-rightmost pages, the first item on the page is the * If there are no keys on the page, return the first available slot.
* high key, there are two notions of emptiness. One is if nothing * Note this covers two cases: the page is really empty (no keys),
* appears on the page. The other is if nothing but the high key * or it contains only a high key. The latter case is possible after
* does. The reason we test high <= low, rather than high == low, is * vacuuming.
* that after vacuuming there may be nothing *but* the high key on a
* page. In that case, given the scheme above, low = 2 and high = 1.
*/ */
if (high < low)
if (PageIsEmpty(page))
return low; return low;
if ((!P_RIGHTMOST(opaque) && high <= low))
{
if (high < low ||
(srchtype == BT_DESCENT && !(opaque->btpo_flags & BTP_LEAF)))
return low;
/* It's insertion and high == low == 2 */
result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
if (result > 0)
return OffsetNumberNext(low);
return low;
}
while ((high - low) > 1) /*
* Binary search to find the first key on the page >= scan key.
* Loop invariant: all slots before 'low' are < scan key, all slots
* at or after 'high' are >= scan key. Also, haveEq is true if the
* tuple at 'high' is == scan key.
* We can fall out when high == low.
*/
high++; /* establish the loop invariant for high */
haveEq = false;
while (high > low)
{ {
mid = low + ((high - low) / 2); OffsetNumber mid = low + ((high - low) / 2);
/* We have low <= mid < high, so mid points at a real slot */
result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid); result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid);
if (result > 0) if (result > 0)
low = mid; low = mid + 1;
else if (result < 0)
high = mid - 1;
else else
{ {
mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid); high = mid;
haveEq = (result == 0);
/*
* NOTE for multi-column indices: we may do scan using keys
* not for all attrs. But we handle duplicates using all attrs
* in _bt_insert/_bt_spool code. And so while searching on
* internal pages having number of attrs > keysize we want to
* point at the last item < the scankey, not at the first item
* = the scankey (!!!), and let _bt_moveright decide later
* whether to move right or not (see comments and example
* there). Note also that INSERTions are not affected by this
* code (natts == keysz). - vadim 04/15/97
*/
if (natts == keysz || opaque->btpo_flags & BTP_LEAF)
return mid;
low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
if (mid == low)
return mid;
return OffsetNumberPrev(mid);
} }
} }
/* /*--------------------
* We terminated because the endpoints got too close together. There * At this point we have high == low, but be careful: they could point
* are two cases to take care of. * past the last slot on the page. We also know that haveEq is true
* if and only if there is an equal key (in which case high&low point
* at the first equal key).
* *
* For non-insertion searches on internal pages, we want to point at the * On a leaf page, we always return the first key >= scan key
* last key <, or first key =, the scankey on the page. This * (which could be the last slot + 1).
* guarantees that we'll descend the tree correctly. (NOTE comments *--------------------
* above for multi-column indices).
*
* For all other cases, we want to point at the first key >= the scankey
* on the page. This guarantees that scans and insertions will happen
* correctly.
*/ */
if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT) if (opaque->btpo_flags & BTP_LEAF)
{ /* We want the last key <, or first key return low;
* ==, the scan key. */
result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
if (result == 0) /*--------------------
{ * On a non-leaf page, there are special cases:
mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high); *
* For an insertion (srchtype != BT_DESCENT and natts == keysz)
* always return first key >= scan key (which could be off the end).
*
* For a standard search (srchtype == BT_DESCENT and natts == keysz)
* return the first equal key if one exists, else the last lesser key
* if one exists, else the first slot on the page.
*
* For a partial-match search (srchtype == BT_DESCENT and natts < keysz)
* return the last lesser key if one exists, else the first slot.
*
* Old comments:
* For multi-column indices, we may scan using keys
* not for all attrs. But we handle duplicates using all attrs
* in _bt_insert/_bt_spool code. And so while searching on
* internal pages having number of attrs > keysize we want to
* point at the last item < the scankey, not at the first item
* = the scankey (!!!), and let _bt_moveright decide later
* whether to move right or not (see comments and example
* there). Note also that INSERTions are not affected by this
* code (since natts == keysz for inserts). - vadim 04/15/97
*--------------------
*/
/* if (haveEq)
* If natts > keysz we want last item < the scan key. See {
* comments above for multi-column indices. /*
*/ * There is an equal key. We return either the first equal key
if (natts == keysz) * (which we just found), or the last lesser key.
return mid; *
low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; * We need not check srchtype != BT_DESCENT here, since if that
if (mid == low) * is true then natts == keysz by assumption.
return mid; */
return OffsetNumberPrev(mid); if (natts == keysz)
} return low; /* return first equal key */
else if (result > 0)
return high;
else
return low;
} }
else else
/* we want the first key >= the scan key */
{ {
result = _bt_compare(rel, itupdesc, page, keysz, scankey, low); /*
if (result <= 0) * There is no equal key. We return either the first greater key
return low; * (which we just found), or the last lesser key.
else */
{ if (srchtype != BT_DESCENT)
if (low == high) return low; /* return first greater key */
return OffsetNumberNext(low);
result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
if (result <= 0)
return high;
else
return OffsetNumberNext(high);
}
} }
}
static OffsetNumber
_bt_firsteq(Relation rel,
TupleDesc itupdesc,
Page page,
Size keysz,
ScanKey scankey,
OffsetNumber offnum)
{
BTPageOpaque opaque;
OffsetNumber limit;
opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (low == (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY))
return low; /* there is no prior item */
/* skip the high key, if any */ return OffsetNumberPrev(low);
limit = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
/* walk backwards looking for the first key in the chain of duplicates */
while (offnum > limit
&& _bt_compare(rel, itupdesc, page,
keysz, scankey, OffsetNumberPrev(offnum)) == 0)
offnum = OffsetNumberPrev(offnum);
return offnum;
} }
/* /*
@ -571,7 +535,6 @@ _bt_compare(Relation rel,
{ {
Datum datum; Datum datum;
BTItem btitem; BTItem btitem;
ItemId itemid;
IndexTuple itup; IndexTuple itup;
BTPageOpaque opaque; BTPageOpaque opaque;
ScanKey entry; ScanKey entry;
@ -589,12 +552,11 @@ _bt_compare(Relation rel,
*/ */
opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (!(opaque->btpo_flags & BTP_LEAF) if (!(opaque->btpo_flags & BTP_LEAF)
&& P_LEFTMOST(opaque) && P_LEFTMOST(opaque)
&& offnum == P_HIKEY) && offnum == P_HIKEY)
{ {
itemid = PageGetItemId(page, offnum);
/* /*
* we just have to believe that this will only be called with * we just have to believe that this will only be called with
* offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the first * offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the first
@ -621,7 +583,7 @@ _bt_compare(Relation rel,
* on the page is greater than anything. * on the page is greater than anything.
*/ */
if (_bt_skeycmp(rel, keysz, scankey, page, itemid, if (_bt_skeycmp(rel, keysz, scankey, page, PageGetItemId(page, offnum),
BTEqualStrategyNumber)) BTEqualStrategyNumber))
return 0; return 0;
return 1; return 1;