Revise _bt_binsrch() so that its binary search loop takes
care of equal-key cases, eliminating _bt_firsteq(). The linear search formerly done by _bt_firsteq() took a lot of time in the case where many equal keys appear on the same page.
This commit is contained in:
parent
9679cb3910
commit
bc9236bc01
|
@ -7,7 +7,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.50 1999/07/16 04:58:30 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.51 1999/07/16 22:17:06 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -26,8 +26,6 @@
|
||||||
|
|
||||||
static BTStack _bt_searchr(Relation rel, int keysz, ScanKey scankey,
|
static BTStack _bt_searchr(Relation rel, int keysz, ScanKey scankey,
|
||||||
Buffer *bufP, BTStack stack_in);
|
Buffer *bufP, BTStack stack_in);
|
||||||
static OffsetNumber _bt_firsteq(Relation rel, TupleDesc itupdesc, Page page,
|
|
||||||
Size keysz, ScanKey scankey, OffsetNumber offnum);
|
|
||||||
static int _bt_compare(Relation rel, TupleDesc itupdesc, Page page,
|
static int _bt_compare(Relation rel, TupleDesc itupdesc, Page page,
|
||||||
int keysz, ScanKey scankey, OffsetNumber offnum);
|
int keysz, ScanKey scankey, OffsetNumber offnum);
|
||||||
static bool
|
static bool
|
||||||
|
@ -368,7 +366,9 @@ _bt_skeycmp(Relation rel,
|
||||||
* comparison for every key in the scankey. _bt_binsrch() returns
|
* comparison for every key in the scankey. _bt_binsrch() returns
|
||||||
* the OffsetNumber of the first matching key on the page, or the
|
* the OffsetNumber of the first matching key on the page, or the
|
||||||
* OffsetNumber at which the matching key would appear if it were
|
* OffsetNumber at which the matching key would appear if it were
|
||||||
* on this page.
|
* on this page. (NOTE: in particular, this means it is possible to
|
||||||
|
* return a value 1 greater than the number of keys on the page, if
|
||||||
|
* the scankey is > all keys on the page.)
|
||||||
*
|
*
|
||||||
* By the time this procedure is called, we're sure we're looking
|
* By the time this procedure is called, we're sure we're looking
|
||||||
* at the right page -- don't need to walk right. _bt_binsrch() has
|
* at the right page -- don't need to walk right. _bt_binsrch() has
|
||||||
|
@ -385,8 +385,8 @@ _bt_binsrch(Relation rel,
|
||||||
Page page;
|
Page page;
|
||||||
BTPageOpaque opaque;
|
BTPageOpaque opaque;
|
||||||
OffsetNumber low,
|
OffsetNumber low,
|
||||||
mid,
|
|
||||||
high;
|
high;
|
||||||
|
bool haveEq;
|
||||||
int natts = rel->rd_rel->relnatts;
|
int natts = rel->rd_rel->relnatts;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
|
@ -395,148 +395,112 @@ _bt_binsrch(Relation rel,
|
||||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||||
|
|
||||||
/* by convention, item 1 on any non-rightmost page is the high key */
|
/* by convention, item 1 on any non-rightmost page is the high key */
|
||||||
low = mid = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
|
low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
|
||||||
|
|
||||||
high = PageGetMaxOffsetNumber(page);
|
high = PageGetMaxOffsetNumber(page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since for non-rightmost pages, the first item on the page is the
|
* If there are no keys on the page, return the first available slot.
|
||||||
* high key, there are two notions of emptiness. One is if nothing
|
* Note this covers two cases: the page is really empty (no keys),
|
||||||
* appears on the page. The other is if nothing but the high key
|
* or it contains only a high key. The latter case is possible after
|
||||||
* does. The reason we test high <= low, rather than high == low, is
|
* vacuuming.
|
||||||
* that after vacuuming there may be nothing *but* the high key on a
|
|
||||||
* page. In that case, given the scheme above, low = 2 and high = 1.
|
|
||||||
*/
|
*/
|
||||||
|
if (high < low)
|
||||||
if (PageIsEmpty(page))
|
|
||||||
return low;
|
return low;
|
||||||
if ((!P_RIGHTMOST(opaque) && high <= low))
|
|
||||||
{
|
|
||||||
if (high < low ||
|
|
||||||
(srchtype == BT_DESCENT && !(opaque->btpo_flags & BTP_LEAF)))
|
|
||||||
return low;
|
|
||||||
/* It's insertion and high == low == 2 */
|
|
||||||
result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
|
|
||||||
if (result > 0)
|
|
||||||
return OffsetNumberNext(low);
|
|
||||||
return low;
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((high - low) > 1)
|
/*
|
||||||
|
* Binary search to find the first key on the page >= scan key.
|
||||||
|
* Loop invariant: all slots before 'low' are < scan key, all slots
|
||||||
|
* at or after 'high' are >= scan key. Also, haveEq is true if the
|
||||||
|
* tuple at 'high' is == scan key.
|
||||||
|
* We can fall out when high == low.
|
||||||
|
*/
|
||||||
|
high++; /* establish the loop invariant for high */
|
||||||
|
haveEq = false;
|
||||||
|
|
||||||
|
while (high > low)
|
||||||
{
|
{
|
||||||
mid = low + ((high - low) / 2);
|
OffsetNumber mid = low + ((high - low) / 2);
|
||||||
|
/* We have low <= mid < high, so mid points at a real slot */
|
||||||
|
|
||||||
result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid);
|
result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid);
|
||||||
|
|
||||||
if (result > 0)
|
if (result > 0)
|
||||||
low = mid;
|
low = mid + 1;
|
||||||
else if (result < 0)
|
|
||||||
high = mid - 1;
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid);
|
high = mid;
|
||||||
|
haveEq = (result == 0);
|
||||||
/*
|
|
||||||
* NOTE for multi-column indices: we may do scan using keys
|
|
||||||
* not for all attrs. But we handle duplicates using all attrs
|
|
||||||
* in _bt_insert/_bt_spool code. And so while searching on
|
|
||||||
* internal pages having number of attrs > keysize we want to
|
|
||||||
* point at the last item < the scankey, not at the first item
|
|
||||||
* = the scankey (!!!), and let _bt_moveright decide later
|
|
||||||
* whether to move right or not (see comments and example
|
|
||||||
* there). Note also that INSERTions are not affected by this
|
|
||||||
* code (natts == keysz). - vadim 04/15/97
|
|
||||||
*/
|
|
||||||
if (natts == keysz || opaque->btpo_flags & BTP_LEAF)
|
|
||||||
return mid;
|
|
||||||
low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
|
|
||||||
if (mid == low)
|
|
||||||
return mid;
|
|
||||||
return OffsetNumberPrev(mid);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*--------------------
|
||||||
* We terminated because the endpoints got too close together. There
|
* At this point we have high == low, but be careful: they could point
|
||||||
* are two cases to take care of.
|
* past the last slot on the page. We also know that haveEq is true
|
||||||
|
* if and only if there is an equal key (in which case high&low point
|
||||||
|
* at the first equal key).
|
||||||
*
|
*
|
||||||
* For non-insertion searches on internal pages, we want to point at the
|
* On a leaf page, we always return the first key >= scan key
|
||||||
* last key <, or first key =, the scankey on the page. This
|
* (which could be the last slot + 1).
|
||||||
* guarantees that we'll descend the tree correctly. (NOTE comments
|
*--------------------
|
||||||
* above for multi-column indices).
|
|
||||||
*
|
|
||||||
* For all other cases, we want to point at the first key >= the scankey
|
|
||||||
* on the page. This guarantees that scans and insertions will happen
|
|
||||||
* correctly.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT)
|
if (opaque->btpo_flags & BTP_LEAF)
|
||||||
{ /* We want the last key <, or first key
|
return low;
|
||||||
* ==, the scan key. */
|
|
||||||
result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
|
|
||||||
|
|
||||||
if (result == 0)
|
/*--------------------
|
||||||
{
|
* On a non-leaf page, there are special cases:
|
||||||
mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high);
|
*
|
||||||
|
* For an insertion (srchtype != BT_DESCENT and natts == keysz)
|
||||||
|
* always return first key >= scan key (which could be off the end).
|
||||||
|
*
|
||||||
|
* For a standard search (srchtype == BT_DESCENT and natts == keysz)
|
||||||
|
* return the first equal key if one exists, else the last lesser key
|
||||||
|
* if one exists, else the first slot on the page.
|
||||||
|
*
|
||||||
|
* For a partial-match search (srchtype == BT_DESCENT and natts > keysz)
|
||||||
|
* return the last lesser key if one exists, else the first slot.
|
||||||
|
*
|
||||||
|
* Old comments:
|
||||||
|
* For multi-column indices, we may scan using keys
|
||||||
|
* not for all attrs. But we handle duplicates using all attrs
|
||||||
|
* in _bt_insert/_bt_spool code. And so while searching on
|
||||||
|
* internal pages having number of attrs > keysize we want to
|
||||||
|
* point at the last item < the scankey, not at the first item
|
||||||
|
* = the scankey (!!!), and let _bt_moveright decide later
|
||||||
|
* whether to move right or not (see comments and example
|
||||||
|
* there). Note also that INSERTions are not affected by this
|
||||||
|
* code (since natts == keysz for inserts). - vadim 04/15/97
|
||||||
|
*--------------------
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
if (haveEq)
|
||||||
* If natts > keysz we want last item < the scan key. See
|
{
|
||||||
* comments above for multi-column indices.
|
/*
|
||||||
*/
|
* There is an equal key. We return either the first equal key
|
||||||
if (natts == keysz)
|
* (which we just found), or the last lesser key.
|
||||||
return mid;
|
*
|
||||||
low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
|
* We need not check srchtype != BT_DESCENT here, since if that
|
||||||
if (mid == low)
|
* is true then natts == keysz by assumption.
|
||||||
return mid;
|
*/
|
||||||
return OffsetNumberPrev(mid);
|
if (natts == keysz)
|
||||||
}
|
return low; /* return first equal key */
|
||||||
else if (result > 0)
|
|
||||||
return high;
|
|
||||||
else
|
|
||||||
return low;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
/* we want the first key >= the scan key */
|
|
||||||
{
|
{
|
||||||
result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
|
/*
|
||||||
if (result <= 0)
|
* There is no equal key. We return either the first greater key
|
||||||
return low;
|
* (which we just found), or the last lesser key.
|
||||||
else
|
*/
|
||||||
{
|
if (srchtype != BT_DESCENT)
|
||||||
if (low == high)
|
return low; /* return first greater key */
|
||||||
return OffsetNumberNext(low);
|
|
||||||
|
|
||||||
result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
|
|
||||||
if (result <= 0)
|
|
||||||
return high;
|
|
||||||
else
|
|
||||||
return OffsetNumberNext(high);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
static OffsetNumber
|
|
||||||
_bt_firsteq(Relation rel,
|
|
||||||
TupleDesc itupdesc,
|
|
||||||
Page page,
|
|
||||||
Size keysz,
|
|
||||||
ScanKey scankey,
|
|
||||||
OffsetNumber offnum)
|
|
||||||
{
|
|
||||||
BTPageOpaque opaque;
|
|
||||||
OffsetNumber limit;
|
|
||||||
|
|
||||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
if (low == (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY))
|
||||||
|
return low; /* there is no prior item */
|
||||||
|
|
||||||
/* skip the high key, if any */
|
return OffsetNumberPrev(low);
|
||||||
limit = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
|
|
||||||
|
|
||||||
/* walk backwards looking for the first key in the chain of duplicates */
|
|
||||||
while (offnum > limit
|
|
||||||
&& _bt_compare(rel, itupdesc, page,
|
|
||||||
keysz, scankey, OffsetNumberPrev(offnum)) == 0)
|
|
||||||
offnum = OffsetNumberPrev(offnum);
|
|
||||||
|
|
||||||
return offnum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -571,7 +535,6 @@ _bt_compare(Relation rel,
|
||||||
{
|
{
|
||||||
Datum datum;
|
Datum datum;
|
||||||
BTItem btitem;
|
BTItem btitem;
|
||||||
ItemId itemid;
|
|
||||||
IndexTuple itup;
|
IndexTuple itup;
|
||||||
BTPageOpaque opaque;
|
BTPageOpaque opaque;
|
||||||
ScanKey entry;
|
ScanKey entry;
|
||||||
|
@ -589,12 +552,11 @@ _bt_compare(Relation rel,
|
||||||
*/
|
*/
|
||||||
|
|
||||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||||
|
|
||||||
if (!(opaque->btpo_flags & BTP_LEAF)
|
if (!(opaque->btpo_flags & BTP_LEAF)
|
||||||
&& P_LEFTMOST(opaque)
|
&& P_LEFTMOST(opaque)
|
||||||
&& offnum == P_HIKEY)
|
&& offnum == P_HIKEY)
|
||||||
{
|
{
|
||||||
itemid = PageGetItemId(page, offnum);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* we just have to believe that this will only be called with
|
* we just have to believe that this will only be called with
|
||||||
* offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the first
|
* offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the first
|
||||||
|
@ -621,7 +583,7 @@ _bt_compare(Relation rel,
|
||||||
* on the page is greater than anything.
|
* on the page is greater than anything.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
|
if (_bt_skeycmp(rel, keysz, scankey, page, PageGetItemId(page, offnum),
|
||||||
BTEqualStrategyNumber))
|
BTEqualStrategyNumber))
|
||||||
return 0;
|
return 0;
|
||||||
return 1;
|
return 1;
|
||||||
|
|
Loading…
Reference in New Issue