postgresql/src/backend/access/nbtree/nbtsort.c

/*-------------------------------------------------------------------------
*
* nbtsort.c
* Build a btree from sorted input by loading leaf pages sequentially.
*
* NOTES
*
* We use tuplesort.c to sort the given index tuples into order.
* Then we scan the index tuples in order and build the btree pages
* for each level. We load source tuples into leaf-level pages.
* Whenever we fill a page at one level, we add a link to it to its
* parent level (starting a new parent level if necessary). When
* done, we write out each final page on each level, adding it to
* its parent level. When we have only one page on a level, it must be
* the root -- it can be attached to the btree metapage and we are done.
*
* This code is moderately slow (~10% slower) compared to the regular
* btree (insertion) build code on sorted or well-clustered data. On
* random data, however, the insertion build code is unusable -- the
* difference on a 60MB heap is a factor of 15 because the random
* probes into the btree thrash the buffer pool. (NOTE: the above
* "10%" estimate is probably obsolete, since it refers to an old and
* not very good external sort implementation that used to exist in
* this module. tuplesort.c is almost certainly faster.)
*
* It is not wise to pack the pages entirely full, since then *any*
* insertion would cause a split (and not only of the leaf page; the need
* for a split would cascade right up the tree). The steady-state load
* factor for btrees is usually estimated at 70%. We choose to pack leaf
* pages to 90% and upper pages to 70%. This gives us reasonable density
* (there aren't many upper pages if the keys are reasonable-size) without
* incurring a lot of cascading splits during early insertions.
*
* Formerly the index pages being built were kept in shared buffers, but
* that is of no value (since other backends have no interest in them yet)
* and it created locking problems for CHECKPOINT, because the upper-level
* pages were held exclusive-locked for long periods. Now we just build
* the pages in local memory and smgrwrite() them as we finish them. They
* will need to be re-read into shared buffers on first use after the build
* finishes.
*
* Since the index will never be used unless it is completely built,
* from a crash-recovery point of view there is no need to WAL-log the
* steps of the build. After completing the index build, we can just sync
* the whole file to disk using smgrimmedsync() before exiting this module.
* This can be seen to be sufficient for crash recovery by considering that
* it's effectively equivalent to what would happen if a CHECKPOINT occurred
* just after the index build. However, it is clearly not sufficient if the
* DBA is using the WAL log for PITR or replication purposes, since another
* machine would not be able to reconstruct the index from WAL. Therefore,
* we log the completed index pages to WAL if and only if WAL archiving is
* active.
*
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.90 2004/12/31 21:59:22 pgsql Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"

#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "storage/smgr.h"
#include "utils/tuplesort.h"
/*
* Status record for spooling/sorting phase. (Note we may have two of
* these due to the special requirements for uniqueness-checking with
* dead tuples.)
*/
struct BTSpool
{
Tuplesortstate *sortstate; /* state data for tuplesort.c */
Relation index; /* index being built */
bool isunique; /* building a unique index? */
};
/*
* Status record for a btree page being built. We have one of these
* for each active tree level.
*
* The reason we need to store a copy of the minimum key is that we'll
* need to propagate it to the parent node when this page is linked
* into its parent. However, if the page is not a leaf page, the first
* entry on the page doesn't need to contain a key, so we will not have
* stored the key itself on the page. (You might think we could skip
* copying the minimum key on leaf pages, but actually we must have a
* writable copy anyway because we'll poke the page's address into it
* before passing it up to the parent...)
*/
typedef struct BTPageState
{
Page btps_page; /* workspace for page building */
BlockNumber btps_blkno; /* block # to write this page at */
BTItem btps_minkey; /* copy of minimum key (first item) on
* page */
OffsetNumber btps_lastoff; /* last item offset loaded */
uint32 btps_level; /* tree level (0 = leaf) */
Size btps_full; /* "full" if less than this much free
* space */
struct BTPageState *btps_next; /* link to parent level, if any */
} BTPageState;
/*
* Overall status record for index writing phase.
*/
typedef struct BTWriteState
{
Relation index;
bool btws_use_wal; /* dump pages to WAL? */
BlockNumber btws_pages_alloced; /* # pages allocated */
BlockNumber btws_pages_written; /* # pages written out */
Page btws_zeropage; /* workspace for filling zeroes */
} BTWriteState;
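/*
 * Size of a BTItem as stored on a page: the contained index tuple's data
 * size plus any BTItemData header overhead. This does not include the
 * ItemId, and callers apply MAXALIGN separately; a NULL pointer yields 0.
 */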
#define BTITEMSZ(btitem) \
((btitem) ? \
(IndexTupleDSize((btitem)->bti_itup) + \
(sizeof(BTItemData) - sizeof(IndexTupleData))) : \
0)
static Page _bt_blnewpage(uint32 level);
static BTPageState *_bt_pagestate(BTWriteState *wstate, uint32 level);
static void _bt_slideleft(Page page);
static void _bt_sortaddtup(Page page, Size itemsize,
BTItem btitem, OffsetNumber itup_off);
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti);
static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state);
static void _bt_load(BTWriteState *wstate,
BTSpool *btspool, BTSpool *btspool2);
/*
* Interface routines
*/
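/*
 * A minimal sketch of the expected call sequence (this is how a caller such
 * as btbuild() in nbtree.c is assumed to drive this module; the lines below
 * are illustrative, not code that lives in this file):
 *
 *    BTSpool *spool = _bt_spoolinit(index, isunique, false);
 *    ... call _bt_spool(btitem, spool) for each tuple to be indexed ...
 *    _bt_leafbuild(spool, NULL);
 *    _bt_spooldestroy(spool);
 *
 * When building a unique index, a second spool created with isdead = true
 * collects entries for dead heap tuples and is passed to _bt_leafbuild as
 * its second argument.
 */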
/*
* create and initialize a spool structure
*/
BTSpool *
_bt_spoolinit(Relation index, bool isunique, bool isdead)
{
BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
int btKbytes;
btspool->index = index;
btspool->isunique = isunique;
/*
* We size the sort area as maintenance_work_mem rather than work_mem
* to speed index creation. This should be OK since a single backend
* can't run multiple index creations in parallel. Note that creation
* of a unique index actually requires two BTSpool objects. We expect
* that the second one (for dead tuples) won't get very full, so we
* give it only work_mem.
*/
btKbytes = isdead ? work_mem : maintenance_work_mem;
btspool->sortstate = tuplesort_begin_index(index, isunique,
btKbytes, false);
/*
* Currently, tuplesort provides sort functions on IndexTuples. If we
* kept anything in a BTItem other than a regular IndexTuple, we'd
* need to modify tuplesort to understand BTItems as such.
*/
Assert(sizeof(BTItemData) == sizeof(IndexTupleData));
return btspool;
}
/*
* clean up a spool structure and its substructures.
*/
void
_bt_spooldestroy(BTSpool *btspool)
{
tuplesort_end(btspool->sortstate);
pfree(btspool);
}
/*
* spool a btitem into the sort file.
*/
void
_bt_spool(BTItem btitem, BTSpool *btspool)
{
/* A BTItem is really just an IndexTuple */
tuplesort_puttuple(btspool->sortstate, (void *) btitem);
}
/*
* given a spool loaded by successive calls to _bt_spool,
* create an entire btree.
*/
void
_bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
{
BTWriteState wstate;
#ifdef BTREE_BUILD_STATS
if (log_btree_build_stats)
{
ShowUsage("BTREE BUILD (Spool) STATISTICS");
ResetUsage();
}
#endif /* BTREE_BUILD_STATS */
tuplesort_performsort(btspool->sortstate);
if (btspool2)
tuplesort_performsort(btspool2->sortstate);
wstate.index = btspool->index;
/*
* We need to log index creation in WAL iff WAL archiving is enabled
* AND it's not a temp index.
*/
wstate.btws_use_wal = XLogArchivingActive() && !wstate.index->rd_istemp;
/* reserve the metapage */
wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
wstate.btws_pages_written = 0;
wstate.btws_zeropage = NULL; /* until needed */
_bt_load(&wstate, btspool, btspool2);
}
/*
* Internal routines.
*/
/*
* allocate workspace for a new, clean btree page, not linked to any siblings.
*/
static Page
_bt_blnewpage(uint32 level)
{
Page page;
BTPageOpaque opaque;
page = (Page) palloc(BLCKSZ);
/* Zero the page and set up standard page header info */
_bt_pageinit(page, BLCKSZ);
/* Initialize BT opaque state */
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
opaque->btpo_prev = opaque->btpo_next = P_NONE;
opaque->btpo.level = level;
opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
/* Make the P_HIKEY line pointer appear allocated */
((PageHeader) page)->pd_lower += sizeof(ItemIdData);
return page;
}
/*
* emit a completed btree page, and release the working storage.
*/
static void
_bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
{
/* XLOG stuff */
if (wstate->btws_use_wal)
{
/* We use the heap NEWPAGE record type for this */
xl_heap_newpage xlrec;
XLogRecPtr recptr;
XLogRecData rdata[2];
/* NO ELOG(ERROR) from here till newpage op is logged */
START_CRIT_SECTION();
xlrec.node = wstate->index->rd_node;
xlrec.blkno = blkno;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapNewpage;
rdata[0].next = &(rdata[1]);
rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) page;
rdata[1].len = BLCKSZ;
rdata[1].next = NULL;
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION();
}
else
{
/* Leave the page LSN zero if not WAL-logged, but set TLI anyway */
PageSetTLI(page, ThisTimeLineID);
}
/*
* If we have to write pages nonsequentially, fill in the space with
* zeroes until we come back and overwrite. This is not logically
* necessary on standard Unix filesystems (unwritten space will read
* as zeroes anyway), but it should help to avoid fragmentation. The
* dummy pages aren't WAL-logged though.
*/
while (blkno > wstate->btws_pages_written)
{
if (!wstate->btws_zeropage)
wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
smgrwrite(wstate->index->rd_smgr, wstate->btws_pages_written++,
(char *) wstate->btws_zeropage,
true);
}
/*
* Now write the page. We say isTemp = true even if it's not a temp
* index, because there's no need for smgr to schedule an fsync for
* this write; we'll do it ourselves before ending the build.
*/
smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
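/* if we just wrote the next sequential page, advance the written-pages count */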
if (blkno == wstate->btws_pages_written)
wstate->btws_pages_written++;
pfree(page);
}
/*
* allocate and initialize a new BTPageState. the returned structure
* is suitable for immediate use by _bt_buildadd.
*/
static BTPageState *
_bt_pagestate(BTWriteState *wstate, uint32 level)
{
BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState));
/* create initial page for level */
state->btps_page = _bt_blnewpage(level);
/* and assign it a page position */
state->btps_blkno = wstate->btws_pages_alloced++;
state->btps_minkey = NULL;
/* initialize lastoff so first item goes into P_FIRSTKEY */
state->btps_lastoff = P_HIKEY;
state->btps_level = level;
/* set "full" threshold based on level. See notes at head of file. */
if (level > 0)
state->btps_full = (PageGetPageSize(state->btps_page) * 3) / 10; /* leave 30% free: ~70% fill */
else
state->btps_full = PageGetPageSize(state->btps_page) / 10; /* leave 10% free: ~90% fill */
/* no parent level, yet */
state->btps_next = NULL;
return state;
}
/*
* slide an array of ItemIds back one slot (from P_FIRSTKEY to
* P_HIKEY, overwriting P_HIKEY). we need to do this when we discover
* that we have built an ItemId array in what has turned out to be a
* P_RIGHTMOST page.
*/
static void
_bt_slideleft(Page page)
{
OffsetNumber off;
OffsetNumber maxoff;
ItemId previi;
ItemId thisii;
if (!PageIsEmpty(page))
{
maxoff = PageGetMaxOffsetNumber(page);
previi = PageGetItemId(page, P_HIKEY);
for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
{
thisii = PageGetItemId(page, off);
*previi = *thisii;
previi = thisii;
}
((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
}
}
/*
* Add an item to a page being built.
*
* The main difference between this routine and a bare PageAddItem call
* is that this code knows that the leftmost data item on a non-leaf
* btree page doesn't need to have a key. Therefore, it strips such
* items down to just the item header.
*
* This is almost like nbtinsert.c's _bt_pgaddtup(), but we can't use
* that because it assumes that P_RIGHTMOST() will return the correct
* answer for the page. Here, we don't know yet if the page will be
* rightmost. Offset P_FIRSTKEY is always the first data key.
*/
static void
_bt_sortaddtup(Page page,
Size itemsize,
BTItem btitem,
OffsetNumber itup_off)
{
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
BTItemData truncitem;
if (!P_ISLEAF(opaque) && itup_off == P_FIRSTKEY)
{
memcpy(&truncitem, btitem, sizeof(BTItemData));
truncitem.bti_itup.t_info = sizeof(BTItemData);
btitem = &truncitem;
itemsize = sizeof(BTItemData);
}
if (PageAddItem(page, (Item) btitem, itemsize, itup_off,
LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to the index page");
}
/*----------
* Add an item to a disk page from the sort output.
*
* We must be careful to observe the page layout conventions of nbtsearch.c:
* - rightmost pages start data items at P_HIKEY instead of at P_FIRSTKEY.
* - on non-leaf pages, the key portion of the first item need not be
* stored, we should store only the link.
*
* A leaf page being built looks like:
*
* +----------------+---------------------------------+
* | PageHeaderData | linp0 linp1 linp2 ...           |
* +-----------+----+---------------------------------+
* | ... linpN |                                      |
* +-----------+--------------------------------------+
* |     ^ last                                       |
* |                                                  |
* +-------------+------------------------------------+
* |             | itemN ...                          |
* +-------------+------------------+-----------------+
* |       ... item3 item2 item1    | "special space" |
* +--------------------------------+-----------------+
*
* Contrast this with the diagram in bufpage.h; note the mismatch
* between linps and items. This is because we reserve linp0 as a
* placeholder for the pointer to the "high key" item; when we have
* filled up the page, we will set linp0 to point to itemN and clear
* linpN. On the other hand, if we find this is the last (rightmost)
* page, we leave the items alone and slide the linp array over.
*
* 'last' pointer indicates the last offset added to the page.
*----------
*/
static void
_bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti)
{
Page npage;
BlockNumber nblkno;
OffsetNumber last_off;
Size pgspc;
Size btisz;
npage = state->btps_page;
nblkno = state->btps_blkno;
last_off = state->btps_lastoff;
pgspc = PageGetFreeSpace(npage);
btisz = BTITEMSZ(bti);
btisz = MAXALIGN(btisz);
/*
* Check whether the item can fit on a btree page at all. (Eventually,
* we ought to try to apply TOAST methods if not.) We actually need to
* be able to fit three items on every page, so restrict any one item
* to 1/3 the per-page available space. Note that at this point, btisz
* doesn't include the ItemId.
*
* NOTE: similar code appears in _bt_insertonpg() to defend against
* oversize items being inserted into an already-existing index. But
* during creation of an index, we don't go through there.
*/
if (btisz > BTMaxItemSize(npage))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("index row size %lu exceeds btree maximum, %lu",
(unsigned long) btisz,
(unsigned long) BTMaxItemSize(npage))));
if (pgspc < btisz || pgspc < state->btps_full)
{
/*
* Item won't fit on this page, or we feel the page is full enough
* already. Finish off the page and write it out.
*/
Page opage = npage;
BlockNumber oblkno = nblkno;
ItemId ii;
ItemId hii;
BTItem obti;
/* Create new page of same level */
npage = _bt_blnewpage(state->btps_level);
/* and assign it a page position */
nblkno = wstate->btws_pages_alloced++;
/*
* We copy the last item on the page into the new page, and then
* rearrange the old page so that the 'last item' becomes its high
* key rather than a true data item. There had better be at least
* two items on the page already, else the page would be empty of
* useful data. (Hence, we must allow pages to be packed at least
* 2/3rds full; the 70% figure used above is close to minimum.)
*/
Assert(last_off > P_FIRSTKEY);
ii = PageGetItemId(opage, last_off);
obti = (BTItem) PageGetItem(opage, ii);
_bt_sortaddtup(npage, ItemIdGetLength(ii), obti, P_FIRSTKEY);
/*
* Move 'last' into the high key position on opage
*/
hii = PageGetItemId(opage, P_HIKEY);
*hii = *ii;
ii->lp_flags &= ~LP_USED;
((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
/*
* Link the old page into its parent, using its minimum key. If we
* don't have a parent, we have to create one; this adds a new
* btree level.
*/
if (state->btps_next == NULL)
state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
Assert(state->btps_minkey != NULL);
ItemPointerSet(&(state->btps_minkey->bti_itup.t_tid),
oblkno, P_HIKEY);
_bt_buildadd(wstate, state->btps_next, state->btps_minkey);
pfree(state->btps_minkey);
/*
* Save a copy of the minimum key for the new page. We have to
* copy it off the old page, not the new one, in case we are not
* at leaf level.
*/
state->btps_minkey = _bt_formitem(&(obti->bti_itup));
/*
* Set the sibling links for both pages.
*/
{
BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);
oopaque->btpo_next = nblkno;
nopaque->btpo_prev = oblkno;
nopaque->btpo_next = P_NONE; /* redundant */
}
/*
* Write out the old page. We never need to touch it again, so we
* can free the opage workspace too.
*/
_bt_blwritepage(wstate, opage, oblkno);
/*
* Reset last_off to point to new page
*/
last_off = P_FIRSTKEY;
}
/*
* If the new item is the first for its page, stash a copy for later.
* Note this will only happen for the first item on a level; on later
* pages, the first item for a page is copied from the prior page in
* the code above.
*/
if (last_off == P_HIKEY)
{
Assert(state->btps_minkey == NULL);
state->btps_minkey = _bt_formitem(&(bti->bti_itup));
}
/*
* Add the new item into the current page.
*/
last_off = OffsetNumberNext(last_off);
_bt_sortaddtup(npage, btisz, bti, last_off);
state->btps_page = npage;
state->btps_blkno = nblkno;
state->btps_lastoff = last_off;
}
/*
* Finish writing out the completed btree.
*/
static void
_bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
{
BTPageState *s;
BlockNumber rootblkno = P_NONE;
uint32 rootlevel = 0;
Page metapage;
/*
* Each iteration of this loop completes one more level of the tree.
*/
for (s = state; s != NULL; s = s->btps_next)
{
BlockNumber blkno;
BTPageOpaque opaque;
blkno = s->btps_blkno;
opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
/*
* We have to link the last page on this level to somewhere.
*
* If we're at the top, it's the root, so attach it to the metapage.
* Otherwise, add an entry for it to its parent using its minimum
* key. This may cause the last page of the parent level to
* split, but that's not a problem -- we haven't gotten to it yet.
*/
if (s->btps_next == NULL)
{
opaque->btpo_flags |= BTP_ROOT;
rootblkno = blkno;
rootlevel = s->btps_level;
}
else
{
Assert(s->btps_minkey != NULL);
ItemPointerSet(&(s->btps_minkey->bti_itup.t_tid),
blkno, P_HIKEY);
_bt_buildadd(wstate, s->btps_next, s->btps_minkey);
pfree(s->btps_minkey);
s->btps_minkey = NULL;
}
/*
* This is the rightmost page, so the ItemId array needs to be
* slid back one slot. Then we can dump out the page.
*/
_bt_slideleft(s->btps_page);
_bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
s->btps_page = NULL; /* writepage freed the workspace */
}
/*
* As the last step in the process, construct the metapage and make it
* point to the new root (unless we had no data at all, in which case
* it's set to point to "P_NONE"). This changes the index to the
* "valid" state by filling in a valid magic number in the metapage.
*/
metapage = (Page) palloc(BLCKSZ);
_bt_initmetapage(metapage, rootblkno, rootlevel);
_bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
}
/*
* Read tuples in correct sort order from tuplesort, and load them into
* btree leaves.
*/
static void
_bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
{
BTPageState *state = NULL;
bool merge = (btspool2 != NULL);
BTItem bti,
bti2 = NULL;
bool should_free,
should_free2,
load1;
TupleDesc tupdes = RelationGetDescr(wstate->index);
int i,
keysz = RelationGetNumberOfAttributes(wstate->index);
ScanKey indexScanKey = NULL;
if (merge)
{
/*
* Another BTSpool for dead tuples exists. Now we have to merge
* btspool and btspool2.
*/
/* prepare for the merge: fetch the first tuple from each spool and build the scan keys */
bti = (BTItem) tuplesort_getindextuple(btspool->sortstate,
true, &should_free);
bti2 = (BTItem) tuplesort_getindextuple(btspool2->sortstate,
true, &should_free2);
indexScanKey = _bt_mkscankey_nodata(wstate->index);
for (;;)
{
load1 = true; /* load from btspool (not btspool2) next? */
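/*
 * If spool2 is exhausted, keep draining spool1 (stopping once it too is
 * empty); if spool1 is exhausted, fall through and load from spool2.
 */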
if (bti2 == NULL)
{
if (bti == NULL)
break;
}
else if (bti != NULL)
{
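/*
 * Both spools still have tuples: compare them column by column. NULLs are
 * treated as sorting after non-NULL values; if all key columns compare
 * equal, load1 remains true and the tuple from the main spool loads first.
 */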
for (i = 1; i <= keysz; i++)
{
ScanKey entry;
Datum attrDatum1,
attrDatum2;
bool isFirstNull,
isSecondNull;
entry = indexScanKey + i - 1;
attrDatum1 = index_getattr((IndexTuple) bti, i, tupdes,
&isFirstNull);
attrDatum2 = index_getattr((IndexTuple) bti2, i, tupdes,
&isSecondNull);
if (isFirstNull)
{
if (!isSecondNull)
{
load1 = false;
break;
}
}
else if (isSecondNull)
break;
else
{
int32 compare;
compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
attrDatum1,
attrDatum2));
if (compare > 0)
{
load1 = false;
break;
}
else if (compare < 0)
break;
}
}
}
else
load1 = false;
/* When we see the first tuple, create the first index page */
if (state == NULL)
state = _bt_pagestate(wstate, 0);
if (load1)
{
_bt_buildadd(wstate, state, bti);
if (should_free)
pfree(bti);
bti = (BTItem) tuplesort_getindextuple(btspool->sortstate,
true, &should_free);
}
else
{
_bt_buildadd(wstate, state, bti2);
if (should_free2)
pfree(bti2);
bti2 = (BTItem) tuplesort_getindextuple(btspool2->sortstate,
true, &should_free2);
}
}
_bt_freeskey(indexScanKey);
}
else
{
/* merge is unnecessary */
while ((bti = (BTItem) tuplesort_getindextuple(btspool->sortstate,
true, &should_free)) != NULL)
{
/* When we see the first tuple, create the first index page */
if (state == NULL)
state = _bt_pagestate(wstate, 0);
_bt_buildadd(wstate, state, bti);
if (should_free)
pfree(bti);
}
}
/* Close down final pages and write the metapage */
_bt_uppershutdown(wstate, state);
/*
* If the index isn't temp, we must fsync it down to disk before it's
* safe to commit the transaction. (For a temp index we don't care
* since the index will be uninteresting after a crash anyway.)
*
* It's obvious that we must do this when not WAL-logging the build. It's
* less obvious that we have to do it even if we did WAL-log the index
* pages. The reason is that since we're building outside shared
* buffers, a CHECKPOINT occurring during the build has no way to
* flush the previously written data to disk (indeed it won't know the
* index even exists). A crash later on would replay WAL from the
* checkpoint, therefore it wouldn't replay our earlier WAL entries.
* If we do not fsync those pages here, they might still not be on
* disk when the crash occurs.
*/
if (!wstate->index->rd_istemp)
smgrimmedsync(wstate->index->rd_smgr);
}