2016-12-02 18:03:30 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* freepage.c
|
|
|
|
* Management of free memory pages.
|
|
|
|
*
|
|
|
|
* The intention of this code is to provide infrastructure for memory
|
|
|
|
* allocators written specifically for PostgreSQL. At least in the case
|
|
|
|
* of dynamic shared memory, we can't simply use malloc() or even
|
|
|
|
* relatively thin wrappers like palloc() which sit on top of it, because
|
|
|
|
* no allocator built into the operating system will deal with relative
|
|
|
|
* pointers. In the future, we may find other cases in which greater
|
|
|
|
* control over our own memory management seems desirable.
|
|
|
|
*
|
|
|
|
* A FreePageManager keeps track of which 4kB pages of memory are currently
|
|
|
|
* unused from the point of view of some higher-level memory allocator.
|
|
|
|
* Unlike a user-facing allocator such as palloc(), a FreePageManager can
|
|
|
|
* only allocate and free in units of whole pages, and freeing an
|
|
|
|
* allocation can only be done given knowledge of its length in pages.
|
|
|
|
*
|
|
|
|
* Since a free page manager has only a fixed amount of dedicated memory,
|
|
|
|
* and since there is no underlying allocator, it uses the free pages
|
|
|
|
* it is given to manage to store its bookkeeping data. It keeps multiple
|
|
|
|
* freelists of runs of pages, sorted by the size of the run; the head of
|
|
|
|
* each freelist is stored in the FreePageManager itself, and the first
|
|
|
|
* page of each run contains a relative pointer to the next run. See
|
|
|
|
* FreePageManagerGetInternal for more details on how the freelists are
|
|
|
|
* managed.
|
|
|
|
*
|
|
|
|
* To avoid memory fragmentation, it's important to consolidate adjacent
|
|
|
|
* spans of pages whenever possible; otherwise, large allocation requests
|
|
|
|
* might not be satisfied even when sufficient contiguous space is
|
|
|
|
* available. Therefore, in addition to the freelists, we maintain an
|
|
|
|
* in-memory btree of free page ranges ordered by page number. If a
|
|
|
|
* range being freed precedes or follows a range that is already free,
|
|
|
|
* the existing range is extended; if it exactly bridges the gap between
|
|
|
|
* free ranges, then the two existing ranges are consolidated with the
|
|
|
|
* newly-freed range to form one great big range of free pages.
|
|
|
|
*
|
|
|
|
* When there is only one range of free pages, the btree is trivial and
|
|
|
|
* is stored within the FreePageManager proper; otherwise, pages are
|
|
|
|
* allocated from the area under management as needed. Even in cases
|
|
|
|
* where memory fragmentation is very severe, only a tiny fraction of
|
|
|
|
* the pages under management are consumed by this btree.
|
|
|
|
*
|
2021-01-02 19:06:25 +01:00
|
|
|
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
2016-12-02 18:03:30 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/backend/utils/mmgr/freepage.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "lib/stringinfo.h"
|
|
|
|
#include "miscadmin.h"
|
|
|
|
|
|
|
|
#include "utils/freepage.h"
|
|
|
|
#include "utils/relptr.h"
|
|
|
|
|
|
|
|
|
|
|
|
/* Magic numbers to identify various page types */
#define FREE_PAGE_SPAN_LEADER_MAGIC 0xea4020f0
#define FREE_PAGE_LEAF_MAGIC 0x98eae728
#define FREE_PAGE_INTERNAL_MAGIC 0x19aa32c9

/* Doubly linked list of spans of free pages; stored in first page of span. */
struct FreePageSpanLeader
{
	int			magic;			/* always FREE_PAGE_SPAN_LEADER_MAGIC */
	Size		npages;			/* number of pages in span */
	RelptrFreePageSpanLeader prev;	/* previous span on same list, or null */
	RelptrFreePageSpanLeader next;	/* next span on same list, or null */
};

/* Common header for btree leaf and internal pages. */
typedef struct FreePageBtreeHeader
{
	int			magic;			/* FREE_PAGE_LEAF_MAGIC or
								 * FREE_PAGE_INTERNAL_MAGIC */
	Size		nused;			/* number of items used */
	RelptrFreePageBtree parent; /* uplink */
} FreePageBtreeHeader;

/* Internal key; points to next level of btree. */
typedef struct FreePageBtreeInternalKey
{
	Size		first_page;		/* low bound for keys on child page */
	RelptrFreePageBtree child;	/* downlink */
} FreePageBtreeInternalKey;

/* Leaf key; no payload data. */
typedef struct FreePageBtreeLeafKey
{
	Size		first_page;		/* first page in span */
	Size		npages;			/* number of pages in span */
} FreePageBtreeLeafKey;

/* Work out how many keys will fit on a page. */
#define FPM_ITEMS_PER_INTERNAL_PAGE \
	((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
	 sizeof(FreePageBtreeInternalKey))
#define FPM_ITEMS_PER_LEAF_PAGE \
	((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
	 sizeof(FreePageBtreeLeafKey))

/* A btree page of either sort */
struct FreePageBtree
{
	FreePageBtreeHeader hdr;	/* common header; hdr.magic says which arm of
								 * the union below is in use */
	union
	{
		FreePageBtreeInternalKey internal_key[FPM_ITEMS_PER_INTERNAL_PAGE];
		FreePageBtreeLeafKey leaf_key[FPM_ITEMS_PER_LEAF_PAGE];
	}			u;
};

/* Results of a btree search */
typedef struct FreePageBtreeSearchResult
{
	FreePageBtree *page;		/* leaf page where the key is, or belongs */
	Size		index;			/* key index within that page */
	bool		found;			/* was the search key present? */
	unsigned	split_pages;	/* NOTE(review): presumably the number of page
								 * splits an insert at this position would
								 * require -- confirm in FreePageBtreeSearch,
								 * which is outside this chunk */
} FreePageBtreeSearchResult;
|
|
|
|
|
|
|
|
/* Helper functions */

/* btree maintenance */
static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
											FreePageBtree *btp);
static Size FreePageBtreeCleanup(FreePageManager *fpm);
static FreePageBtree *FreePageBtreeFindLeftSibling(char *base,
												   FreePageBtree *btp);
static FreePageBtree *FreePageBtreeFindRightSibling(char *base,
													FreePageBtree *btp);
static Size FreePageBtreeFirstKey(FreePageBtree *btp);
static FreePageBtree *FreePageBtreeGetRecycled(FreePageManager *fpm);
static void FreePageBtreeInsertInternal(char *base, FreePageBtree *btp,
										Size index, Size first_page, FreePageBtree *child);
static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index,
									Size first_page, Size npages);
static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
								Size index);
static void FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp);
static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
								FreePageBtreeSearchResult *result);
static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
static Size FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page);
static FreePageBtree *FreePageBtreeSplitPage(FreePageManager *fpm,
											 FreePageBtree *btp);
static void FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp);

/* debugging dumps */
static void FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
									 FreePageBtree *parent, int level, StringInfo buf);
static void FreePageManagerDumpSpans(FreePageManager *fpm,
									 FreePageSpanLeader *span, Size expected_pages,
									 StringInfo buf);

/* allocation/free internals and freelist manipulation */
static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
									   Size *first_page);
static Size FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
									   Size npages, bool soft);
static void FreePagePopSpanLeader(FreePageManager *fpm, Size pageno);
static void FreePagePushSpanLeader(FreePageManager *fpm, Size first_page,
								   Size npages);
static Size FreePageManagerLargestContiguous(FreePageManager *fpm);
static void FreePageManagerUpdateLargest(FreePageManager *fpm);

#ifdef FPM_EXTRA_ASSERTS
static Size sum_free_pages(FreePageManager *fpm);
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize a new, empty free page manager.
|
|
|
|
*
|
|
|
|
* 'fpm' should reference caller-provided memory large enough to contain a
|
|
|
|
* FreePageManager. We'll initialize it here.
|
|
|
|
*
|
|
|
|
* 'base' is the address to which all pointers are relative. When managing
|
|
|
|
* a dynamic shared memory segment, it should normally be the base of the
|
|
|
|
* segment. When managing backend-private memory, it can be either NULL or,
|
|
|
|
* if managing a single contiguous extent of memory, the start of that extent.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
FreePageManagerInitialize(FreePageManager *fpm, char *base)
|
|
|
|
{
|
|
|
|
Size f;
|
|
|
|
|
|
|
|
relptr_store(base, fpm->self, fpm);
|
|
|
|
relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
|
|
|
|
relptr_store(base, fpm->btree_recycle, (FreePageSpanLeader *) NULL);
|
|
|
|
fpm->btree_depth = 0;
|
|
|
|
fpm->btree_recycle_count = 0;
|
|
|
|
fpm->singleton_first_page = 0;
|
|
|
|
fpm->singleton_npages = 0;
|
|
|
|
fpm->contiguous_pages = 0;
|
|
|
|
fpm->contiguous_pages_dirty = true;
|
|
|
|
#ifdef FPM_EXTRA_ASSERTS
|
|
|
|
fpm->free_pages = 0;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
for (f = 0; f < FPM_NUM_FREELISTS; f++)
|
|
|
|
relptr_store(base, fpm->freelist[f], (FreePageSpanLeader *) NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Allocate a run of pages of the given length from the free page manager.
 * The return value indicates whether we were able to satisfy the request;
 * if true, the first page of the allocation is stored in *first_page.
 */
bool
FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
{
	bool		result;
	Size		contiguous_pages;

	/* Do the actual allocation work. */
	result = FreePageManagerGetInternal(fpm, npages, first_page);

	/*
	 * It's a bit counterintuitive, but allocating pages can actually create
	 * opportunities for cleanup that create larger ranges.  We might pull a
	 * key out of the btree that enables the item at the head of the btree
	 * recycle list to be inserted; and then if there are more items behind it
	 * one of those might cause two currently-separated ranges to merge,
	 * creating a single range of contiguous pages larger than any that
	 * existed previously.  It might be worth trying to improve the cleanup
	 * algorithm to avoid such corner cases, but for now we just notice the
	 * condition and do the appropriate reporting.
	 */
	contiguous_pages = FreePageBtreeCleanup(fpm);
	if (fpm->contiguous_pages < contiguous_pages)
		fpm->contiguous_pages = contiguous_pages;

	/*
	 * FreePageManagerGetInternal may have set contiguous_pages_dirty.
	 * Recompute contiguous_pages if so.
	 */
	FreePageManagerUpdateLargest(fpm);

#ifdef FPM_EXTRA_ASSERTS
	/* Keep the extra-assert accounting of total free pages up to date. */
	if (result)
	{
		Assert(fpm->free_pages >= npages);
		fpm->free_pages -= npages;
	}
	Assert(fpm->free_pages == sum_free_pages(fpm));
	Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
#endif
	return result;
}
|
|
|
|
|
|
|
|
#ifdef FPM_EXTRA_ASSERTS
|
|
|
|
static void
|
|
|
|
sum_free_pages_recurse(FreePageManager *fpm, FreePageBtree *btp, Size *sum)
|
|
|
|
{
|
|
|
|
char *base = fpm_segment_base(fpm);
|
|
|
|
|
|
|
|
Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ||
|
|
|
|
btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
|
|
|
|
++*sum;
|
|
|
|
if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
|
|
|
|
{
|
|
|
|
Size index;
|
|
|
|
|
|
|
|
|
|
|
|
for (index = 0; index < btp->hdr.nused; ++index)
|
|
|
|
{
|
|
|
|
FreePageBtree *child;
|
|
|
|
|
|
|
|
child = relptr_access(base, btp->u.internal_key[index].child);
|
|
|
|
sum_free_pages_recurse(fpm, child, sum);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
 * Independently recompute the total number of pages the manager accounts as
 * free, for cross-checking against fpm->free_pages.  The total includes the
 * spans on the freelists plus the pages consumed by the btree itself and the
 * btree recycle list (which are bookkeeping pages carved out of the managed
 * space).
 */
static Size
sum_free_pages(FreePageManager *fpm)
{
	FreePageSpanLeader *recycle;
	char	   *base = fpm_segment_base(fpm);
	Size		sum = 0;
	int			list;

	/* Count the spans by scanning the freelists. */
	for (list = 0; list < FPM_NUM_FREELISTS; ++list)
	{
		if (!relptr_is_null(fpm->freelist[list]))
		{
			FreePageSpanLeader *candidate =
			relptr_access(base, fpm->freelist[list]);

			/* Each span contributes its full page count. */
			do
			{
				sum += candidate->npages;
				candidate = relptr_access(base, candidate->next);
			} while (candidate != NULL);
		}
	}

	/* Count btree internal pages. */
	if (fpm->btree_depth > 0)
	{
		FreePageBtree *root = relptr_access(base, fpm->btree_root);

		sum_free_pages_recurse(fpm, root, &sum);
	}

	/* Count the recycle list; each entry is a single-page span. */
	for (recycle = relptr_access(base, fpm->btree_recycle);
		 recycle != NULL;
		 recycle = relptr_access(base, recycle->next))
	{
		Assert(recycle->npages == 1);
		++sum;
	}

	return sum;
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compute the size of the largest run of pages that the user could
|
2017-02-06 10:33:58 +01:00
|
|
|
* successfully get.
|
2016-12-02 18:03:30 +01:00
|
|
|
*/
|
|
|
|
static Size
|
|
|
|
FreePageManagerLargestContiguous(FreePageManager *fpm)
|
|
|
|
{
|
|
|
|
char *base;
|
|
|
|
Size largest;
|
|
|
|
|
|
|
|
base = fpm_segment_base(fpm);
|
|
|
|
largest = 0;
|
|
|
|
if (!relptr_is_null(fpm->freelist[FPM_NUM_FREELISTS - 1]))
|
|
|
|
{
|
|
|
|
FreePageSpanLeader *candidate;
|
|
|
|
|
|
|
|
candidate = relptr_access(base, fpm->freelist[FPM_NUM_FREELISTS - 1]);
|
|
|
|
do
|
|
|
|
{
|
|
|
|
if (candidate->npages > largest)
|
|
|
|
largest = candidate->npages;
|
|
|
|
candidate = relptr_access(base, candidate->next);
|
|
|
|
} while (candidate != NULL);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Size f = FPM_NUM_FREELISTS - 1;
|
|
|
|
|
|
|
|
do
|
|
|
|
{
|
|
|
|
--f;
|
|
|
|
if (!relptr_is_null(fpm->freelist[f]))
|
|
|
|
{
|
|
|
|
largest = f + 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} while (f > 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
return largest;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Recompute the size of the largest run of pages that the user could
|
2017-02-06 10:33:58 +01:00
|
|
|
* successfully get, if it has been marked dirty.
|
2016-12-02 18:03:30 +01:00
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FreePageManagerUpdateLargest(FreePageManager *fpm)
|
|
|
|
{
|
|
|
|
if (!fpm->contiguous_pages_dirty)
|
|
|
|
return;
|
|
|
|
|
|
|
|
fpm->contiguous_pages = FreePageManagerLargestContiguous(fpm);
|
|
|
|
fpm->contiguous_pages_dirty = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Transfer a run of pages to the free page manager.
 *
 * 'first_page' is the page number of the first page in the run, and 'npages'
 * is its length; npages must be greater than zero.
 */
void
FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
{
	Size		contiguous_pages;

	Assert(npages > 0);

	/* Record the new pages. */
	contiguous_pages =
		FreePageManagerPutInternal(fpm, first_page, npages, false);

	/*
	 * If the new range we inserted into the page manager was contiguous with
	 * an existing range, it may have opened up cleanup opportunities.
	 * (contiguous_pages > npages implies the inserted range merged with at
	 * least one neighbor.)
	 */
	if (contiguous_pages > npages)
	{
		Size		cleanup_contiguous_pages;

		cleanup_contiguous_pages = FreePageBtreeCleanup(fpm);
		if (cleanup_contiguous_pages > contiguous_pages)
			contiguous_pages = cleanup_contiguous_pages;
	}

	/* See if we now have a new largest chunk. */
	if (fpm->contiguous_pages < contiguous_pages)
		fpm->contiguous_pages = contiguous_pages;

	/*
	 * The earlier call to FreePageManagerPutInternal may have set
	 * contiguous_pages_dirty if it needed to allocate internal pages, so
	 * recompute contiguous_pages if necessary.
	 */
	FreePageManagerUpdateLargest(fpm);

#ifdef FPM_EXTRA_ASSERTS
	/* Cross-check the page accounting against an independent recount. */
	fpm->free_pages += npages;
	Assert(fpm->free_pages == sum_free_pages(fpm));
	Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
#endif
}
|
|
|
|
|
|
|
|
/*
 * Produce a debugging dump of the state of a free page manager.
 *
 * The returned string is allocated via the StringInfo machinery; the caller
 * is responsible for releasing it (NOTE(review): presumably palloc'd in the
 * current memory context -- confirm against initStringInfo).
 */
char *
FreePageManagerDump(FreePageManager *fpm)
{
	char	   *base = fpm_segment_base(fpm);
	StringInfoData buf;
	FreePageSpanLeader *recycle;
	bool		dumped_any_freelist = false;
	Size		f;

	/* Initialize output buffer. */
	initStringInfo(&buf);

	/* Dump general stuff. */
	appendStringInfo(&buf, "metadata: self %zu max contiguous pages = %zu\n",
					 fpm->self.relptr_off, fpm->contiguous_pages);

	/* Dump btree, if it exists; otherwise dump the singleton range, if any. */
	if (fpm->btree_depth > 0)
	{
		FreePageBtree *root;

		appendStringInfo(&buf, "btree depth %u:\n", fpm->btree_depth);
		root = relptr_access(base, fpm->btree_root);
		FreePageManagerDumpBtree(fpm, root, NULL, 0, &buf);
	}
	else if (fpm->singleton_npages > 0)
	{
		appendStringInfo(&buf, "singleton: %zu(%zu)\n",
						 fpm->singleton_first_page, fpm->singleton_npages);
	}

	/* Dump btree recycle list. */
	recycle = relptr_access(base, fpm->btree_recycle);
	if (recycle != NULL)
	{
		appendStringInfoString(&buf, "btree recycle:");
		FreePageManagerDumpSpans(fpm, recycle, 1, &buf);
	}

	/* Dump free lists; the header line is emitted only once. */
	for (f = 0; f < FPM_NUM_FREELISTS; ++f)
	{
		FreePageSpanLeader *span;

		if (relptr_is_null(fpm->freelist[f]))
			continue;
		if (!dumped_any_freelist)
		{
			appendStringInfoString(&buf, "freelists:\n");
			dumped_any_freelist = true;
		}
		appendStringInfo(&buf, " %zu:", f + 1);
		span = relptr_access(base, fpm->freelist[f]);
		FreePageManagerDumpSpans(fpm, span, f + 1, &buf);
	}

	/* And return result to caller. */
	return buf.data;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * The first_page value stored at index zero in any non-root page must match
 * the first_page value stored in its parent at the index which points to that
 * page. So when the value stored at index zero in a btree page changes, we've
 * got to walk up the tree adjusting ancestor keys until we reach an ancestor
 * where that key isn't index zero. This function should be called after
 * updating the first key on the target page; it will propagate the change
 * upward as far as needed.
 *
 * We assume here that the first key on the page has not changed enough to
 * require changes in the ordering of keys on its ancestor pages. Thus,
 * if we search the parent page for the first key greater than or equal to
 * the first key on the current page, the downlink to this page will be either
 * the exact index returned by the search (if the first key decreased)
 * or one less (if the first key increased).
 */
static void
FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
{
	char	   *base = fpm_segment_base(fpm);
	Size		first_page;
	FreePageBtree *parent;
	FreePageBtree *child;

	/* This might be either a leaf or an internal page. */
	Assert(btp->hdr.nused > 0);
	if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
	{
		Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
		first_page = btp->u.leaf_key[0].first_page;
	}
	else
	{
		Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
		Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
		first_page = btp->u.internal_key[0].first_page;
	}
	child = btp;

	/* Loop until we find an ancestor that does not require adjustment. */
	for (;;)
	{
		Size		s;

		parent = relptr_access(base, child->hdr.parent);
		if (parent == NULL)
			break;				/* reached the root */
		s = FreePageBtreeSearchInternal(parent, first_page);

		/* Key is either at index s or index s-1; figure out which. */
		if (s >= parent->hdr.nused)
		{
			Assert(s == parent->hdr.nused);
			--s;
		}
		else
		{
			FreePageBtree *check;

			/* If the downlink at s isn't ours, ours must be the one before. */
			check = relptr_access(base, parent->u.internal_key[s].child);
			if (check != child)
			{
				Assert(s > 0);
				--s;
			}
		}

#ifdef USE_ASSERT_CHECKING
		/* Debugging double-check. */
		{
			FreePageBtree *check;

			check = relptr_access(base, parent->u.internal_key[s].child);
			Assert(s < parent->hdr.nused);
			Assert(child == check);
		}
#endif

		/* Update the parent key. */
		parent->u.internal_key[s].first_page = first_page;

		/*
		 * If this is the first key in the parent, go up another level; else
		 * done.
		 */
		if (s > 0)
			break;
		child = parent;
	}
}
|
|
|
|
|
|
|
|
/*
 * Attempt to reclaim space from the free-page btree. The return value is
 * the largest range of contiguous pages created by the cleanup operation.
 */
static Size
FreePageBtreeCleanup(FreePageManager *fpm)
{
	char	   *base = fpm_segment_base(fpm);
	Size		max_contiguous_pages = 0;

	/* Attempt to shrink the depth of the btree. */
	while (!relptr_is_null(fpm->btree_root))
	{
		FreePageBtree *root = relptr_access(base, fpm->btree_root);

		/* If the root contains only one key, reduce depth by one. */
		if (root->hdr.nused == 1)
		{
			/* Shrink depth of tree by one. */
			Assert(fpm->btree_depth > 0);
			--fpm->btree_depth;
			if (root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
			{
				/* If root is a leaf, convert only entry to singleton range. */
				relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
				fpm->singleton_first_page = root->u.leaf_key[0].first_page;
				fpm->singleton_npages = root->u.leaf_key[0].npages;
			}
			else
			{
				FreePageBtree *newroot;

				/* If root is an internal page, make only child the root. */
				Assert(root->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
				relptr_copy(fpm->btree_root, root->u.internal_key[0].child);
				newroot = relptr_access(base, fpm->btree_root);
				relptr_store(base, newroot->hdr.parent, (FreePageBtree *) NULL);
			}
			/* The old root page is no longer needed; recycle it. */
			FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, root));
		}
		else if (root->hdr.nused == 2 &&
				 root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
		{
			Size		end_of_first;
			Size		start_of_second;

			end_of_first = root->u.leaf_key[0].first_page +
				root->u.leaf_key[0].npages;
			start_of_second = root->u.leaf_key[1].first_page;

			/*
			 * Two spans separated by exactly one page: if that one page is
			 * the root btree page itself, we can drop the btree entirely and
			 * merge everything into a singleton range.
			 */
			if (end_of_first + 1 == start_of_second)
			{
				Size		root_page = fpm_pointer_to_page(base, root);

				if (end_of_first == root_page)
				{
					FreePagePopSpanLeader(fpm, root->u.leaf_key[0].first_page);
					FreePagePopSpanLeader(fpm, root->u.leaf_key[1].first_page);
					fpm->singleton_first_page = root->u.leaf_key[0].first_page;
					fpm->singleton_npages = root->u.leaf_key[0].npages +
						root->u.leaf_key[1].npages + 1;
					fpm->btree_depth = 0;
					relptr_store(base, fpm->btree_root,
								 (FreePageBtree *) NULL);
					FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
										   fpm->singleton_npages);
					Assert(max_contiguous_pages == 0);
					max_contiguous_pages = fpm->singleton_npages;
				}
			}

			/* Whether it worked or not, it's time to stop. */
			break;
		}
		else
		{
			/* Nothing more to do.  Stop. */
			break;
		}
	}

	/*
	 * Attempt to free recycled btree pages. We skip this if releasing the
	 * recycled page would require a btree page split, because the page we're
	 * trying to recycle would be consumed by the split, which would be
	 * counterproductive.
	 *
	 * We also currently only ever attempt to recycle the first page on the
	 * list; that could be made more aggressive, but it's not clear that the
	 * complexity would be worthwhile.
	 */
	while (fpm->btree_recycle_count > 0)
	{
		FreePageBtree *btp;
		Size		first_page;
		Size		contiguous_pages;

		btp = FreePageBtreeGetRecycled(fpm);
		first_page = fpm_pointer_to_page(base, btp);
		/* "soft" put: fails (returns 0) rather than splitting a btree page */
		contiguous_pages = FreePageManagerPutInternal(fpm, first_page, 1, true);
		if (contiguous_pages == 0)
		{
			/* Couldn't insert without a split; put it back and stop. */
			FreePageBtreeRecycle(fpm, first_page);
			break;
		}
		else
		{
			if (contiguous_pages > max_contiguous_pages)
				max_contiguous_pages = contiguous_pages;
		}
	}

	return max_contiguous_pages;
}
|
|
|
|
|
|
|
|
/*
 * Consider consolidating the given page with its left or right sibling,
 * if it's fairly empty.
 */
static void
FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
{
	char	   *base = fpm_segment_base(fpm);
	FreePageBtree *np;
	Size		max;

	/*
	 * We only try to consolidate pages that are less than a third full. We
	 * could be more aggressive about this, but that might risk performing
	 * consolidation only to end up splitting again shortly thereafter. Since
	 * the btree should be very small compared to the space under management,
	 * our goal isn't so much to ensure that it always occupies the absolutely
	 * smallest possible number of pages as to reclaim pages before things get
	 * too egregiously out of hand.
	 */
	if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
		max = FPM_ITEMS_PER_LEAF_PAGE;
	else
	{
		Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
		max = FPM_ITEMS_PER_INTERNAL_PAGE;
	}
	if (btp->hdr.nused >= max / 3)
		return;

	/*
	 * If we can fit our right sibling's keys onto this page, consolidate.
	 */
	np = FreePageBtreeFindRightSibling(base, btp);
	if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
	{
		if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
		{
			memcpy(&btp->u.leaf_key[btp->hdr.nused], &np->u.leaf_key[0],
				   sizeof(FreePageBtreeLeafKey) * np->hdr.nused);
			btp->hdr.nused += np->hdr.nused;
		}
		else
		{
			memcpy(&btp->u.internal_key[btp->hdr.nused], &np->u.internal_key[0],
				   sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
			btp->hdr.nused += np->hdr.nused;
			/* Moved children must now point at btp as their parent. */
			FreePageBtreeUpdateParentPointers(base, btp);
		}
		FreePageBtreeRemovePage(fpm, np);
		return;
	}

	/*
	 * If we can fit our keys onto our left sibling's page, consolidate. In
	 * this case, we move our keys onto the other page rather than vice versa,
	 * to avoid having to adjust ancestor keys.
	 */
	np = FreePageBtreeFindLeftSibling(base, btp);
	if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
	{
		if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
		{
			memcpy(&np->u.leaf_key[np->hdr.nused], &btp->u.leaf_key[0],
				   sizeof(FreePageBtreeLeafKey) * btp->hdr.nused);
			np->hdr.nused += btp->hdr.nused;
		}
		else
		{
			memcpy(&np->u.internal_key[np->hdr.nused], &btp->u.internal_key[0],
				   sizeof(FreePageBtreeInternalKey) * btp->hdr.nused);
			np->hdr.nused += btp->hdr.nused;
			/* Moved children must now point at np as their parent. */
			FreePageBtreeUpdateParentPointers(base, np);
		}
		FreePageBtreeRemovePage(fpm, btp);
		return;
	}
}
|
|
|
|
|
|
|
|
/*
 * Find the passed page's left sibling; that is, the page at the same level
 * of the tree whose keyspace immediately precedes ours.
 */
static FreePageBtree *
FreePageBtreeFindLeftSibling(char *base, FreePageBtree *btp)
{
	FreePageBtree *p = btp;
	int			levels = 0;

	/* Move up until we can move left. */
	for (;;)
	{
		Size		first_page;
		Size		index;

		first_page = FreePageBtreeFirstKey(p);
		p = relptr_access(base, p->hdr.parent);

		if (p == NULL)
			return NULL;		/* we were passed the leftmost page */

		index = FreePageBtreeSearchInternal(p, first_page);
		if (index > 0)
		{
			Assert(p->u.internal_key[index].first_page == first_page);
			p = relptr_access(base, p->u.internal_key[index - 1].child);
			break;
		}
		Assert(index == 0);
		++levels;
	}

	/*
	 * Descend through the rightmost child at each level, once for every
	 * level we ascended above, to land on the sibling at btp's level.
	 */
	while (levels > 0)
	{
		Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
		p = relptr_access(base, p->u.internal_key[p->hdr.nused - 1].child);
		--levels;
	}
	Assert(p->hdr.magic == btp->hdr.magic);

	return p;
}
|
|
|
|
|
|
|
|
/*
 * Find the passed page's right sibling; that is, the page at the same level
 * of the tree whose keyspace immediately follows ours.
 */
static FreePageBtree *
FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
{
	FreePageBtree *p = btp;
	int			levels = 0;

	/* Move up until we can move right. */
	for (;;)
	{
		Size		first_page;
		Size		index;

		first_page = FreePageBtreeFirstKey(p);
		p = relptr_access(base, p->hdr.parent);

		if (p == NULL)
			return NULL;		/* we were passed the rightmost page */

		index = FreePageBtreeSearchInternal(p, first_page);
		if (index < p->hdr.nused - 1)
		{
			Assert(p->u.internal_key[index].first_page == first_page);
			p = relptr_access(base, p->u.internal_key[index + 1].child);
			break;
		}
		Assert(index == p->hdr.nused - 1);
		++levels;
	}

	/*
	 * Descend through the leftmost child at each level, once for every level
	 * we ascended above, to land on the sibling at btp's level.
	 */
	while (levels > 0)
	{
		Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
		p = relptr_access(base, p->u.internal_key[0].child);
		--levels;
	}
	Assert(p->hdr.magic == btp->hdr.magic);

	return p;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the first key on a btree page.
|
|
|
|
*/
|
|
|
|
static Size
|
|
|
|
FreePageBtreeFirstKey(FreePageBtree *btp)
|
|
|
|
{
|
|
|
|
Assert(btp->hdr.nused > 0);
|
|
|
|
|
|
|
|
if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
|
|
|
|
return btp->u.leaf_key[0].first_page;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
|
|
|
|
return btp->u.internal_key[0].first_page;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Get a page from the btree recycle list for use as a btree page.
 * The recycle list must not be empty.
 */
static FreePageBtree *
FreePageBtreeGetRecycled(FreePageManager *fpm)
{
	char	   *base = fpm_segment_base(fpm);
	FreePageSpanLeader *victim = relptr_access(base, fpm->btree_recycle);
	FreePageSpanLeader *newhead;

	Assert(victim != NULL);
	/* Unlink the head of the list and make its successor the new head. */
	newhead = relptr_access(base, victim->next);
	if (newhead != NULL)
		relptr_copy(newhead->prev, victim->prev);
	relptr_store(base, fpm->btree_recycle, newhead);
	Assert(fpm_pointer_is_page_aligned(base, victim));
	fpm->btree_recycle_count--;
	/* The span leader's page is reused wholesale as a btree page. */
	return (FreePageBtree *) victim;
}
|
|
|
|
|
|
|
|
/*
 * Insert an item into an internal page.
 *
 * The new key (first_page, child) is placed at position 'index'; existing
 * keys at or after that position are shifted one slot to the right.  The
 * caller is responsible for ensuring that the page has room and that the
 * chosen index keeps the keys in sorted order.
 */
static void
FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index,
							Size first_page, FreePageBtree *child)
{
	Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
	Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
	Assert(index <= btp->hdr.nused);
	/* memmove, not memcpy: source and destination ranges overlap. */
	memmove(&btp->u.internal_key[index + 1], &btp->u.internal_key[index],
			sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
	btp->u.internal_key[index].first_page = first_page;
	relptr_store(base, btp->u.internal_key[index].child, child);
	++btp->hdr.nused;
}
|
|
|
|
|
|
|
|
/*
 * Insert an item into a leaf page.
 *
 * The new key (first_page, npages) is placed at position 'index'; existing
 * keys at or after that position are shifted one slot to the right.  The
 * caller is responsible for ensuring that the page has room and that the
 * chosen index keeps the keys in sorted order.
 */
static void
FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index, Size first_page,
						Size npages)
{
	Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
	Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
	Assert(index <= btp->hdr.nused);
	/* memmove, not memcpy: source and destination ranges overlap. */
	memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index],
			sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
	btp->u.leaf_key[index].first_page = first_page;
	btp->u.leaf_key[index].npages = npages;
	++btp->hdr.nused;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Put a page on the btree recycle list.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
|
|
|
|
{
|
|
|
|
char *base = fpm_segment_base(fpm);
|
|
|
|
FreePageSpanLeader *head = relptr_access(base, fpm->btree_recycle);
|
|
|
|
FreePageSpanLeader *span;
|
|
|
|
|
|
|
|
span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
|
|
|
|
span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
|
|
|
|
span->npages = 1;
|
|
|
|
relptr_store(base, span->next, head);
|
|
|
|
relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
|
|
|
|
if (head != NULL)
|
|
|
|
relptr_store(base, head->prev, span);
|
|
|
|
relptr_store(base, fpm->btree_recycle, span);
|
|
|
|
fpm->btree_recycle_count++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Remove an item from the btree at the given position on the given page.
 * Only leaf pages are handled here; internal-page downlinks are removed as
 * part of FreePageBtreeRemovePage.
 */
static void
FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
{
	Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
	Assert(index < btp->hdr.nused);

	/* When last item is removed, extirpate entire page from btree. */
	if (btp->hdr.nused == 1)
	{
		FreePageBtreeRemovePage(fpm, btp);
		return;
	}

	/* Physically remove the key from the page. */
	--btp->hdr.nused;
	if (index < btp->hdr.nused)
		memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
				sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));

	/* If we just removed the first key, adjust ancestor keys. */
	if (index == 0)
		FreePageBtreeAdjustAncestorKeys(fpm, btp);

	/* Consider whether to consolidate this page with a sibling. */
	FreePageBtreeConsolidate(fpm, btp);
}
|
|
|
|
|
|
|
|
/*
 * Remove a page from the btree.  Caller is responsible for having relocated
 * any keys from this page that are still wanted.  The page is placed on the
 * recycled list.
 */
static void
FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp)
{
	char	   *base = fpm_segment_base(fpm);
	FreePageBtree *parent;
	Size		index;
	Size		first_page;

	/*
	 * Walk upward, recycling pages as we go, until we reach either the root
	 * or a parent that will still have at least one key left after losing
	 * our downlink.
	 */
	for (;;)
	{
		/* Find parent page. */
		parent = relptr_access(base, btp->hdr.parent);
		if (parent == NULL)
		{
			/* We are removing the root page. */
			relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
			fpm->btree_depth = 0;
			Assert(fpm->singleton_first_page == 0);
			Assert(fpm->singleton_npages == 0);
			return;
		}

		/*
		 * If the parent contains only one item, we need to remove it as well.
		 */
		if (parent->hdr.nused > 1)
			break;
		FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
		btp = parent;
	}

	/* Find and remove the downlink. */
	first_page = FreePageBtreeFirstKey(btp);
	if (parent->hdr.magic == FREE_PAGE_LEAF_MAGIC)
	{
		index = FreePageBtreeSearchLeaf(parent, first_page);
		Assert(index < parent->hdr.nused);
		/* Close the gap left by the removed key, if it isn't the last one. */
		if (index < parent->hdr.nused - 1)
			memmove(&parent->u.leaf_key[index],
					&parent->u.leaf_key[index + 1],
					sizeof(FreePageBtreeLeafKey)
					* (parent->hdr.nused - index - 1));
	}
	else
	{
		index = FreePageBtreeSearchInternal(parent, first_page);
		Assert(index < parent->hdr.nused);
		/* Close the gap left by the removed key, if it isn't the last one. */
		if (index < parent->hdr.nused - 1)
			memmove(&parent->u.internal_key[index],
					&parent->u.internal_key[index + 1],
					sizeof(FreePageBtreeInternalKey)
					* (parent->hdr.nused - index - 1));
	}
	parent->hdr.nused--;
	Assert(parent->hdr.nused > 0);

	/* Recycle the page. */
	FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));

	/* Adjust ancestor keys if needed. */
	if (index == 0)
		FreePageBtreeAdjustAncestorKeys(fpm, parent);

	/* Consider whether to consolidate the parent with a sibling. */
	FreePageBtreeConsolidate(fpm, parent);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search the btree for an entry for the given first page and initialize
|
|
|
|
* *result with the results of the search. result->page and result->index
|
|
|
|
* indicate either the position of an exact match or the position at which
|
|
|
|
* the new key should be inserted. result->found is true for an exact match,
|
|
|
|
* otherwise false. result->split_pages will contain the number of additional
|
|
|
|
* btree pages that will be needed when performing a split to insert a key.
|
|
|
|
* Except as described above, the contents of fields in the result object are
|
|
|
|
* undefined on return.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
|
|
|
|
FreePageBtreeSearchResult *result)
|
|
|
|
{
|
|
|
|
char *base = fpm_segment_base(fpm);
|
|
|
|
FreePageBtree *btp = relptr_access(base, fpm->btree_root);
|
|
|
|
Size index;
|
|
|
|
|
|
|
|
result->split_pages = 1;
|
|
|
|
|
|
|
|
/* If the btree is empty, there's nothing to find. */
|
|
|
|
if (btp == NULL)
|
|
|
|
{
|
|
|
|
result->page = NULL;
|
|
|
|
result->found = false;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Descend until we hit a leaf. */
|
|
|
|
while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
|
|
|
|
{
|
|
|
|
FreePageBtree *child;
|
|
|
|
bool found_exact;
|
|
|
|
|
|
|
|
index = FreePageBtreeSearchInternal(btp, first_page);
|
|
|
|
found_exact = index < btp->hdr.nused &&
|
|
|
|
btp->u.internal_key[index].first_page == first_page;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we found an exact match we descend directly. Otherwise, we
|
|
|
|
* descend into the child to the left if possible so that we can find
|
|
|
|
* the insertion point at that child's high end.
|
|
|
|
*/
|
|
|
|
if (!found_exact && index > 0)
|
|
|
|
--index;
|
|
|
|
|
|
|
|
/* Track required split depth for leaf insert. */
|
|
|
|
if (btp->hdr.nused >= FPM_ITEMS_PER_INTERNAL_PAGE)
|
|
|
|
{
|
|
|
|
Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
|
|
|
|
result->split_pages++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
result->split_pages = 0;
|
|
|
|
|
|
|
|
/* Descend to appropriate child page. */
|
|
|
|
Assert(index < btp->hdr.nused);
|
|
|
|
child = relptr_access(base, btp->u.internal_key[index].child);
|
|
|
|
Assert(relptr_access(base, child->hdr.parent) == btp);
|
|
|
|
btp = child;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Track required split depth for leaf insert. */
|
|
|
|
if (btp->hdr.nused >= FPM_ITEMS_PER_LEAF_PAGE)
|
|
|
|
{
|
|
|
|
Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
|
|
|
|
result->split_pages++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
result->split_pages = 0;
|
|
|
|
|
|
|
|
/* Search leaf page. */
|
|
|
|
index = FreePageBtreeSearchLeaf(btp, first_page);
|
|
|
|
|
|
|
|
/* Assemble results. */
|
|
|
|
result->page = btp;
|
|
|
|
result->index = index;
|
|
|
|
result->found = index < btp->hdr.nused &&
|
|
|
|
first_page == btp->u.leaf_key[index].first_page;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search an internal page for the first key greater than or equal to a given
|
|
|
|
* page number. Returns the index of that key, or one greater than the number
|
|
|
|
* of keys on the page if none.
|
|
|
|
*/
|
|
|
|
static Size
|
|
|
|
FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page)
|
|
|
|
{
|
|
|
|
Size low = 0;
|
|
|
|
Size high = btp->hdr.nused;
|
|
|
|
|
|
|
|
Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
|
|
|
|
Assert(high > 0 && high <= FPM_ITEMS_PER_INTERNAL_PAGE);
|
|
|
|
|
|
|
|
while (low < high)
|
|
|
|
{
|
|
|
|
Size mid = (low + high) / 2;
|
|
|
|
Size val = btp->u.internal_key[mid].first_page;
|
|
|
|
|
|
|
|
if (first_page == val)
|
|
|
|
return mid;
|
|
|
|
else if (first_page < val)
|
|
|
|
high = mid;
|
|
|
|
else
|
|
|
|
low = mid + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return low;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search a leaf page for the first key greater than or equal to a given
|
|
|
|
* page number. Returns the index of that key, or one greater than the number
|
|
|
|
* of keys on the page if none.
|
|
|
|
*/
|
|
|
|
static Size
|
|
|
|
FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page)
|
|
|
|
{
|
|
|
|
Size low = 0;
|
|
|
|
Size high = btp->hdr.nused;
|
|
|
|
|
|
|
|
Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
|
|
|
|
Assert(high > 0 && high <= FPM_ITEMS_PER_LEAF_PAGE);
|
|
|
|
|
|
|
|
while (low < high)
|
|
|
|
{
|
|
|
|
Size mid = (low + high) / 2;
|
|
|
|
Size val = btp->u.leaf_key[mid].first_page;
|
|
|
|
|
|
|
|
if (first_page == val)
|
|
|
|
return mid;
|
|
|
|
else if (first_page < val)
|
|
|
|
high = mid;
|
|
|
|
else
|
|
|
|
low = mid + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return low;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Allocate a new btree page and move half the keys from the provided page
 * to the new page.  Caller is responsible for making sure that there's a
 * page available from fpm->btree_recycle.  Returns a pointer to the new page,
 * to which caller must add a downlink.
 */
static FreePageBtree *
FreePageBtreeSplitPage(FreePageManager *fpm, FreePageBtree *btp)
{
	FreePageBtree *newsibling;

	/* New sibling is the same kind of page and initially shares our parent. */
	newsibling = FreePageBtreeGetRecycled(fpm);
	newsibling->hdr.magic = btp->hdr.magic;
	newsibling->hdr.nused = btp->hdr.nused / 2;
	relptr_copy(newsibling->hdr.parent, btp->hdr.parent);
	btp->hdr.nused -= newsibling->hdr.nused;

	/* Copy the upper half of the keys into the new sibling. */
	if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
		memcpy(&newsibling->u.leaf_key,
			   &btp->u.leaf_key[btp->hdr.nused],
			   sizeof(FreePageBtreeLeafKey) * newsibling->hdr.nused);
	else
	{
		Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
		memcpy(&newsibling->u.internal_key,
			   &btp->u.internal_key[btp->hdr.nused],
			   sizeof(FreePageBtreeInternalKey) * newsibling->hdr.nused);
		/* The moved children must now point at the new sibling as parent. */
		FreePageBtreeUpdateParentPointers(fpm_segment_base(fpm), newsibling);
	}

	return newsibling;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When internal pages are split or merged, the parent pointers of their
|
|
|
|
* children must be updated.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp)
|
|
|
|
{
|
|
|
|
Size i;
|
|
|
|
|
|
|
|
Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
|
|
|
|
for (i = 0; i < btp->hdr.nused; ++i)
|
|
|
|
{
|
|
|
|
FreePageBtree *child;
|
|
|
|
|
|
|
|
child = relptr_access(base, btp->u.internal_key[i].child);
|
|
|
|
relptr_store(base, child->hdr.parent, btp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Debugging dump of btree data.
 *
 * Recursively appends one line per btree page to 'buf', showing the page
 * number, level, page kind ('i' or 'l'), and each key; a mismatch between
 * the expected and actual parent pointer is flagged inline.
 */
static void
FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
						 FreePageBtree *parent, int level, StringInfo buf)
{
	char	   *base = fpm_segment_base(fpm);
	Size		pageno = fpm_pointer_to_page(base, btp);
	Size		index;
	FreePageBtree *check_parent;

	/* Recursion depth is bounded only by tree depth; guard the stack. */
	check_stack_depth();
	check_parent = relptr_access(base, btp->hdr.parent);
	appendStringInfo(buf, "  %zu@%d %c", pageno, level,
					 btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ? 'i' : 'l');
	if (parent != check_parent)
		appendStringInfo(buf, " [actual parent %zu, expected %zu]",
						 fpm_pointer_to_page(base, check_parent),
						 fpm_pointer_to_page(base, parent));
	appendStringInfoChar(buf, ':');
	for (index = 0; index < btp->hdr.nused; ++index)
	{
		if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
			appendStringInfo(buf, " %zu->%zu",
							 btp->u.internal_key[index].first_page,
							 btp->u.internal_key[index].child.relptr_off / FPM_PAGE_SIZE);
		else
			appendStringInfo(buf, " %zu(%zu)",
							 btp->u.leaf_key[index].first_page,
							 btp->u.leaf_key[index].npages);
	}
	appendStringInfoChar(buf, '\n');

	/* Recurse into children of an internal page. */
	if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
	{
		for (index = 0; index < btp->hdr.nused; ++index)
		{
			FreePageBtree *child;

			child = relptr_access(base, btp->u.internal_key[index].child);
			FreePageManagerDumpBtree(fpm, child, btp, level + 1, buf);
		}
	}
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Debugging dump of free-span data.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FreePageManagerDumpSpans(FreePageManager *fpm, FreePageSpanLeader *span,
|
|
|
|
Size expected_pages, StringInfo buf)
|
|
|
|
{
|
|
|
|
char *base = fpm_segment_base(fpm);
|
|
|
|
|
|
|
|
while (span != NULL)
|
|
|
|
{
|
|
|
|
if (span->npages != expected_pages)
|
|
|
|
appendStringInfo(buf, " %zu(%zu)", fpm_pointer_to_page(base, span),
|
|
|
|
span->npages);
|
|
|
|
else
|
|
|
|
appendStringInfo(buf, " %zu", fpm_pointer_to_page(base, span));
|
|
|
|
span = relptr_access(base, span->next);
|
|
|
|
}
|
|
|
|
|
2017-08-16 05:34:39 +02:00
|
|
|
appendStringInfoChar(buf, '\n');
|
2016-12-02 18:03:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * This function allocates a run of pages of the given length from the free
 * page manager.  On success, returns true and stores the first page of the
 * allocated run in *first_page; on failure (no suitable span), returns false
 * and *first_page is not set.
 */
static bool
FreePageManagerGetInternal(FreePageManager *fpm, Size npages, Size *first_page)
{
	char	   *base = fpm_segment_base(fpm);
	FreePageSpanLeader *victim = NULL;
	FreePageSpanLeader *prev;
	FreePageSpanLeader *next;
	FreePageBtreeSearchResult result;
	Size		victim_page = 0;	/* placate compiler */
	Size		f;

	/*
	 * Search for a free span.
	 *
	 * Right now, we use a simple best-fit policy here, but it's possible for
	 * this to result in memory fragmentation if we're repeatedly asked to
	 * allocate chunks just a little smaller than what we have available.
	 * Hopefully, this is unlikely, because we expect most requests to be
	 * single pages or superblock-sized chunks -- but no policy can be optimal
	 * under all circumstances unless it has knowledge of future allocation
	 * patterns.
	 */
	for (f = Min(npages, FPM_NUM_FREELISTS) - 1; f < FPM_NUM_FREELISTS; ++f)
	{
		/* Skip empty freelists. */
		if (relptr_is_null(fpm->freelist[f]))
			continue;

		/*
		 * All of the freelists except the last one contain only items of a
		 * single size, so we just take the first one.  But the final free
		 * list contains everything too big for any of the other lists, so we
		 * need to search the list.
		 */
		if (f < FPM_NUM_FREELISTS - 1)
			victim = relptr_access(base, fpm->freelist[f]);
		else
		{
			FreePageSpanLeader *candidate;

			/* Best-fit scan; stop early on an exact-size match. */
			candidate = relptr_access(base, fpm->freelist[f]);
			do
			{
				if (candidate->npages >= npages && (victim == NULL ||
													victim->npages > candidate->npages))
				{
					victim = candidate;
					if (victim->npages == npages)
						break;
				}
				candidate = relptr_access(base, candidate->next);
			} while (candidate != NULL);
		}
		break;
	}

	/* If we didn't find an allocatable span, return failure. */
	if (victim == NULL)
		return false;

	/* Remove span from free list. */
	Assert(victim->magic == FREE_PAGE_SPAN_LEADER_MAGIC);
	prev = relptr_access(base, victim->prev);
	next = relptr_access(base, victim->next);
	if (prev != NULL)
		relptr_copy(prev->next, victim->next);
	else
		relptr_copy(fpm->freelist[f], victim->next);
	if (next != NULL)
		relptr_copy(next->prev, victim->prev);
	victim_page = fpm_pointer_to_page(base, victim);

	/* Decide whether we might be invalidating contiguous_pages. */
	if (f == FPM_NUM_FREELISTS - 1 &&
		victim->npages == fpm->contiguous_pages)
	{
		/*
		 * The victim span came from the oversized freelist, and had the same
		 * size as the longest span.  There may or may not be another one of
		 * the same size, so contiguous_pages must be recomputed just to be
		 * safe.
		 */
		fpm->contiguous_pages_dirty = true;
	}
	else if (f + 1 == fpm->contiguous_pages &&
			 relptr_is_null(fpm->freelist[f]))
	{
		/*
		 * The victim span came from a fixed sized freelist, and it was the
		 * list for spans of the same size as the current longest span, and
		 * the list is now empty after removing the victim.  So
		 * contiguous_pages must be recomputed without a doubt.
		 */
		fpm->contiguous_pages_dirty = true;
	}

	/*
	 * If we haven't initialized the btree yet, the victim must be the single
	 * span stored within the FreePageManager itself.  Otherwise, we need to
	 * update the btree.
	 */
	if (relptr_is_null(fpm->btree_root))
	{
		Assert(victim_page == fpm->singleton_first_page);
		Assert(victim->npages == fpm->singleton_npages);
		Assert(victim->npages >= npages);
		/* Allocate from the front of the singleton span. */
		fpm->singleton_first_page += npages;
		fpm->singleton_npages -= npages;
		if (fpm->singleton_npages > 0)
			FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
								   fpm->singleton_npages);
	}
	else
	{
		/*
		 * If the span we found is exactly the right size, remove it from the
		 * btree completely.  Otherwise, adjust the btree entry to reflect the
		 * still-unallocated portion of the span, and put that portion on the
		 * appropriate free list.
		 */
		FreePageBtreeSearch(fpm, victim_page, &result);
		Assert(result.found);
		if (victim->npages == npages)
			FreePageBtreeRemove(fpm, result.page, result.index);
		else
		{
			FreePageBtreeLeafKey *key;

			/* Adjust btree to reflect remaining pages. */
			Assert(victim->npages > npages);
			key = &result.page->u.leaf_key[result.index];
			Assert(key->npages == victim->npages);
			key->first_page += npages;
			key->npages -= npages;
			if (result.index == 0)
				FreePageBtreeAdjustAncestorKeys(fpm, result.page);

			/* Put the unallocated pages back on the appropriate free list. */
			FreePagePushSpanLeader(fpm, victim_page + npages,
								   victim->npages - npages);
		}
	}

	/* Return results to caller. */
	*first_page = fpm_pointer_to_page(base, victim);
	return true;
}
|
|
|
|
|
|
|
|
/*
 * Put a range of pages into the btree and freelists, consolidating it with
 * existing free spans just before and/or after it.  If 'soft' is true,
 * only perform the insertion if it can be done without allocating new btree
 * pages; if false, do it always.  Returns 0 if the soft flag caused the
 * insertion to be skipped, or otherwise the size of the contiguous span
 * created by the insertion.  This may be larger than npages if we're able
 * to consolidate with an adjacent range.
 */
static Size
FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
						   bool soft)
{
	char	   *base = fpm_segment_base(fpm);
	FreePageBtreeSearchResult result;
	FreePageBtreeLeafKey *prevkey = NULL;
	FreePageBtreeLeafKey *nextkey = NULL;
	FreePageBtree *np;
	Size		nindex;

	Assert(npages > 0);

	/* We can store a single free span without initializing the btree. */
	if (fpm->btree_depth == 0)
	{
		if (fpm->singleton_npages == 0)
		{
			/* Don't have a span yet; store this one. */
			fpm->singleton_first_page = first_page;
			fpm->singleton_npages = npages;
			FreePagePushSpanLeader(fpm, first_page, npages);
			return fpm->singleton_npages;
		}
		else if (fpm->singleton_first_page + fpm->singleton_npages ==
				 first_page)
		{
			/* New span immediately follows sole existing span. */
			fpm->singleton_npages += npages;
			FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
			FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
								   fpm->singleton_npages);
			return fpm->singleton_npages;
		}
		else if (first_page + npages == fpm->singleton_first_page)
		{
			/* New span immediately precedes sole existing span. */
			FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
			fpm->singleton_first_page = first_page;
			fpm->singleton_npages += npages;
			FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
								   fpm->singleton_npages);
			return fpm->singleton_npages;
		}
		else
		{
			/* Not contiguous; we need to initialize the btree. */
			Size		root_page;
			FreePageBtree *root;

			if (!relptr_is_null(fpm->btree_recycle))
				root = FreePageBtreeGetRecycled(fpm);
			else if (soft)
				return 0;		/* Should not allocate if soft. */
			else if (FreePageManagerGetInternal(fpm, 1, &root_page))
				root = (FreePageBtree *) fpm_page_to_pointer(base, root_page);
			else
			{
				/* We'd better be able to get a page from the existing range. */
				elog(FATAL, "free page manager btree is corrupt");
			}

			/* Create the btree and move the preexisting range into it. */
			root->hdr.magic = FREE_PAGE_LEAF_MAGIC;
			root->hdr.nused = 1;
			relptr_store(base, root->hdr.parent, (FreePageBtree *) NULL);
			root->u.leaf_key[0].first_page = fpm->singleton_first_page;
			root->u.leaf_key[0].npages = fpm->singleton_npages;
			relptr_store(base, fpm->btree_root, root);
			fpm->singleton_first_page = 0;
			fpm->singleton_npages = 0;
			fpm->btree_depth = 1;

			/*
			 * Corner case: it may be that the btree root took the very last
			 * free page.  In that case, the sole btree entry covers a zero
			 * page run, which is invalid.  Overwrite it with the entry we're
			 * trying to insert and get out.
			 */
			if (root->u.leaf_key[0].npages == 0)
			{
				root->u.leaf_key[0].first_page = first_page;
				root->u.leaf_key[0].npages = npages;
				FreePagePushSpanLeader(fpm, first_page, npages);
				return npages;
			}

			/* Fall through to insert the new key. */
		}
	}

	/* Search the btree. */
	FreePageBtreeSearch(fpm, first_page, &result);
	Assert(!result.found);
	if (result.index > 0)
		prevkey = &result.page->u.leaf_key[result.index - 1];
	if (result.index < result.page->hdr.nused)
	{
		np = result.page;
		nindex = result.index;
		nextkey = &result.page->u.leaf_key[result.index];
	}
	else
	{
		/* Insertion point is past the last key; successor is on a sibling. */
		np = FreePageBtreeFindRightSibling(base, result.page);
		nindex = 0;
		if (np != NULL)
			nextkey = &np->u.leaf_key[0];
	}

	/* Consolidate with the previous entry if possible. */
	if (prevkey != NULL && prevkey->first_page + prevkey->npages >= first_page)
	{
		bool		remove_next = false;
		Size		result;	/* NOTE: shadows the search-result variable above */

		Assert(prevkey->first_page + prevkey->npages == first_page);
		prevkey->npages = (first_page - prevkey->first_page) + npages;

		/* Check whether we can *also* consolidate with the following entry. */
		if (nextkey != NULL &&
			prevkey->first_page + prevkey->npages >= nextkey->first_page)
		{
			Assert(prevkey->first_page + prevkey->npages ==
				   nextkey->first_page);
			prevkey->npages = (nextkey->first_page - prevkey->first_page)
				+ nextkey->npages;
			FreePagePopSpanLeader(fpm, nextkey->first_page);
			remove_next = true;
		}

		/* Put the span on the correct freelist and save size. */
		FreePagePopSpanLeader(fpm, prevkey->first_page);
		FreePagePushSpanLeader(fpm, prevkey->first_page, prevkey->npages);
		result = prevkey->npages;

		/*
		 * If we consolidated with both the preceding and following entries,
		 * we must remove the following entry.  We do this last, because
		 * removing an element from the btree may invalidate pointers we hold
		 * into the current data structure.
		 *
		 * NB: The btree is technically in an invalid state at this point
		 * because we've already updated prevkey to cover the same key space
		 * as nextkey.  FreePageBtreeRemove() shouldn't notice that, though.
		 */
		if (remove_next)
			FreePageBtreeRemove(fpm, np, nindex);

		return result;
	}

	/* Consolidate with the next entry if possible. */
	if (nextkey != NULL && first_page + npages >= nextkey->first_page)
	{
		Size		newpages;

		/* Compute new size for span. */
		Assert(first_page + npages == nextkey->first_page);
		newpages = (nextkey->first_page - first_page) + nextkey->npages;

		/* Put span on correct free list. */
		FreePagePopSpanLeader(fpm, nextkey->first_page);
		FreePagePushSpanLeader(fpm, first_page, newpages);

		/* Update key in place. */
		nextkey->first_page = first_page;
		nextkey->npages = newpages;

		/* If reducing first key on page, ancestors might need adjustment. */
		if (nindex == 0)
			FreePageBtreeAdjustAncestorKeys(fpm, np);

		return nextkey->npages;
	}

	/* Split leaf page and as many of its ancestors as necessary. */
	if (result.split_pages > 0)
	{
		/*
		 * NB: We could consider various coping strategies here to avoid a
		 * split; most obviously, if np != result.page, we could target that
		 * page instead.  More complicated shuffling strategies could be
		 * possible as well; basically, unless every single leaf page is 100%
		 * full, we can jam this key in there if we try hard enough.  It's
		 * unlikely that trying that hard is worthwhile, but it's possible we
		 * might need to make more than no effort.  For now, we just do the
		 * easy thing, which is nothing.
		 */

		/* If this is a soft insert, it's time to give up. */
		if (soft)
			return 0;

		/* Check whether we need to allocate more btree pages to split. */
		if (result.split_pages > fpm->btree_recycle_count)
		{
			Size		pages_needed;
			Size		recycle_page;
			Size		i;

			/*
			 * Allocate the required number of pages and split each one in
			 * turn.  This should never fail, because if we've got enough
			 * spans of free pages kicking around that we need additional
			 * storage space just to remember them all, then we should
			 * certainly have enough to expand the btree, which should only
			 * ever use a tiny number of pages compared to the number under
			 * management.  If it does, something's badly screwed up.
			 */
			pages_needed = result.split_pages - fpm->btree_recycle_count;
			for (i = 0; i < pages_needed; ++i)
			{
				if (!FreePageManagerGetInternal(fpm, 1, &recycle_page))
					elog(FATAL, "free page manager btree is corrupt");
				FreePageBtreeRecycle(fpm, recycle_page);
			}

			/*
			 * The act of allocating pages to recycle may have invalidated the
			 * results of our previous btree research, so repeat it.  (We could
			 * recheck whether any of our split-avoidance strategies that were
			 * not viable before now are, but it hardly seems worthwhile, so
			 * we don't bother.  Consolidation can't be possible now if it
			 * wasn't previously.)
			 */
			FreePageBtreeSearch(fpm, first_page, &result);

			/*
			 * The act of allocating pages for use in constructing our btree
			 * should never cause any page to become more full, so the new
			 * split depth should be no greater than the old one, and perhaps
			 * less if we fortuitously allocated a chunk that freed up a slot
			 * on the page we need to update.
			 */
			Assert(result.split_pages <= fpm->btree_recycle_count);
		}

		/* If we still need to perform a split, do it. */
		if (result.split_pages > 0)
		{
			FreePageBtree *split_target = result.page;
			FreePageBtree *child = NULL;
			Size		key = first_page;

			for (;;)
			{
				FreePageBtree *newsibling;
				FreePageBtree *parent;

				/* Identify parent page, which must receive downlink. */
				parent = relptr_access(base, split_target->hdr.parent);

				/* Split the page - downlink not added yet. */
				newsibling = FreePageBtreeSplitPage(fpm, split_target);

				/*
				 * At this point in the loop, we're always carrying a pending
				 * insertion.  On the first pass, it's the actual key we're
				 * trying to insert; on subsequent passes, it's the downlink
				 * that needs to be added as a result of the split performed
				 * during the previous loop iteration.  Since we've just split
				 * the page, there's definitely room on one of the two
				 * resulting pages.
				 */
				if (child == NULL)
				{
					Size		index;
					FreePageBtree *insert_into;

					insert_into = key < newsibling->u.leaf_key[0].first_page ?
						split_target : newsibling;
					index = FreePageBtreeSearchLeaf(insert_into, key);
					FreePageBtreeInsertLeaf(insert_into, index, key, npages);
					if (index == 0 && insert_into == split_target)
						FreePageBtreeAdjustAncestorKeys(fpm, split_target);
				}
				else
				{
					Size		index;
					FreePageBtree *insert_into;

					insert_into =
						key < newsibling->u.internal_key[0].first_page ?
						split_target : newsibling;
					index = FreePageBtreeSearchInternal(insert_into, key);
					FreePageBtreeInsertInternal(base, insert_into, index,
												key, child);
					relptr_store(base, child->hdr.parent, insert_into);
					if (index == 0 && insert_into == split_target)
						FreePageBtreeAdjustAncestorKeys(fpm, split_target);
				}

				/* If the page we just split has no parent, split the root. */
				if (parent == NULL)
				{
					FreePageBtree *newroot;

					/* New root has exactly two downlinks; tree grows deeper. */
					newroot = FreePageBtreeGetRecycled(fpm);
					newroot->hdr.magic = FREE_PAGE_INTERNAL_MAGIC;
					newroot->hdr.nused = 2;
					relptr_store(base, newroot->hdr.parent,
								 (FreePageBtree *) NULL);
					newroot->u.internal_key[0].first_page =
						FreePageBtreeFirstKey(split_target);
					relptr_store(base, newroot->u.internal_key[0].child,
								 split_target);
					relptr_store(base, split_target->hdr.parent, newroot);
					newroot->u.internal_key[1].first_page =
						FreePageBtreeFirstKey(newsibling);
					relptr_store(base, newroot->u.internal_key[1].child,
								 newsibling);
					relptr_store(base, newsibling->hdr.parent, newroot);
					relptr_store(base, fpm->btree_root, newroot);
					fpm->btree_depth++;

					break;
				}

				/* If the parent page isn't full, insert the downlink. */
				key = newsibling->u.internal_key[0].first_page;
				if (parent->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE)
				{
					Size		index;

					index = FreePageBtreeSearchInternal(parent, key);
					FreePageBtreeInsertInternal(base, parent, index,
												key, newsibling);
					relptr_store(base, newsibling->hdr.parent, parent);
					if (index == 0)
						FreePageBtreeAdjustAncestorKeys(fpm, parent);
					break;
				}

				/* The parent also needs to be split, so loop around. */
				child = newsibling;
				split_target = parent;
			}

			/*
			 * The loop above did the insert, so just need to update the free
			 * list, and we're done.
			 */
			FreePagePushSpanLeader(fpm, first_page, npages);

			return npages;
		}
	}

	/* Physically add the key to the page. */
	Assert(result.page->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE);
	FreePageBtreeInsertLeaf(result.page, result.index, first_page, npages);

	/* If new first key on page, ancestors might need adjustment. */
	if (result.index == 0)
		FreePageBtreeAdjustAncestorKeys(fpm, result.page);

	/* Put it on the free list. */
	FreePagePushSpanLeader(fpm, first_page, npages);

	return npages;
}
|
|
|
|
|
|
|
|
/*
 * Remove a FreePageSpanLeader from the linked-list that contains it, either
 * because we're changing the size of the span, or because we're allocating it.
 */
static void
FreePagePopSpanLeader(FreePageManager *fpm, Size pageno)
{
	char	   *base = fpm_segment_base(fpm);
	FreePageSpanLeader *span;
	FreePageSpanLeader *next;
	FreePageSpanLeader *prev;

	span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);

	/* Splice the span out of its doubly-linked freelist. */
	next = relptr_access(base, span->next);
	prev = relptr_access(base, span->prev);
	if (next != NULL)
		relptr_copy(next->prev, span->prev);
	if (prev != NULL)
		relptr_copy(prev->next, span->next);
	else
	{
		/* No predecessor: span must be the head of its freelist. */
		Size		f = Min(span->npages, FPM_NUM_FREELISTS) - 1;

		Assert(fpm->freelist[f].relptr_off == pageno * FPM_PAGE_SIZE);
		relptr_copy(fpm->freelist[f], span->next);
	}
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize a new FreePageSpanLeader and put it on the appropriate free list.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FreePagePushSpanLeader(FreePageManager *fpm, Size first_page, Size npages)
|
|
|
|
{
|
|
|
|
char *base = fpm_segment_base(fpm);
|
|
|
|
Size f = Min(npages, FPM_NUM_FREELISTS) - 1;
|
|
|
|
FreePageSpanLeader *head = relptr_access(base, fpm->freelist[f]);
|
|
|
|
FreePageSpanLeader *span;
|
|
|
|
|
|
|
|
span = (FreePageSpanLeader *) fpm_page_to_pointer(base, first_page);
|
|
|
|
span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
|
|
|
|
span->npages = npages;
|
|
|
|
relptr_store(base, span->next, head);
|
|
|
|
relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
|
|
|
|
if (head != NULL)
|
|
|
|
relptr_store(base, head->prev, span);
|
|
|
|
relptr_store(base, fpm->freelist[f], span);
|
|
|
|
}
|