1155 lines
35 KiB
C
1155 lines
35 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* slab.c
|
|
* SLAB allocator definitions.
|
|
*
|
|
* SLAB is a MemoryContext implementation designed for cases where large
|
|
* numbers of equally-sized objects can be allocated and freed efficiently
|
|
* with minimal memory wastage and fragmentation.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/utils/mmgr/slab.c
|
|
*
|
|
*
|
|
* NOTE:
|
|
* The constant allocation size allows significant simplification and various
|
|
* optimizations over more general purpose allocators. The blocks are carved
|
|
* into chunks of exactly the right size, wasting only the space required to
|
|
* MAXALIGN the allocated chunks.
|
|
*
|
|
* Slab can also help reduce memory fragmentation in cases where longer-lived
|
|
* chunks remain stored on blocks while most of the other chunks have already
|
|
* been pfree'd. We give priority to putting new allocations into the
|
|
 * "fullest" block.  This helps avoid having too many sparsely used blocks
|
|
* around and allows blocks to more easily become completely unused which
|
|
* allows them to be eventually free'd.
|
|
*
|
|
* We identify the "fullest" block to put new allocations on by using a block
|
|
* from the lowest populated element of the context's "blocklist" array.
|
|
* This is an array of dlists containing blocks which we partition by the
|
|
 * number of free chunks which each block has.  Blocks with fewer free chunks are
|
|
* stored in a lower indexed dlist array slot. Full blocks go on the 0th
|
|
* element of the blocklist array. So that we don't have to have too many
|
|
* elements in the array, each dlist in the array is responsible for a range
|
|
* of free chunks. When a chunk is palloc'd or pfree'd we may need to move
|
|
* the block onto another dlist if the number of free chunks crosses the
|
|
* range boundary that the current list is responsible for. Having just a
|
|
* few blocklist elements reduces the number of times we must move the block
|
|
* onto another dlist element.
|
|
*
|
|
* We keep track of free chunks within each block by using a block-level free
|
|
* list. We consult this list when we allocate a new chunk in the block.
|
|
* The free list is a linked list, the head of which is pointed to with
|
|
* SlabBlock's freehead field. Each subsequent list item is stored in the
|
|
* free chunk's memory. We ensure chunks are large enough to store this
|
|
* address.
|
|
*
|
|
* When we allocate a new block, technically all chunks are free, however, to
|
|
* avoid having to write out the entire block to set the linked list for the
|
|
* free chunks for every chunk in the block, we instead store a pointer to
|
|
* the next "unused" chunk on the block and keep track of how many of these
|
|
* unused chunks there are. When a new block is malloc'd, all chunks are
|
|
* unused. The unused pointer starts with the first chunk on the block and
|
|
* as chunks are allocated, the unused pointer is incremented. As chunks are
|
|
* pfree'd, the unused pointer never goes backwards. The unused pointer can
|
|
* be thought of as a high watermark for the maximum number of chunks in the
|
|
* block which have been in use concurrently. When a chunk is pfree'd the
|
|
* chunk is put onto the head of the free list and the unused pointer is not
|
|
* changed. We only consume more unused chunks if we run out of free chunks
|
|
* on the free list. This method effectively gives priority to using
|
|
* previously used chunks over previously unused chunks, which should perform
|
|
* better due to CPU caching effects.
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "lib/ilist.h"
|
|
#include "utils/memdebug.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/memutils_internal.h"
|
|
#include "utils/memutils_memorychunk.h"
|
|
|
|
#define Slab_BLOCKHDRSZ MAXALIGN(sizeof(SlabBlock))
|
|
|
|
#ifdef MEMORY_CONTEXT_CHECKING
|
|
/*
|
|
* Size of the memory required to store the SlabContext.
|
|
* MEMORY_CONTEXT_CHECKING builds need some extra memory for the isChunkFree
|
|
* array.
|
|
*/
|
|
#define Slab_CONTEXT_HDRSZ(chunksPerBlock) \
|
|
(sizeof(SlabContext) + ((chunksPerBlock) * sizeof(bool)))
|
|
#else
|
|
#define Slab_CONTEXT_HDRSZ(chunksPerBlock) sizeof(SlabContext)
|
|
#endif
|
|
|
|
/*
|
|
 * The number of partitions to divide the blocklist into based on their number of
|
|
* free chunks. There must be at least 2.
|
|
*/
|
|
#define SLAB_BLOCKLIST_COUNT 3
|
|
|
|
/* The maximum number of completely empty blocks to keep around for reuse. */
|
|
#define SLAB_MAXIMUM_EMPTY_BLOCKS 10
|
|
|
|
/*
|
|
* SlabContext is a specialized implementation of MemoryContext.
|
|
*/
|
|
typedef struct SlabContext
{
	MemoryContextData header;	/* Standard memory-context fields */

	/* Allocation parameters for this context: */
	uint32		chunkSize;		/* the requested (non-aligned) chunk size */
	uint32		fullChunkSize;	/* chunk size with chunk header and alignment */
	uint32		blockSize;		/* the size to make each block of chunks */
	int32		chunksPerBlock; /* number of chunks that fit in 1 block */
	int32		curBlocklistIndex;	/* index into the blocklist[] element
									 * containing the fullest blocks */
#ifdef MEMORY_CONTEXT_CHECKING
	bool	   *isChunkFree;	/* array to mark free chunks in a block during
								 * SlabCheck */
#endif

	int32		blocklist_shift;	/* number of bits to shift the nfree count
									 * by to get the index into blocklist[] */
	dclist_head emptyblocks;	/* empty blocks to use up first instead of
								 * mallocing new blocks */

	/*
	 * Blocks with free space, grouped by the number of free chunks they
	 * contain.  Completely full blocks are stored in the 0th element.
	 * Completely empty blocks are stored in emptyblocks or free'd if we have
	 * enough empty blocks already.
	 */
	dlist_head	blocklist[SLAB_BLOCKLIST_COUNT];
} SlabContext;
|
|
|
|
/*
|
|
* SlabBlock
|
|
* Structure of a single slab block.
|
|
*
|
|
* slab: pointer back to the owning MemoryContext
|
|
* nfree: number of chunks on the block which are unallocated
|
|
* nunused: number of chunks on the block unallocated and not on the block's
|
|
* freelist.
|
|
* freehead: linked-list header storing a pointer to the first free chunk on
|
|
* the block. Subsequent pointers are stored in the chunk's memory. NULL
|
|
* indicates the end of the list.
|
|
* unused: pointer to the next chunk which has yet to be used.
|
|
* node: doubly-linked list node for the context's blocklist
|
|
*/
|
|
typedef struct SlabBlock
{
	SlabContext *slab;			/* owning context */
	int32		nfree;			/* number of chunks on free + unused chunks */
	int32		nunused;		/* number of unused chunks */
	MemoryChunk *freehead;		/* pointer to the first free chunk */
	MemoryChunk *unused;		/* pointer to the next unused chunk */
	dlist_node	node;			/* doubly-linked list for blocklist[] */
} SlabBlock;
|
|
|
|
|
|
#define Slab_CHUNKHDRSZ sizeof(MemoryChunk)
|
|
#define SlabChunkGetPointer(chk) \
|
|
((void *) (((char *) (chk)) + sizeof(MemoryChunk)))
|
|
|
|
/*
|
|
* SlabBlockGetChunk
|
|
* Obtain a pointer to the nth (0-based) chunk in the block
|
|
*/
|
|
#define SlabBlockGetChunk(slab, block, n) \
|
|
((MemoryChunk *) ((char *) (block) + Slab_BLOCKHDRSZ \
|
|
+ ((n) * (slab)->fullChunkSize)))
|
|
|
|
#if defined(MEMORY_CONTEXT_CHECKING) || defined(USE_ASSERT_CHECKING)
|
|
|
|
/*
|
|
* SlabChunkIndex
|
|
* Get the 0-based index of how many chunks into the block the given
|
|
* chunk is.
|
|
*/
|
|
#define SlabChunkIndex(slab, block, chunk) \
|
|
(((char *) (chunk) - (char *) SlabBlockGetChunk(slab, block, 0)) / \
|
|
(slab)->fullChunkSize)
|
|
|
|
/*
|
|
* SlabChunkMod
|
|
* A MemoryChunk should always be at an address which is a multiple of
|
|
* fullChunkSize starting from the 0th chunk position. This will return
|
|
* non-zero if it's not.
|
|
*/
|
|
#define SlabChunkMod(slab, block, chunk) \
|
|
(((char *) (chunk) - (char *) SlabBlockGetChunk(slab, block, 0)) % \
|
|
(slab)->fullChunkSize)
|
|
|
|
#endif
|
|
|
|
/*
|
|
* SlabIsValid
|
|
* True iff set is a valid slab allocation set.
|
|
*/
|
|
#define SlabIsValid(set) (PointerIsValid(set) && IsA(set, SlabContext))
|
|
|
|
/*
|
|
* SlabBlockIsValid
|
|
* True iff block is a valid block of slab allocation set.
|
|
*/
|
|
#define SlabBlockIsValid(block) \
|
|
(PointerIsValid(block) && SlabIsValid((block)->slab))
|
|
|
|
/*
|
|
* SlabBlocklistIndex
|
|
* Determine the blocklist index that a block should be in for the given
|
|
* number of free chunks.
|
|
*/
|
|
static inline int32
SlabBlocklistIndex(SlabContext *slab, int nfree)
{
	int32		index;
	int32		blocklist_shift = slab->blocklist_shift;

	Assert(nfree >= 0 && nfree <= slab->chunksPerBlock);

	/*
	 * Determine the blocklist index based on the number of free chunks.  We
	 * must ensure that 0 free chunks is dedicated to index 0.  Everything
	 * else must be >= 1 and < SLAB_BLOCKLIST_COUNT.
	 *
	 * To make this as efficient as possible, we exploit some two's complement
	 * arithmetic where we reverse the sign before bit shifting.  This results
	 * in an nfree of 0 using index 0 and anything non-zero staying non-zero.
	 * This is exploiting 0 and -0 being the same in two's complement.  When
	 * we're done, we just need to flip the sign back over again for a
	 * positive index.
	 */
	index = -((-nfree) >> blocklist_shift);

	/* self-check: 0 must map to 0, everything else to a non-zero slot */
	if (nfree == 0)
		Assert(index == 0);
	else
		Assert(index >= 1 && index < SLAB_BLOCKLIST_COUNT);

	return index;
}
|
|
|
|
/*
|
|
* SlabFindNextBlockListIndex
|
|
* Search blocklist for blocks which have free chunks and return the
|
|
* index of the blocklist found containing at least 1 block with free
|
|
* chunks. If no block can be found we return 0.
|
|
*
|
|
* Note: We give priority to fuller blocks so that these are filled before
|
|
* emptier blocks. This is done to increase the chances that mostly-empty
|
|
* blocks will eventually become completely empty so they can be free'd.
|
|
*/
|
|
static int32
|
|
SlabFindNextBlockListIndex(SlabContext *slab)
|
|
{
|
|
/* start at 1 as blocklist[0] is for full blocks. */
|
|
for (int i = 1; i < SLAB_BLOCKLIST_COUNT; i++)
|
|
{
|
|
/* return the first found non-empty index */
|
|
if (!dlist_is_empty(&slab->blocklist[i]))
|
|
return i;
|
|
}
|
|
|
|
/* no blocks with free space */
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* SlabGetNextFreeChunk
|
|
* Return the next free chunk in block and update the block to account
|
|
* for the returned chunk now being used.
|
|
*/
|
|
static inline MemoryChunk *
SlabGetNextFreeChunk(SlabContext *slab, SlabBlock *block)
{
	MemoryChunk *chunk;

	Assert(block->nfree > 0);

	if (block->freehead != NULL)
	{
		chunk = block->freehead;

		/*
		 * Pop the chunk from the linked list of free chunks.  The pointer to
		 * the next free chunk is stored in the chunk itself.
		 */
		VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(MemoryChunk *));
		block->freehead = *(MemoryChunk **) SlabChunkGetPointer(chunk);

		/* check nothing stomped on the free chunk's memory */
		Assert(block->freehead == NULL ||
			   (block->freehead >= SlabBlockGetChunk(slab, block, 0) &&
				block->freehead <= SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1) &&
				SlabChunkMod(slab, block, block->freehead) == 0));
	}
	else
	{
		/*
		 * Free list is empty, so carve the next chunk off the block's
		 * never-yet-used region.  'unused' advances by one chunk; it never
		 * moves backwards (it acts as a high watermark).
		 */
		Assert(block->nunused > 0);

		chunk = block->unused;
		block->unused = (MemoryChunk *) (((char *) block->unused) + slab->fullChunkSize);
		block->nunused--;
	}

	/* either way, one fewer chunk is available on this block */
	block->nfree--;

	return chunk;
}
|
|
|
|
/*
|
|
* SlabContextCreate
|
|
* Create a new Slab context.
|
|
*
|
|
* parent: parent context, or NULL if top-level context
|
|
* name: name of context (must be statically allocated)
|
|
* blockSize: allocation block size
|
|
* chunkSize: allocation chunk size
|
|
*
|
|
* The Slab_CHUNKHDRSZ + MAXALIGN(chunkSize + 1) may not exceed
|
|
* MEMORYCHUNK_MAX_VALUE.
|
|
* 'blockSize' may not exceed MEMORYCHUNK_MAX_BLOCKOFFSET.
|
|
*/
|
|
MemoryContext
SlabContextCreate(MemoryContext parent,
				  const char *name,
				  Size blockSize,
				  Size chunkSize)
{
	int			chunksPerBlock;
	Size		fullChunkSize;
	SlabContext *slab;
	int			i;

	/* ensure MemoryChunk's size is properly maxaligned */
	StaticAssertDecl(Slab_CHUNKHDRSZ == MAXALIGN(Slab_CHUNKHDRSZ),
					 "sizeof(MemoryChunk) is not maxaligned");
	Assert(blockSize <= MEMORYCHUNK_MAX_BLOCKOFFSET);

	/*
	 * Ensure there's enough space to store the pointer to the next free chunk
	 * in the memory of the (otherwise) unused allocation.
	 */
	if (chunkSize < sizeof(MemoryChunk *))
		chunkSize = sizeof(MemoryChunk *);

	/* length of the maxaligned chunk including the chunk header */
#ifdef MEMORY_CONTEXT_CHECKING
	/* ensure there's always space for the sentinel byte */
	fullChunkSize = Slab_CHUNKHDRSZ + MAXALIGN(chunkSize + 1);
#else
	fullChunkSize = Slab_CHUNKHDRSZ + MAXALIGN(chunkSize);
#endif

	Assert(fullChunkSize <= MEMORYCHUNK_MAX_VALUE);

	/* compute the number of chunks that will fit on each block */
	chunksPerBlock = (blockSize - Slab_BLOCKHDRSZ) / fullChunkSize;

	/* Make sure the block can store at least one chunk. */
	if (chunksPerBlock == 0)
		elog(ERROR, "block size %zu for slab is too small for %zu-byte chunks",
			 blockSize, chunkSize);

	/* the context header is malloc'd directly, not from a parent context */
	slab = (SlabContext *) malloc(Slab_CONTEXT_HDRSZ(chunksPerBlock));
	if (slab == NULL)
	{
		MemoryContextStats(TopMemoryContext);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory"),
				 errdetail("Failed while creating memory context \"%s\".",
						   name)));
	}

	/*
	 * Avoid writing code that can fail between here and MemoryContextCreate;
	 * we'd leak the header if we ereport in this stretch.
	 */

	/* Fill in SlabContext-specific header fields */
	slab->chunkSize = (uint32) chunkSize;
	slab->fullChunkSize = (uint32) fullChunkSize;
	slab->blockSize = (uint32) blockSize;
	slab->chunksPerBlock = chunksPerBlock;
	slab->curBlocklistIndex = 0;

	/*
	 * Compute a shift that guarantees that shifting chunksPerBlock with it is
	 * < SLAB_BLOCKLIST_COUNT - 1.  The reason that we subtract 1 from
	 * SLAB_BLOCKLIST_COUNT in this calculation is that we reserve the 0th
	 * blocklist element for blocks which have no free chunks.
	 *
	 * We calculate the number of bits to shift by rather than a divisor to
	 * divide by as performing division each time we need to find the
	 * blocklist index would be much slower.
	 */
	slab->blocklist_shift = 0;
	while ((slab->chunksPerBlock >> slab->blocklist_shift) >= (SLAB_BLOCKLIST_COUNT - 1))
		slab->blocklist_shift++;

	/* initialize the list to store empty blocks to be reused */
	dclist_init(&slab->emptyblocks);

	/* initialize each blocklist slot */
	for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
		dlist_init(&slab->blocklist[i]);

#ifdef MEMORY_CONTEXT_CHECKING
	/* set the isChunkFree pointer right after the end of the context */
	slab->isChunkFree = (bool *) ((char *) slab + sizeof(SlabContext));
#endif

	/* Finally, do the type-independent part of context creation */
	MemoryContextCreate((MemoryContext) slab,
						T_SlabContext,
						MCTX_SLAB_ID,
						parent,
						name);

	return (MemoryContext) slab;
}
|
|
|
|
/*
|
|
* SlabReset
|
|
* Frees all memory which is allocated in the given set.
|
|
*
|
|
* The code simply frees all the blocks in the context - we don't keep any
|
|
* keeper blocks or anything like that.
|
|
*/
|
|
void
SlabReset(MemoryContext context)
{
	SlabContext *slab = (SlabContext *) context;
	dlist_mutable_iter miter;
	int			i;

	Assert(SlabIsValid(slab));

#ifdef MEMORY_CONTEXT_CHECKING
	/* Check for corruption and leaks before freeing */
	SlabCheck(context);
#endif

	/* release any retained empty blocks */
	dclist_foreach_modify(miter, &slab->emptyblocks)
	{
		SlabBlock  *block = dlist_container(SlabBlock, node, miter.cur);

		dclist_delete_from(&slab->emptyblocks, miter.cur);

#ifdef CLOBBER_FREED_MEMORY
		wipe_mem(block, slab->blockSize);
#endif
		free(block);
		context->mem_allocated -= slab->blockSize;
	}

	/* walk over blocklist and free the blocks */
	for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
	{
		dlist_foreach_modify(miter, &slab->blocklist[i])
		{
			SlabBlock  *block = dlist_container(SlabBlock, node, miter.cur);

			dlist_delete(miter.cur);

#ifdef CLOBBER_FREED_MEMORY
			wipe_mem(block, slab->blockSize);
#endif
			free(block);
			context->mem_allocated -= slab->blockSize;
		}
	}

	slab->curBlocklistIndex = 0;

	/* every block was accounted for above; nothing should remain */
	Assert(context->mem_allocated == 0);
}
|
|
|
|
/*
|
|
* SlabDelete
|
|
* Free all memory which is allocated in the given context.
|
|
*/
|
|
void
|
|
SlabDelete(MemoryContext context)
|
|
{
|
|
/* Reset to release all the SlabBlocks */
|
|
SlabReset(context);
|
|
/* And free the context header */
|
|
free(context);
|
|
}
|
|
|
|
/*
|
|
 * Small helper for allocating a new chunk from a block, to avoid duplicating
 * the code between SlabAlloc() and SlabAllocFromNewBlock().
|
|
*/
|
|
static inline void *
SlabAllocSetupNewChunk(MemoryContext context, SlabBlock *block,
					   MemoryChunk *chunk, Size size)
{
	SlabContext *slab = (SlabContext *) context;

	/*
	 * Check that the chunk pointer is actually somewhere on the block and is
	 * aligned as expected.
	 */
	Assert(chunk >= SlabBlockGetChunk(slab, block, 0));
	Assert(chunk <= SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1));
	Assert(SlabChunkMod(slab, block, chunk) == 0);

	/* Prepare to initialize the chunk header. */
	VALGRIND_MAKE_MEM_UNDEFINED(chunk, Slab_CHUNKHDRSZ);

	MemoryChunkSetHdrMask(chunk, block, MAXALIGN(slab->chunkSize), MCTX_SLAB_ID);

#ifdef MEMORY_CONTEXT_CHECKING
	/* slab mark to catch clobber of "unused" space */
	Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ));
	set_sentinel(MemoryChunkGetPointer(chunk), size);
	VALGRIND_MAKE_MEM_NOACCESS(((char *) chunk) + Slab_CHUNKHDRSZ +
							   slab->chunkSize,
							   slab->fullChunkSize -
							   (slab->chunkSize + Slab_CHUNKHDRSZ));
#endif

#ifdef RANDOMIZE_ALLOCATED_MEMORY
	/* fill the allocated space with junk */
	randomize_mem((char *) MemoryChunkGetPointer(chunk), size);
#endif

	/* Disallow access to the chunk header. */
	VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);

	return MemoryChunkGetPointer(chunk);
}
|
|
|
|
pg_noinline
static void *
SlabAllocFromNewBlock(MemoryContext context, Size size, int flags)
{
	SlabContext *slab = (SlabContext *) context;
	SlabBlock  *block;
	MemoryChunk *chunk;
	dlist_head *blocklist;
	int			blocklist_idx;

	/* to save allocating a new one, first check the empty blocks list */
	if (dclist_count(&slab->emptyblocks) > 0)
	{
		dlist_node *node = dclist_pop_head_node(&slab->emptyblocks);

		block = dlist_container(SlabBlock, node, node);

		/*
		 * SlabFree() should have left this block in a valid state with all
		 * chunks free.  Ensure that's the case.
		 */
		Assert(block->nfree == slab->chunksPerBlock);

		/* fetch the next chunk from this block */
		chunk = SlabGetNextFreeChunk(slab, block);
	}
	else
	{
		block = (SlabBlock *) malloc(slab->blockSize);

		if (unlikely(block == NULL))
			return MemoryContextAllocationFailure(context, size, flags);

		block->slab = slab;
		context->mem_allocated += slab->blockSize;

		/* use the first chunk in the new block */
		chunk = SlabBlockGetChunk(slab, block, 0);

		/*
		 * All remaining chunks are "unused"; point 'unused' at the second
		 * chunk rather than building a free list over the whole block.
		 */
		block->nfree = slab->chunksPerBlock - 1;
		block->unused = SlabBlockGetChunk(slab, block, 1);
		block->freehead = NULL;
		block->nunused = slab->chunksPerBlock - 1;
	}

	/* find the blocklist element for storing blocks with 1 used chunk */
	blocklist_idx = SlabBlocklistIndex(slab, block->nfree);
	blocklist = &slab->blocklist[blocklist_idx];

	/* this better be empty.  We just added a block thinking it was */
	Assert(dlist_is_empty(blocklist));

	dlist_push_head(blocklist, &block->node);

	slab->curBlocklistIndex = blocklist_idx;

	return SlabAllocSetupNewChunk(context, block, chunk, size);
}
|
|
|
|
/*
|
|
* SlabAllocInvalidSize
|
|
* Handle raising an ERROR for an invalid size request. We don't do this
|
|
* in slab alloc as calling the elog functions would force the compiler
|
|
* to setup the stack frame in SlabAlloc. For performance reasons, we
|
|
* want to avoid that.
|
|
*/
|
|
pg_noinline
static void
pg_attribute_noreturn()
SlabAllocInvalidSize(MemoryContext context, Size size)
{
	SlabContext *slab = (SlabContext *) context;

	/* slab only serves allocations of exactly chunkSize bytes */
	elog(ERROR, "unexpected alloc chunk size %zu (expected %u)", size,
		 slab->chunkSize);
}
|
|
|
|
/*
|
|
* SlabAlloc
|
|
* Returns a pointer to a newly allocated memory chunk or raises an ERROR
|
|
* on allocation failure, or returns NULL when flags contains
|
|
* MCXT_ALLOC_NO_OOM. 'size' must be the same size as was specified
|
|
* during SlabContextCreate().
|
|
*
|
|
* This function should only contain the most common code paths. Everything
|
|
* else should be in pg_noinline helper functions, thus avoiding the overhead
|
|
* of creating a stack frame for the common cases. Allocating memory is often
|
|
* a bottleneck in many workloads, so avoiding stack frame setup is
|
|
* worthwhile. Helper functions should always directly return the newly
|
|
* allocated memory so that we can just return that address directly as a tail
|
|
* call.
|
|
*/
|
|
void *
SlabAlloc(MemoryContext context, Size size, int flags)
{
	SlabContext *slab = (SlabContext *) context;
	SlabBlock  *block;
	MemoryChunk *chunk;

	Assert(SlabIsValid(slab));

	/* sanity check that this is pointing to a valid blocklist */
	Assert(slab->curBlocklistIndex >= 0);
	Assert(slab->curBlocklistIndex <= SlabBlocklistIndex(slab, slab->chunksPerBlock));

	/*
	 * Make sure we only allow correct request size.  This doubles as the
	 * MemoryContextCheckSize check.
	 */
	if (unlikely(size != slab->chunkSize))
		SlabAllocInvalidSize(context, size);

	if (unlikely(slab->curBlocklistIndex == 0))
	{
		/*
		 * Handle the case when there are no partially filled blocks
		 * available.  This happens either when the last allocation took the
		 * last chunk in the block, or when SlabFree() free'd the final block.
		 */
		return SlabAllocFromNewBlock(context, size, flags);
	}
	else
	{
		dlist_head *blocklist = &slab->blocklist[slab->curBlocklistIndex];
		int			new_blocklist_idx;

		Assert(!dlist_is_empty(blocklist));

		/* grab the block from the blocklist */
		block = dlist_head_element(SlabBlock, node, blocklist);

		/* make sure we actually got a valid block, with matching nfree */
		Assert(block != NULL);
		Assert(slab->curBlocklistIndex == SlabBlocklistIndex(slab, block->nfree));
		Assert(block->nfree > 0);

		/* fetch the next chunk from this block */
		chunk = SlabGetNextFreeChunk(slab, block);

		/* get the new blocklist index based on the new free chunk count */
		new_blocklist_idx = SlabBlocklistIndex(slab, block->nfree);

		/*
		 * Handle the case where the blocklist index changes.  This also deals
		 * with blocks becoming full as only full blocks go at index 0.
		 */
		if (unlikely(slab->curBlocklistIndex != new_blocklist_idx))
		{
			dlist_delete_from(blocklist, &block->node);
			dlist_push_head(&slab->blocklist[new_blocklist_idx], &block->node);

			if (dlist_is_empty(blocklist))
				slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
		}
	}

	return SlabAllocSetupNewChunk(context, block, chunk, size);
}
|
|
|
|
/*
|
|
* SlabFree
|
|
* Frees allocated memory; memory is removed from the slab.
|
|
*/
|
|
void
SlabFree(void *pointer)
{
	MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
	SlabBlock  *block;
	SlabContext *slab;
	int			curBlocklistIdx;
	int			newBlocklistIdx;

	/* Allow access to the chunk header. */
	VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);

	block = MemoryChunkGetBlock(chunk);

	/*
	 * For speed reasons we just Assert that the referenced block is good.
	 * Future field experience may show that this Assert had better become a
	 * regular runtime test-and-elog check.
	 */
	Assert(SlabBlockIsValid(block));
	slab = block->slab;

#ifdef MEMORY_CONTEXT_CHECKING
	/* Test for someone scribbling on unused space in chunk */
	Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ));
	if (!sentinel_ok(pointer, slab->chunkSize))
		elog(WARNING, "detected write past chunk end in %s %p",
			 slab->header.name, chunk);
#endif

	/* push this chunk onto the head of the block's free list */
	*(MemoryChunk **) pointer = block->freehead;
	block->freehead = chunk;

	block->nfree++;

	Assert(block->nfree > 0);
	Assert(block->nfree <= slab->chunksPerBlock);

#ifdef CLOBBER_FREED_MEMORY
	/* don't wipe the free list MemoryChunk pointer stored in the chunk */
	wipe_mem((char *) pointer + sizeof(MemoryChunk *),
			 slab->chunkSize - sizeof(MemoryChunk *));
#endif

	/* indexes for the block's position before and after this pfree */
	curBlocklistIdx = SlabBlocklistIndex(slab, block->nfree - 1);
	newBlocklistIdx = SlabBlocklistIndex(slab, block->nfree);

	/*
	 * Check if the block needs to be moved to another element on the
	 * blocklist based on it now having 1 more free chunk.
	 */
	if (unlikely(curBlocklistIdx != newBlocklistIdx))
	{
		/* do the move */
		dlist_delete_from(&slab->blocklist[curBlocklistIdx], &block->node);
		dlist_push_head(&slab->blocklist[newBlocklistIdx], &block->node);

		/*
		 * The blocklist[curBlocklistIdx] may now be empty or we may now be
		 * able to use a lower-element blocklist.  We'll need to redetermine
		 * what the slab->curBlocklistIndex is if the current blocklist was
		 * changed or if a lower element one was changed.  We must ensure we
		 * use the list with the fullest block(s).
		 */
		if (slab->curBlocklistIndex >= curBlocklistIdx)
		{
			slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);

			/*
			 * We know there must be a block with at least 1 unused chunk as
			 * we just pfree'd one.  Ensure curBlocklistIndex reflects this.
			 */
			Assert(slab->curBlocklistIndex > 0);
		}
	}

	/* Handle when a block becomes completely empty */
	if (unlikely(block->nfree == slab->chunksPerBlock))
	{
		/* remove the block */
		dlist_delete_from(&slab->blocklist[newBlocklistIdx], &block->node);

		/*
		 * To avoid thrashing malloc/free, we keep a list of empty blocks that
		 * we can reuse again instead of having to malloc a new one.
		 */
		if (dclist_count(&slab->emptyblocks) < SLAB_MAXIMUM_EMPTY_BLOCKS)
			dclist_push_head(&slab->emptyblocks, &block->node);
		else
		{
			/*
			 * When we have enough empty blocks stored already, we actually
			 * free the block.
			 */
#ifdef CLOBBER_FREED_MEMORY
			wipe_mem(block, slab->blockSize);
#endif
			free(block);
			slab->header.mem_allocated -= slab->blockSize;
		}

		/*
		 * Check if we need to reset the blocklist index.  This is required
		 * when the blocklist this block is on has become completely empty.
		 */
		if (slab->curBlocklistIndex == newBlocklistIdx &&
			dlist_is_empty(&slab->blocklist[newBlocklistIdx]))
			slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
	}
}
|
|
|
|
/*
|
|
* SlabRealloc
|
|
* Change the allocated size of a chunk.
|
|
*
|
|
* As Slab is designed for allocating equally-sized chunks of memory, it can't
|
|
* do an actual chunk size change. We try to be gentle and allow calls with
|
|
* exactly the same size, as in that case we can simply return the same
|
|
* chunk. When the size differs, we throw an error.
|
|
*
|
|
* We could also allow requests with size < chunkSize. That however seems
|
|
* rather pointless - Slab is meant for chunks of constant size, and moreover
|
|
* realloc is usually used to enlarge the chunk.
|
|
*/
|
|
void *
SlabRealloc(void *pointer, Size size, int flags)
{
	MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
	SlabBlock  *block;
	SlabContext *slab;

	/* Allow access to the chunk header. */
	VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);

	block = MemoryChunkGetBlock(chunk);

	/* Disallow access to the chunk header. */
	VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);

	/*
	 * Try to verify that we have a sane block pointer: the block header
	 * should reference a slab context.  (We use a test-and-elog, not just
	 * Assert, because it seems highly likely that we're here in error in the
	 * first place.)
	 */
	if (!SlabBlockIsValid(block))
		elog(ERROR, "could not find block containing chunk %p", chunk);
	slab = block->slab;

	/* can't do actual realloc with slab, but let's try to be gentle */
	if (size == slab->chunkSize)
		return pointer;

	elog(ERROR, "slab allocator does not support realloc()");
	return NULL;				/* keep compiler quiet */
}
|
|
|
|
/*
|
|
* SlabGetChunkContext
|
|
* Return the MemoryContext that 'pointer' belongs to.
|
|
*/
|
|
MemoryContext
SlabGetChunkContext(void *pointer)
{
	MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
	SlabBlock  *block;

	/* Allow access to the chunk header. */
	VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);

	block = MemoryChunkGetBlock(chunk);

	/* Disallow access to the chunk header. */
	VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);

	Assert(SlabBlockIsValid(block));

	/* the block's back-pointer leads to the owning context */
	return &block->slab->header;
}
|
|
|
|
/*
|
|
* SlabGetChunkSpace
|
|
* Given a currently-allocated chunk, determine the total space
|
|
* it occupies (including all memory-allocation overhead).
|
|
*/
|
|
Size
SlabGetChunkSpace(void *pointer)
{
	MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
	SlabBlock  *block;
	SlabContext *slab;

	/* Allow access to the chunk header. */
	VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);

	block = MemoryChunkGetBlock(chunk);

	/* Disallow access to the chunk header. */
	VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);

	Assert(SlabBlockIsValid(block));
	slab = block->slab;

	/* every chunk in a slab occupies exactly fullChunkSize bytes */
	return slab->fullChunkSize;
}
|
|
|
|
/*
|
|
* SlabIsEmpty
|
|
* Is the slab empty of any allocated space?
|
|
*/
|
|
bool
|
|
SlabIsEmpty(MemoryContext context)
|
|
{
|
|
Assert(SlabIsValid((SlabContext *) context));
|
|
|
|
return (context->mem_allocated == 0);
|
|
}
|
|
|
|
/*
|
|
* SlabStats
|
|
* Compute stats about memory consumption of a Slab context.
|
|
*
|
|
* printfunc: if not NULL, pass a human-readable stats string to this.
|
|
* passthru: pass this pointer through to printfunc.
|
|
* totals: if not NULL, add stats about this context into *totals.
|
|
* print_to_stderr: print stats to stderr if true, elog otherwise.
|
|
*/
|
|
void
SlabStats(MemoryContext context,
		  MemoryStatsPrintFunc printfunc, void *passthru,
		  MemoryContextCounters *totals,
		  bool print_to_stderr)
{
	SlabContext *slab = (SlabContext *) context;
	Size		nblocks = 0;
	Size		freechunks = 0;
	Size		totalspace;
	Size		freespace = 0;
	int			i;

	Assert(SlabIsValid(slab));

	/* Include context header in totalspace */
	totalspace = Slab_CONTEXT_HDRSZ(slab->chunksPerBlock);

	/* Add the space consumed by blocks in the emptyblocks list */
	totalspace += dclist_count(&slab->emptyblocks) * slab->blockSize;

	/* tally each block on every blocklist element */
	for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
	{
		dlist_iter	iter;

		dlist_foreach(iter, &slab->blocklist[i])
		{
			SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);

			nblocks++;
			totalspace += slab->blockSize;
			freespace += slab->fullChunkSize * block->nfree;
			freechunks += block->nfree;
		}
	}

	if (printfunc)
	{
		char		stats_string[200];

		/* XXX should we include free chunks on empty blocks? */
		snprintf(stats_string, sizeof(stats_string),
				 "%zu total in %zu blocks; %u empty blocks; %zu free (%zu chunks); %zu used",
				 totalspace, nblocks, dclist_count(&slab->emptyblocks),
				 freespace, freechunks, totalspace - freespace);
		printfunc(context, passthru, stats_string, print_to_stderr);
	}

	if (totals)
	{
		totals->nblocks += nblocks;
		totals->freechunks += freechunks;
		totals->totalspace += totalspace;
		totals->freespace += freespace;
	}
}
|
|
|
|
|
|
#ifdef MEMORY_CONTEXT_CHECKING
|
|
|
|
/*
 * SlabCheck
 *		Walk through all blocks looking for inconsistencies.
 *
 * Verifies, for every block known to the context, that the block's header
 * fields (nfree, nunused, freehead, unused, slab back-link) agree with each
 * other, with the blocklist the block is stored on, and with the actual
 * chunks found by walking the block's free list.  Also checks each in-use
 * chunk's block link and sentinel byte.
 *
 * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
 * find yourself in an infinite loop when trouble occurs, because this
 * routine will be entered again when elog cleanup tries to release memory!
 */
void
SlabCheck(MemoryContext context)
{
	SlabContext *slab = (SlabContext *) context;
	int			i;
	int			nblocks = 0;	/* running count, cross-checked vs. mem_allocated */
	const char *name = slab->header.name;
	dlist_iter	iter;

	Assert(SlabIsValid(slab));
	Assert(slab->chunksPerBlock > 0);

	/*
	 * Have a look at the empty blocks.  These should have all their chunks
	 * marked as free.  Ensure that's the case.
	 */
	dclist_foreach(iter, &slab->emptyblocks)
	{
		SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);

		if (block->nfree != slab->chunksPerBlock)
			elog(WARNING, "problem in slab %s: empty block %p should have %d free chunks but has %d chunks free",
				 name, block, slab->chunksPerBlock, block->nfree);
	}

	/* walk the non-empty block lists */
	for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
	{
		int			j,
					nfree;

		/* walk all blocks on this blocklist */
		dlist_foreach(iter, &slab->blocklist[i])
		{
			SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
			MemoryChunk *cur_chunk;

			/*
			 * Make sure the number of free chunks (in the block header)
			 * matches the position in the blocklist.
			 */
			if (SlabBlocklistIndex(slab, block->nfree) != i)
				elog(WARNING, "problem in slab %s: block %p is on blocklist %d but should be on blocklist %d",
					 name, block, i, SlabBlocklistIndex(slab, block->nfree));

			/* make sure the block is not empty (those belong on emptyblocks) */
			if (block->nfree >= slab->chunksPerBlock)
				elog(WARNING, "problem in slab %s: empty block %p incorrectly stored on blocklist element %d",
					 name, block, i);

			/* make sure the slab pointer correctly points to this context */
			if (block->slab != slab)
				elog(WARNING, "problem in slab %s: bogus slab link in block %p",
					 name, block);

			/* reset the array of free chunks for this block */
			memset(slab->isChunkFree, 0, (slab->chunksPerBlock * sizeof(bool)));
			nfree = 0;

			/* walk through the block's free list chunks */
			cur_chunk = block->freehead;
			while (cur_chunk != NULL)
			{
				int			chunkidx = SlabChunkIndex(slab, block, cur_chunk);

				/*
				 * Ensure the free list link points to something on the block
				 * at an address aligned according to the full chunk size.
				 */
				if (cur_chunk < SlabBlockGetChunk(slab, block, 0) ||
					cur_chunk > SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1) ||
					SlabChunkMod(slab, block, cur_chunk) != 0)
					elog(WARNING, "problem in slab %s: bogus free list link %p in block %p",
						 name, cur_chunk, block);

				/* count the chunk and mark it free on the free chunk array */
				nfree++;
				slab->isChunkFree[chunkidx] = true;

				/*
				 * read pointer of the next free chunk (stored in the chunk's
				 * user-data area; make it Valgrind-readable first)
				 */
				VALGRIND_MAKE_MEM_DEFINED(MemoryChunkGetPointer(cur_chunk), sizeof(MemoryChunk *));
				cur_chunk = *(MemoryChunk **) SlabChunkGetPointer(cur_chunk);
			}

			/* check that the unused pointer matches what nunused claims */
			if (SlabBlockGetChunk(slab, block, slab->chunksPerBlock - block->nunused) !=
				block->unused)
				elog(WARNING, "problem in slab %s: mismatch detected between nunused chunks and unused pointer in block %p",
					 name, block);

			/*
			 * count the remaining free chunks that have yet to make it onto
			 * the block's free list.  These sit contiguously from 'unused'
			 * to the end of the block.
			 */
			cur_chunk = block->unused;
			for (j = 0; j < block->nunused; j++)
			{
				int			chunkidx = SlabChunkIndex(slab, block, cur_chunk);


				/* count the chunk as free and mark it as so in the array */
				nfree++;
				/* guard the array write: chunkidx may be bogus if 'unused' is corrupt */
				if (chunkidx < slab->chunksPerBlock)
					slab->isChunkFree[chunkidx] = true;

				/* move forward 1 chunk */
				cur_chunk = (MemoryChunk *) (((char *) cur_chunk) + slab->fullChunkSize);
			}

			/* every chunk not marked free above must be in use: validate it */
			for (j = 0; j < slab->chunksPerBlock; j++)
			{
				if (!slab->isChunkFree[j])
				{
					MemoryChunk *chunk = SlabBlockGetChunk(slab, block, j);
					SlabBlock  *chunkblock;

					/* Allow access to the chunk header. */
					VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);

					chunkblock = (SlabBlock *) MemoryChunkGetBlock(chunk);

					/* Disallow access to the chunk header. */
					VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);

					/*
					 * check the chunk's blockoffset correctly points back to
					 * the block
					 */
					if (chunkblock != block)
						elog(WARNING, "problem in slab %s: bogus block link in block %p, chunk %p",
							 name, block, chunk);

					/* check the sentinel byte is intact */
					Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ));
					if (!sentinel_ok(chunk, Slab_CHUNKHDRSZ + slab->chunkSize))
						elog(WARNING, "problem in slab %s: detected write past chunk end in block %p, chunk %p",
							 name, block, chunk);
				}
			}

			/*
			 * Make sure we got the expected number of free chunks (as tracked
			 * in the block header).
			 */
			if (nfree != block->nfree)
				elog(WARNING, "problem in slab %s: nfree in block %p is %d but %d chunk were found as free",
					 name, block, block->nfree, nfree);

			nblocks++;
		}
	}

	/* the stored empty blocks are tracked in mem_allocated too */
	nblocks += dclist_count(&slab->emptyblocks);

	Assert(nblocks * slab->blockSize == context->mem_allocated);
}
|
|
|
|
#endif /* MEMORY_CONTEXT_CHECKING */
|