diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
index c2f9bb6ad3..c366febdcc 100644
--- a/src/backend/utils/mmgr/slab.c
+++ b/src/backend/utils/mmgr/slab.c
@@ -4,7 +4,8 @@
  * SLAB allocator definitions.
  *
  * SLAB is a MemoryContext implementation designed for cases where large
- * numbers of equally-sized objects are allocated (and freed).
+ * numbers of equally-sized objects can be allocated and freed efficiently
+ * with minimal memory wastage and fragmentation.
  *
  *
  * Portions Copyright (c) 2017-2022, PostgreSQL Global Development Group
@@ -16,36 +17,51 @@
  * NOTE:
  * The constant allocation size allows significant simplification and various
  * optimizations over more general purpose allocators. The blocks are carved
- * into chunks of exactly the right size (plus alignment), not wasting any
- * memory.
+ * into chunks of exactly the right size, wasting only the space required to
+ * MAXALIGN the allocated chunks.
  *
- * The information about free chunks is maintained both at the block level and
- * global (context) level. This is possible as the chunk size (and thus also
- * the number of chunks per block) is fixed.
+ * Slab can also help reduce memory fragmentation in cases where longer-lived
+ * chunks remain stored on blocks while most of the other chunks have already
+ * been pfree'd. We give priority to putting new allocations into the
+ * "fullest" block. This helps avoid having too many sparsely used blocks
+ * around and allows blocks to more easily become completely unused, which
+ * in turn allows them to eventually be free'd.
 *
- * On each block, free chunks are tracked in a simple linked list. Contents
- * of free chunks is replaced with an index of the next free chunk, forming
- * a very simple linked list. Each block also contains a counter of free
- * chunks. Combined with the local block-level freelist, it makes it trivial
- * to eventually free the whole block.
+ * We identify the "fullest" block to put new allocations on by using a block
+ * from the lowest populated element of the context's "blocklist" array.
+ * This is an array of dlists containing blocks which we partition by the
+ * number of free chunks each block has. Blocks with fewer free chunks are
+ * stored in a lower indexed dlist array slot. Full blocks go on the 0th
+ * element of the blocklist array. So that we don't have to have too many
+ * elements in the array, each dlist in the array is responsible for a range
+ * of free chunks. When a chunk is palloc'd or pfree'd we may need to move
+ * the block onto another dlist if the number of free chunks crosses the
+ * range boundary that the current list is responsible for. Having just a
+ * few blocklist elements reduces the number of times we must move the block
+ * onto another dlist element.
 *
- * At the context level, we use 'freelist' to track blocks ordered by number
- * of free chunks, starting with blocks having a single allocated chunk, and
- * with completely full blocks on the tail.
+ * We keep track of free chunks within each block by using a block-level free
+ * list. We consult this list when we allocate a new chunk in the block.
+ * The free list is a linked list, the head of which is pointed to by
+ * SlabBlock's freehead field. Each subsequent list item is stored in the
+ * free chunk's memory. We ensure chunks are large enough to store this
+ * address.
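+ *
+ * As a simplified illustration of that chaining (hypothetical chunk
+ * numbers, not taken from any particular block): if chunk 2 and then
+ * chunk 0 are pfree'd, the block ends up with
+ *
+ *   freehead -> [chunk 0] -> [chunk 2] -> NULL
+ *
+ * where each link after freehead is a pointer written into the free
+ * chunk's own memory.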
 *
- * This also allows various optimizations - for example when searching for
- * free chunk, the allocator reuses space from the fullest blocks first, in
- * the hope that some of the less full blocks will get completely empty (and
- * returned back to the OS).
- *
- * For each block, we maintain pointer to the first free chunk - this is quite
- * cheap and allows us to skip all the preceding used chunks, eliminating
- * a significant number of lookups in many common usage patterns. In the worst
- * case this performs as if the pointer was not maintained.
- *
- * We cache the freelist index for the blocks with the fewest free chunks
- * (minFreeChunks), so that we don't have to search the freelist on every
- * SlabAlloc() call, which is quite expensive.
+ * When we allocate a new block, technically all chunks are free. However,
+ * to avoid having to write out the entire block just to link every chunk
+ * onto the free list, we instead store a pointer to the next "unused" chunk
+ * on the block and keep track of how many of these unused chunks there are.
+ * When a new block is malloc'd, all chunks are unused. The unused pointer
+ * starts with the first chunk on the block and as chunks are allocated, the
+ * unused pointer is incremented. As chunks are pfree'd, the unused pointer
+ * never goes backwards. The unused pointer can be thought of as a high
+ * watermark for the maximum number of chunks in the block which have been in
+ * use concurrently. When a chunk is pfree'd the chunk is put onto the head
+ * of the free list and the unused pointer is not changed. We only consume
+ * more unused chunks if we run out of free chunks on the free list. This
+ * method effectively gives priority to using previously used chunks over
+ * previously unused chunks, which should perform better due to CPU caching
+ * effects.
 *
 *-------------------------------------------------------------------------
 */
@@ -60,6 +76,27 @@
 
 #define Slab_BLOCKHDRSZ	MAXALIGN(sizeof(SlabBlock))
 
+#ifdef MEMORY_CONTEXT_CHECKING
+/*
+ * Size of the memory required to store the SlabContext.
+ * MEMORY_CONTEXT_CHECKING builds need some extra memory for the isChunkFree
+ * array.
+ */
+#define Slab_CONTEXT_HDRSZ(chunksPerBlock) \
+	(sizeof(SlabContext) + ((chunksPerBlock) * sizeof(bool)))
+#else
+#define Slab_CONTEXT_HDRSZ(chunksPerBlock) sizeof(SlabContext)
+#endif
+
+/*
+ * The number of partitions to divide the blocklist into based on their
+ * number of free chunks. There must be at least 2.
+ */
+#define SLAB_BLOCKLIST_COUNT 3
+
+/* The maximum number of completely empty blocks to keep around for reuse. */
+#define SLAB_MAXIMUM_EMPTY_BLOCKS 10
+
 /*
 * SlabContext is a specialized implementation of MemoryContext.
 */
@@ -67,64 +104,206 @@ typedef struct SlabContext
 {
 	MemoryContextData header;	/* Standard memory-context fields */
 
 	/* Allocation parameters for this context: */
-	Size		chunkSize;		/* chunk size */
-	Size		fullChunkSize;	/* chunk size including header and alignment */
-	Size		blockSize;		/* block size */
-	Size		headerSize;		/* allocated size of context header */
-	int			chunksPerBlock; /* number of chunks per block */
-	int			minFreeChunks;	/* min number of free chunks in any block */
-	int			nblocks;		/* number of blocks allocated */
+	Size		chunkSize;		/* the requested (non-aligned) chunk size */
+	Size		fullChunkSize;	/* chunk size with chunk header and alignment */
+	Size		blockSize;		/* the size to make each block of chunks */
+	int32		chunksPerBlock; /* number of chunks that fit in 1 block */
+	int32		curBlocklistIndex;	/* index into the blocklist[] element
+									 * containing the fullest blocks */
 #ifdef MEMORY_CONTEXT_CHECKING
-	bool	   *freechunks;		/* bitmap of free chunks in a block */
+	bool	   *isChunkFree;	/* array to mark free chunks in a block during
+								 * SlabCheck */
 #endif
-	/* blocks with free space, grouped by number of free chunks: */
-	dlist_head	freelist[FLEXIBLE_ARRAY_MEMBER];
+
+	int32		blocklist_shift;	/* number of bits to shift the nfree count
+									 * by to get the index into blocklist[] */
+	dclist_head emptyblocks;	/* empty blocks to use up first instead of
+								 * mallocing new blocks */
+
+	/*
+	 * Blocks with free space, grouped by the number of free chunks they
+	 * contain. Completely full blocks are stored in the 0th element.
+	 * Completely empty blocks are stored in emptyblocks or free'd if we have
+	 * enough empty blocks already.
+	 */
+	dlist_head	blocklist[SLAB_BLOCKLIST_COUNT];
 } SlabContext;
 
 /*
 * SlabBlock
- *		Structure of a single block in SLAB allocator.
+ *		Structure of a single slab block.
 *
- * node: doubly-linked list of blocks in global freelist
- * nfree: number of free chunks in this block
- * firstFreeChunk: index of the first free chunk
+ * slab: pointer back to the owning MemoryContext
+ * nfree: number of chunks on the block which are unallocated
+ * nunused: number of chunks on the block which are unallocated and not on
+ * the block's freelist.
+ * freehead: linked-list header storing a pointer to the first free chunk on
+ * the block. Subsequent pointers are stored in the chunk's memory. NULL
+ * indicates the end of the list.
+ * unused: pointer to the next chunk which has yet to be used.
+ * node: doubly-linked list node for the context's blocklist
 */
 typedef struct SlabBlock
 {
-	dlist_node	node;			/* doubly-linked list */
-	int			nfree;			/* number of free chunks */
-	int			firstFreeChunk; /* index of the first free chunk in the block */
 	SlabContext *slab;			/* owning context */
+	int32		nfree;			/* number of free chunks (on the freelist,
+								 * plus unused) */
+	int32		nunused;		/* number of unused chunks */
+	MemoryChunk *freehead;		/* pointer to the first free chunk */
+	MemoryChunk *unused;		/* pointer to the next unused chunk */
+	dlist_node	node;			/* doubly-linked list for blocklist[] */
 } SlabBlock;
 
 #define Slab_CHUNKHDRSZ sizeof(MemoryChunk)
-#define SlabPointerGetChunk(ptr) \
-	((MemoryChunk *)(((char *)(ptr)) - sizeof(MemoryChunk)))
 #define SlabChunkGetPointer(chk) \
-	((void *)(((char *)(chk)) + sizeof(MemoryChunk)))
-#define SlabBlockGetChunk(slab, block, idx) \
+	((void *) (((char *) (chk)) + sizeof(MemoryChunk)))
+
+/*
+ * SlabBlockGetChunk
+ *		Obtain a pointer to the nth (0-based) chunk in the block
+ */
+#define SlabBlockGetChunk(slab, block, n) \
 	((MemoryChunk *) ((char *) (block) + Slab_BLOCKHDRSZ \
-					  + (idx * slab->fullChunkSize)))
-#define SlabBlockStart(block) \
-	((char *) block + Slab_BLOCKHDRSZ)
+					  + ((n) * (slab)->fullChunkSize)))
+
+#if defined(MEMORY_CONTEXT_CHECKING) || defined(USE_ASSERT_CHECKING)
+
+/*
+ * SlabChunkIndex
+ *		Get the 0-based index of how many chunks into the block the given
+ *		chunk is.
+ */
 #define SlabChunkIndex(slab, block, chunk) \
-	(((char *) chunk - SlabBlockStart(block)) / slab->fullChunkSize)
+	(((char *) (chunk) - (char *) SlabBlockGetChunk(slab, block, 0)) / \
+	 (slab)->fullChunkSize)
+
+/*
+ * SlabChunkMod
+ *		A MemoryChunk should always be at an address which is a multiple of
+ *		fullChunkSize starting from the 0th chunk position. This will return
+ *		non-zero if it's not.
+ */
+#define SlabChunkMod(slab, block, chunk) \
+	(((char *) (chunk) - (char *) SlabBlockGetChunk(slab, block, 0)) % \
+	 (slab)->fullChunkSize)
+
+#endif
 
 /*
 * SlabIsValid
- *		True iff set is valid slab allocation set.
+ *		True iff set is a valid slab allocation set.
 */
-#define SlabIsValid(set) \
-	(PointerIsValid(set) && IsA(set, SlabContext))
+#define SlabIsValid(set) (PointerIsValid(set) && IsA(set, SlabContext))
 
 /*
 * SlabBlockIsValid
- *		True iff block is valid block of slab allocation set.
+ *		True iff block is a valid block of slab allocation set.
 */
 #define SlabBlockIsValid(block) \
 	(PointerIsValid(block) && SlabIsValid((block)->slab))
 
+/*
+ * SlabBlocklistIndex
+ *		Determine the blocklist index that a block should be in for the given
+ *		number of free chunks.
+ */
+static inline int32
+SlabBlocklistIndex(SlabContext *slab, int nfree)
+{
+	int32		index;
+	int32		blocklist_shift = slab->blocklist_shift;
+
+	Assert(nfree >= 0 && nfree <= slab->chunksPerBlock);
+
+	/*
+	 * Determine the blocklist index based on the number of free chunks. We
+	 * must ensure that 0 free chunks is dedicated to index 0. Everything
+	 * else must be >= 1 and < SLAB_BLOCKLIST_COUNT.
+	 *
+	 * To make this as efficient as possible, we exploit some two's
+	 * complement arithmetic where we reverse the sign before bit shifting.
+	 * This results in an nfree of 0 using index 0 and anything non-zero
+	 * staying non-zero. This is exploiting 0 and -0 being the same in two's
+	 * complement. When we're done, we just need to flip the sign back over
+	 * again for a positive index.
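+	 *
+	 * As a worked example (hypothetical numbers; the trick relies on the
+	 * right shift of a negative int being an arithmetic shift): with
+	 * chunksPerBlock = 63 and SLAB_BLOCKLIST_COUNT = 3, SlabContextCreate()
+	 * picks blocklist_shift = 5, giving:
+	 *
+	 *   nfree =  0:  -((-0)  >> 5) = -(0)  = 0
+	 *   nfree =  1:  -((-1)  >> 5) = -(-1) = 1
+	 *   nfree = 32:  -((-32) >> 5) = -(-1) = 1
+	 *   nfree = 33:  -((-33) >> 5) = -(-2) = 2
+	 *   nfree = 63:  -((-63) >> 5) = -(-2) = 2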
+	 */
+	index = -((-nfree) >> blocklist_shift);
+
+	if (nfree == 0)
+		Assert(index == 0);
+	else
+		Assert(index >= 1 && index < SLAB_BLOCKLIST_COUNT);
+
+	return index;
+}
+
+/*
+ * SlabFindNextBlockListIndex
+ *		Search the blocklist for blocks which have free chunks and return the
+ *		lowest index containing at least 1 block with free chunks. If no such
+ *		block can be found, we return 0.
+ *
+ * Note: We give priority to fuller blocks so that these are filled before
+ * emptier blocks. This is done to increase the chances that mostly-empty
+ * blocks will eventually become completely empty so they can be free'd.
+ */
+static int32
+SlabFindNextBlockListIndex(SlabContext *slab)
+{
+	/* start at 1 as blocklist[0] is for full blocks. */
+	for (int i = 1; i < SLAB_BLOCKLIST_COUNT; i++)
+	{
+		/* return the first found non-empty index */
+		if (!dlist_is_empty(&slab->blocklist[i]))
+			return i;
+	}
+
+	/* no blocks with free space */
+	return 0;
+}
+
+/*
+ * SlabGetNextFreeChunk
+ *		Return the next free chunk in block and update the block to account
+ *		for the returned chunk now being used.
+ */
+static inline MemoryChunk *
+SlabGetNextFreeChunk(SlabContext *slab, SlabBlock *block)
+{
+	MemoryChunk *chunk;
+
+	Assert(block->nfree > 0);
+
+	if (block->freehead != NULL)
+	{
+		chunk = block->freehead;
+
+		/*
+		 * Pop the chunk from the linked list of free chunks. The pointer to
+		 * the next free chunk is stored in the chunk itself.
+		 */
+		VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(MemoryChunk *));
+		block->freehead = *(MemoryChunk **) SlabChunkGetPointer(chunk);
+
+		/* check nothing stomped on the free chunk's memory */
+		Assert(block->freehead == NULL ||
+			   (block->freehead >= SlabBlockGetChunk(slab, block, 0) &&
+				block->freehead <= SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1) &&
+				SlabChunkMod(slab, block, block->freehead) == 0));
+	}
+	else
+	{
+		Assert(block->nunused > 0);
+
+		chunk = block->unused;
+		block->unused = (MemoryChunk *) (((char *) block->unused) + slab->fullChunkSize);
+		block->nunused--;
+	}
+
+	block->nfree--;
+
+	return chunk;
+}
 
 /*
 * SlabContextCreate
@@ -145,8 +324,6 @@ SlabContextCreate(MemoryContext parent,
 {
 	int			chunksPerBlock;
 	Size		fullChunkSize;
-	Size		freelistSize;
-	Size		headerSize;
 	SlabContext *slab;
 	int			i;
 
@@ -155,11 +332,14 @@
 					"sizeof(MemoryChunk) is not maxaligned");
 	Assert(MAXALIGN(chunkSize) <= MEMORYCHUNK_MAX_VALUE);
 
-	/* Make sure the linked list node fits inside a freed chunk */
-	if (chunkSize < sizeof(int))
-		chunkSize = sizeof(int);
+	/*
+	 * Ensure there's enough space to store the pointer to the next free chunk
+	 * in the memory of the (otherwise) unused allocation.
+	 */
+	if (chunkSize < sizeof(MemoryChunk *))
+		chunkSize = sizeof(MemoryChunk *);
 
-	/* chunk, including SLAB header (both addresses nicely aligned) */
+	/* length of the maxaligned chunk including the chunk header */
 #ifdef MEMORY_CONTEXT_CHECKING
 	/* ensure there's always space for the sentinel byte */
 	fullChunkSize = Slab_CHUNKHDRSZ + MAXALIGN(chunkSize + 1);
@@ -167,36 +347,17 @@
 	fullChunkSize = Slab_CHUNKHDRSZ + MAXALIGN(chunkSize);
 #endif
 
-	/* Make sure the block can store at least one chunk. */
-	if (blockSize < fullChunkSize + Slab_BLOCKHDRSZ)
-		elog(ERROR, "block size %zu for slab is too small for %zu chunks",
-			 blockSize, chunkSize);
-
-	/* Compute maximum number of chunks per block */
+	/* compute the number of chunks that will fit on each block */
 	chunksPerBlock = (blockSize - Slab_BLOCKHDRSZ) / fullChunkSize;
 
-	/* The freelist starts with 0, ends with chunksPerBlock. */
-	freelistSize = sizeof(dlist_head) * (chunksPerBlock + 1);
+	/* Make sure the block can store at least one chunk. */
+	if (chunksPerBlock == 0)
+		elog(ERROR, "block size %zu for slab is too small for %zu-byte chunks",
+			 blockSize, chunkSize);
 
-	/*
-	 * Allocate the context header. Unlike aset.c, we never try to combine
-	 * this with the first regular block; not worth the extra complication.
-	 */
-	/* Size of the memory context header */
-	headerSize = offsetof(SlabContext, freelist) + freelistSize;
-#ifdef MEMORY_CONTEXT_CHECKING
-
-	/*
-	 * With memory checking, we need to allocate extra space for the bitmap of
-	 * free chunks. The bitmap is an array of bools, so we don't need to worry
-	 * about alignment.
-	 */
-	headerSize += chunksPerBlock * sizeof(bool);
-#endif
-
-	slab = (SlabContext *) malloc(headerSize);
+	slab = (SlabContext *) malloc(Slab_CONTEXT_HDRSZ(chunksPerBlock));
 	if (slab == NULL)
 	{
 		MemoryContextStats(TopMemoryContext);
@@ -216,19 +377,33 @@
 	slab->chunkSize = chunkSize;
 	slab->fullChunkSize = fullChunkSize;
 	slab->blockSize = blockSize;
-	slab->headerSize = headerSize;
 	slab->chunksPerBlock = chunksPerBlock;
-	slab->minFreeChunks = 0;
-	slab->nblocks = 0;
+	slab->curBlocklistIndex = 0;
 
-	/* initialize the freelist slots */
-	for (i = 0; i < (slab->chunksPerBlock + 1); i++)
-		dlist_init(&slab->freelist[i]);
+	/*
+	 * Compute a shift that guarantees that shifting chunksPerBlock with it is
+	 * < SLAB_BLOCKLIST_COUNT - 1. The reason that we subtract 1 from
+	 * SLAB_BLOCKLIST_COUNT in this calculation is that we reserve the 0th
+	 * blocklist element for blocks which have no free chunks.
+	 *
+	 * We calculate the number of bits to shift by rather than a divisor to
+	 * divide by as performing division each time we need to find the
+	 * blocklist index would be much slower.
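+	 *
+	 * For example (hypothetical numbers): with chunksPerBlock = 63 and
+	 * SLAB_BLOCKLIST_COUNT = 3, the loop below stops at blocklist_shift = 5,
+	 * the first shift for which 63 >> blocklist_shift (= 1) drops below
+	 * SLAB_BLOCKLIST_COUNT - 1.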
+	 */
+	slab->blocklist_shift = 0;
+	while ((slab->chunksPerBlock >> slab->blocklist_shift) >= (SLAB_BLOCKLIST_COUNT - 1))
+		slab->blocklist_shift++;
+
+	/* initialize the list to store empty blocks to be reused */
+	dclist_init(&slab->emptyblocks);
+
+	/* initialize each blocklist slot */
+	for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
+		dlist_init(&slab->blocklist[i]);
 
 #ifdef MEMORY_CONTEXT_CHECKING
-	/* set the freechunks pointer right after the freelists array */
-	slab->freechunks
-		= (bool *) slab + offsetof(SlabContext, freelist) + freelistSize;
+	/* set the isChunkFree pointer right after the end of the context */
+	slab->isChunkFree = (bool *) ((char *) slab + sizeof(SlabContext));
 #endif
 
 	/* Finally, do the type-independent part of context creation */
@@ -252,6 +427,7 @@ void
 SlabReset(MemoryContext context)
 {
 	SlabContext *slab = (SlabContext *) context;
+	dlist_mutable_iter miter;
 	int			i;
 
 	Assert(SlabIsValid(slab));
@@ -261,12 +437,24 @@
 	SlabCheck(context);
 #endif
 
-	/* walk over freelists and free the blocks */
-	for (i = 0; i <= slab->chunksPerBlock; i++)
+	/* release any retained empty blocks */
+	dclist_foreach_modify(miter, &slab->emptyblocks)
 	{
-		dlist_mutable_iter miter;
+		SlabBlock  *block = dlist_container(SlabBlock, node, miter.cur);
 
-		dlist_foreach_modify(miter, &slab->freelist[i])
+		dclist_delete_from(&slab->emptyblocks, miter.cur);
+
+#ifdef CLOBBER_FREED_MEMORY
+		wipe_mem(block, slab->blockSize);
+#endif
+		free(block);
+		context->mem_allocated -= slab->blockSize;
+	}
+
+	/* walk over blocklist and free the blocks */
+	for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
+	{
+		dlist_foreach_modify(miter, &slab->blocklist[i])
 		{
 			SlabBlock  *block = dlist_container(SlabBlock, node, miter.cur);
 
@@ -276,14 +464,12 @@
 			wipe_mem(block, slab->blockSize);
 #endif
 			free(block);
-			slab->nblocks--;
 			context->mem_allocated -= slab->blockSize;
 		}
 	}
 
-	slab->minFreeChunks = 0;
+	slab->curBlocklistIndex = 0;
 
-	Assert(slab->nblocks == 0);
 	Assert(context->mem_allocated == 0);
 }
 
@@ -302,7 +488,7 @@ SlabDelete(MemoryContext context)
 
 /*
 * SlabAlloc
- *		Returns pointer to allocated memory of given size or NULL if
+ *		Returns a pointer to allocated memory of given size or NULL if
 *		request could not be completed; memory is added to the slab.
 */
 void *
@@ -311,127 +497,118 @@ SlabAlloc(MemoryContext context, Size size)
 	SlabContext *slab = (SlabContext *) context;
 	SlabBlock  *block;
 	MemoryChunk *chunk;
-	int			idx;
 
 	Assert(SlabIsValid(slab));
 
-	Assert((slab->minFreeChunks >= 0) &&
-		   (slab->minFreeChunks < slab->chunksPerBlock));
+	/* sanity check that this is pointing to a valid blocklist */
+	Assert(slab->curBlocklistIndex >= 0);
+	Assert(slab->curBlocklistIndex <= SlabBlocklistIndex(slab, slab->chunksPerBlock));
 
 	/* make sure we only allow correct request size */
-	if (size != slab->chunkSize)
+	if (unlikely(size != slab->chunkSize))
 		elog(ERROR, "unexpected alloc chunk size %zu (expected %zu)",
 			 size, slab->chunkSize);
 
 	/*
-	 * If there are no free chunks in any existing block, create a new block
-	 * and put it to the last freelist bucket.
-	 *
-	 * slab->minFreeChunks == 0 means there are no blocks with free chunks,
-	 * thanks to how minFreeChunks is updated at the end of SlabAlloc().
+	 * Handle the case when there are no partially filled blocks available.
+	 * SlabFree() will have updated the curBlocklistIndex setting it to zero
+	 * to indicate that it has freed the final block. Also later in
+	 * SlabAlloc() we will set the curBlocklistIndex to zero if we end up
+	 * filling the final block.
 	 */
-	if (slab->minFreeChunks == 0)
+	if (unlikely(slab->curBlocklistIndex == 0))
 	{
-		block = (SlabBlock *) malloc(slab->blockSize);
+		dlist_head *blocklist;
+		int			blocklist_idx;
 
-		if (block == NULL)
-			return NULL;
-
-		block->nfree = slab->chunksPerBlock;
-		block->firstFreeChunk = 0;
-		block->slab = slab;
-
-		/*
-		 * Put all the chunks on a freelist. Walk the chunks and point each
-		 * one to the next one.
-		 */
-		for (idx = 0; idx < slab->chunksPerBlock; idx++)
+		/* to save allocating a new one, first check the empty blocks list */
+		if (dclist_count(&slab->emptyblocks) > 0)
 		{
-			chunk = SlabBlockGetChunk(slab, block, idx);
-			*(int32 *) MemoryChunkGetPointer(chunk) = (idx + 1);
+			dlist_node *node = dclist_pop_head_node(&slab->emptyblocks);
+
+			block = dlist_container(SlabBlock, node, node);
+
+			/*
+			 * SlabFree() should have left this block in a valid state with
+			 * all chunks free. Ensure that's the case.
+			 */
+			Assert(block->nfree == slab->chunksPerBlock);
+
+			/* fetch the next chunk from this block */
+			chunk = SlabGetNextFreeChunk(slab, block);
+		}
+		else
+		{
+			block = (SlabBlock *) malloc(slab->blockSize);
+
+			if (unlikely(block == NULL))
+				return NULL;
+
+			block->slab = slab;
+			context->mem_allocated += slab->blockSize;
+
+			/* use the first chunk in the new block */
+			chunk = SlabBlockGetChunk(slab, block, 0);
+
+			block->nfree = slab->chunksPerBlock - 1;
+			block->unused = SlabBlockGetChunk(slab, block, 1);
+			block->freehead = NULL;
+			block->nunused = slab->chunksPerBlock - 1;
 		}
 
-		/*
-		 * And add it to the last freelist with all chunks empty.
-		 *
-		 * We know there are no blocks in the freelist, otherwise we wouldn't
-		 * need a new block.
-		 */
-		Assert(dlist_is_empty(&slab->freelist[slab->chunksPerBlock]));
+		/* find the blocklist element for storing blocks with 1 used chunk */
+		blocklist_idx = SlabBlocklistIndex(slab, block->nfree);
+		blocklist = &slab->blocklist[blocklist_idx];
 
-		dlist_push_head(&slab->freelist[slab->chunksPerBlock], &block->node);
+		/* this had better be empty; we just added a block because it was */
+		Assert(dlist_is_empty(blocklist));
 
-		slab->minFreeChunks = slab->chunksPerBlock;
-		slab->nblocks += 1;
-		context->mem_allocated += slab->blockSize;
+		dlist_push_head(blocklist, &block->node);
+
+		slab->curBlocklistIndex = blocklist_idx;
 	}
-
-	/* grab the block from the freelist (even the new block is there) */
-	block = dlist_head_element(SlabBlock, node,
-							   &slab->freelist[slab->minFreeChunks]);
-
-	/* make sure we actually got a valid block, with matching nfree */
-	Assert(block != NULL);
-	Assert(slab->minFreeChunks == block->nfree);
-	Assert(block->nfree > 0);
-
-	/* we know index of the first free chunk in the block */
-	idx = block->firstFreeChunk;
-
-	/* make sure the chunk index is valid, and that it's marked as empty */
-	Assert((idx >= 0) && (idx < slab->chunksPerBlock));
-
-	/* compute the chunk location block start (after the block header) */
-	chunk = SlabBlockGetChunk(slab, block, idx);
-
-	/*
-	 * Update the block nfree count, and also the minFreeChunks as we've
-	 * decreased nfree for a block with the minimum number of free chunks
-	 * (because that's how we chose the block).
-	 */
-	block->nfree--;
-	slab->minFreeChunks = block->nfree;
-
-	/*
-	 * Remove the chunk from the freelist head. The index of the next free
-	 * chunk is stored in the chunk itself.
-	 */
-	VALGRIND_MAKE_MEM_DEFINED(MemoryChunkGetPointer(chunk), sizeof(int32));
-	block->firstFreeChunk = *(int32 *) MemoryChunkGetPointer(chunk);
-
-	Assert(block->firstFreeChunk >= 0);
-	Assert(block->firstFreeChunk <= slab->chunksPerBlock);
-
-	Assert((block->nfree != 0 &&
-			block->firstFreeChunk < slab->chunksPerBlock) ||
-		   (block->nfree == 0 &&
-			block->firstFreeChunk == slab->chunksPerBlock));
-
-	/* move the whole block to the right place in the freelist */
-	dlist_delete(&block->node);
-	dlist_push_head(&slab->freelist[block->nfree], &block->node);
-
-	/*
-	 * And finally update minFreeChunks, i.e. the index to the block with the
-	 * lowest number of free chunks. We only need to do that when the block
-	 * got full (otherwise we know the current block is the right one). We'll
-	 * simply walk the freelist until we find a non-empty entry.
-	 */
-	if (slab->minFreeChunks == 0)
+	else
 	{
-		for (idx = 1; idx <= slab->chunksPerBlock; idx++)
-		{
-			if (dlist_is_empty(&slab->freelist[idx]))
-				continue;
+		dlist_head *blocklist = &slab->blocklist[slab->curBlocklistIndex];
+		int			new_blocklist_idx;
 
-			/* found a non-empty freelist */
-			slab->minFreeChunks = idx;
-			break;
+		Assert(!dlist_is_empty(blocklist));
+
+		/* grab the block from the blocklist */
+		block = dlist_head_element(SlabBlock, node, blocklist);
+
+		/* make sure we actually got a valid block, with matching nfree */
+		Assert(block != NULL);
+		Assert(slab->curBlocklistIndex == SlabBlocklistIndex(slab, block->nfree));
+		Assert(block->nfree > 0);
+
+		/* fetch the next chunk from this block */
+		chunk = SlabGetNextFreeChunk(slab, block);
+
+		/* get the new blocklist index based on the new free chunk count */
+		new_blocklist_idx = SlabBlocklistIndex(slab, block->nfree);
+
+		/*
+		 * Handle the case where the blocklist index changes. This also deals
+		 * with blocks becoming full as only full blocks go at index 0.
+		 */
+		if (unlikely(slab->curBlocklistIndex != new_blocklist_idx))
+		{
+			dlist_delete_from(blocklist, &block->node);
+			dlist_push_head(&slab->blocklist[new_blocklist_idx], &block->node);
+
+			if (dlist_is_empty(blocklist))
+				slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
 		}
 	}
 
-	if (slab->minFreeChunks == slab->chunksPerBlock)
-		slab->minFreeChunks = 0;
+	/*
+	 * Check that the chunk pointer is actually somewhere on the block and is
+	 * aligned as expected.
	 */
+	Assert(chunk >= SlabBlockGetChunk(slab, block, 0));
+	Assert(chunk <= SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1));
+	Assert(SlabChunkMod(slab, block, chunk) == 0);
 
 	/* Prepare to initialize the chunk header. */
 	VALGRIND_MAKE_MEM_UNDEFINED(chunk, Slab_CHUNKHDRSZ);
@@ -453,8 +630,6 @@ SlabAlloc(MemoryContext context, Size size)
 	randomize_mem((char *) MemoryChunkGetPointer(chunk), size);
 #endif
 
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
-
 	return MemoryChunkGetPointer(chunk);
 }
 
@@ -468,7 +643,8 @@ SlabFree(void *pointer)
 	MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
 	SlabBlock  *block = MemoryChunkGetBlock(chunk);
 	SlabContext *slab;
-	int			idx;
+	int			curBlocklistIdx;
+	int			newBlocklistIdx;
 
 	/*
	 * For speed reasons we just Assert that the referenced block is good.
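
(For context, a minimal usage sketch of the slab API these hunks modify;
illustration only, not part of the patch. It assumes a PostgreSQL backend
environment, and "MyNode" is a hypothetical fixed-size node type.)

	MemoryContext slab;
	MyNode	   *node;

	/* one slab context per fixed-size object type */
	slab = SlabContextCreate(CurrentMemoryContext,
							 "mynode slab",
							 SLAB_DEFAULT_BLOCK_SIZE,
							 sizeof(MyNode));

	/* every request must be exactly the chunkSize given at create time */
	node = (MyNode *) MemoryContextAlloc(slab, sizeof(MyNode));

	pfree(node);				/* chunk returns to its block's freelist */
	MemoryContextDelete(slab);	/* releases all blocks, kept empties included */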
@@ -486,63 +662,82 @@
 			 slab->header.name, chunk);
 #endif
 
-	/* compute index of the chunk with respect to block start */
-	idx = SlabChunkIndex(slab, block, chunk);
+	/* push this chunk onto the head of the block's free list */
+	*(MemoryChunk **) pointer = block->freehead;
+	block->freehead = chunk;
 
-	/* add chunk to freelist, and update block nfree count */
-	*(int32 *) pointer = block->firstFreeChunk;
-	block->firstFreeChunk = idx;
 	block->nfree++;
 
 	Assert(block->nfree > 0);
 	Assert(block->nfree <= slab->chunksPerBlock);
 
 #ifdef CLOBBER_FREED_MEMORY
-	/* XXX don't wipe the int32 index, used for block-level freelist */
-	wipe_mem((char *) pointer + sizeof(int32),
-			 slab->chunkSize - sizeof(int32));
+	/* don't wipe the free list MemoryChunk pointer stored in the chunk */
+	wipe_mem((char *) pointer + sizeof(MemoryChunk *),
+			 slab->chunkSize - sizeof(MemoryChunk *));
 #endif
 
-	/* remove the block from a freelist */
-	dlist_delete(&block->node);
+	curBlocklistIdx = SlabBlocklistIndex(slab, block->nfree - 1);
+	newBlocklistIdx = SlabBlocklistIndex(slab, block->nfree);
 
 	/*
-	 * See if we need to update the minFreeChunks field for the slab - we only
-	 * need to do that if there the block had that number of free chunks
-	 * before we freed one. In that case, we check if there still are blocks
-	 * in the original freelist and we either keep the current value (if there
-	 * still are blocks) or increment it by one (the new block is still the
-	 * one with minimum free chunks).
-	 *
-	 * The one exception is when the block will get completely free - in that
-	 * case we will free it, se we can't use it for minFreeChunks. It however
-	 * means there are no more blocks with free chunks.
+	 * Check if the block needs to be moved to another element on the
+	 * blocklist based on it now having 1 more free chunk.
	 */
-	if (slab->minFreeChunks == (block->nfree - 1))
+	if (unlikely(curBlocklistIdx != newBlocklistIdx))
 	{
-		/* Have we removed the last chunk from the freelist? */
-		if (dlist_is_empty(&slab->freelist[slab->minFreeChunks]))
+		/* do the move */
+		dlist_delete_from(&slab->blocklist[curBlocklistIdx], &block->node);
+		dlist_push_head(&slab->blocklist[newBlocklistIdx], &block->node);
+
+		/*
+		 * It's possible that we have no blocks in the blocklist at the
+		 * curBlocklistIndex position. When this happens we must find the
+		 * next blocklist index which contains blocks. We can be certain
+		 * we'll find a block as at least one must exist for the chunk we're
+		 * currently freeing.
+		 */
+		if (slab->curBlocklistIndex == curBlocklistIdx &&
+			dlist_is_empty(&slab->blocklist[curBlocklistIdx]))
 		{
-			/* but if we made the block entirely free, we'll free it */
-			if (block->nfree == slab->chunksPerBlock)
-				slab->minFreeChunks = 0;
-			else
-				slab->minFreeChunks++;
+			slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
+			Assert(slab->curBlocklistIndex > 0);
 		}
 	}
 
-	/* If the block is now completely empty, free it. */
-	if (block->nfree == slab->chunksPerBlock)
+	/* Handle when a block becomes completely empty */
+	if (unlikely(block->nfree == slab->chunksPerBlock))
 	{
-		free(block);
-		slab->nblocks--;
-		slab->header.mem_allocated -= slab->blockSize;
-	}
-	else
-		dlist_push_head(&slab->freelist[block->nfree], &block->node);
+		/* remove the block */
+		dlist_delete_from(&slab->blocklist[newBlocklistIdx], &block->node);
 
-	Assert(slab->nblocks >= 0);
-	Assert(slab->nblocks * slab->blockSize == slab->header.mem_allocated);
+		/*
+		 * To avoid thrashing malloc/free, we keep a list of empty blocks that
+		 * we can reuse instead of having to malloc a new one.
+		 */
+		if (dclist_count(&slab->emptyblocks) < SLAB_MAXIMUM_EMPTY_BLOCKS)
+			dclist_push_head(&slab->emptyblocks, &block->node);
+		else
+		{
+			/*
+			 * When we have enough empty blocks stored already, we actually
+			 * free the block.
+			 */
+#ifdef CLOBBER_FREED_MEMORY
+			wipe_mem(block, slab->blockSize);
+#endif
+			free(block);
+			slab->header.mem_allocated -= slab->blockSize;
+		}
+
+		/*
+		 * Check if we need to reset the blocklist index. This is required
+		 * when the blocklist this block is on has become completely empty.
+		 */
+		if (slab->curBlocklistIndex == newBlocklistIdx &&
+			dlist_is_empty(&slab->blocklist[newBlocklistIdx]))
+			slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
+	}
 }
 
 /*
@@ -617,16 +812,14 @@ SlabGetChunkSpace(void *pointer)
 
 /*
 * SlabIsEmpty
- *		Is an Slab empty of any allocated space?
+ *		Is the slab empty of any allocated space?
 */
 bool
 SlabIsEmpty(MemoryContext context)
 {
-	SlabContext *slab = (SlabContext *) context;
+	Assert(SlabIsValid((SlabContext *) context));
 
-	Assert(SlabIsValid(slab));
-
-	return (slab->nblocks == 0);
+	return (context->mem_allocated == 0);
 }
 
 /*
@@ -654,13 +847,16 @@ SlabStats(MemoryContext context,
 	Assert(SlabIsValid(slab));
 
 	/* Include context header in totalspace */
-	totalspace = slab->headerSize;
+	totalspace = Slab_CONTEXT_HDRSZ(slab->chunksPerBlock);
 
-	for (i = 0; i <= slab->chunksPerBlock; i++)
+	/* Add the space consumed by blocks in the emptyblocks list */
+	totalspace += dclist_count(&slab->emptyblocks) * slab->blockSize;
+
+	for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
 	{
 		dlist_iter	iter;
 
-		dlist_foreach(iter, &slab->freelist[i])
+		dlist_foreach(iter, &slab->blocklist[i])
 		{
 			SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
 
@@ -675,10 +871,11 @@
 	{
 		char		stats_string[200];
 
+		/* XXX should we include free chunks on empty blocks? */
 		snprintf(stats_string, sizeof(stats_string),
-				 "%zu total in %zu blocks; %zu free (%zu chunks); %zu used",
-				 totalspace, nblocks, freespace, freechunks,
-				 totalspace - freespace);
+				 "%zu total in %zu blocks; %u empty blocks; %zu free (%zu chunks); %zu used",
+				 totalspace, nblocks, dclist_count(&slab->emptyblocks),
+				 freespace, freechunks, totalspace - freespace);
 		printfunc(context, passthru, stats_string, print_to_stderr);
 	}
 
@@ -696,7 +893,7 @@
 
 /*
 * SlabCheck
- *		Walk through chunks and check consistency of memory.
+ *		Walk through all blocks looking for inconsistencies.
 *
 * NOTE: report errors as WARNING, *not* ERROR or FATAL. Otherwise you'll
 * find yourself in an infinite loop when trouble occurs, because this
 * routine will be entered again when elog cleanup tries to release memory!
 */
 static void
 SlabCheck(MemoryContext context)
 {
 	SlabContext *slab = (SlabContext *) context;
 	int			i;
+	int			nblocks = 0;
 	const char *name = slab->header.name;
+	dlist_iter	iter;
 
 	Assert(SlabIsValid(slab));
 	Assert(slab->chunksPerBlock > 0);
 
-	/* walk all the freelists */
-	for (i = 0; i <= slab->chunksPerBlock; i++)
+	/*
+	 * Have a look at the empty blocks. These should have all their chunks
+	 * marked as free. Ensure that's the case.
+	 */
+	dclist_foreach(iter, &slab->emptyblocks)
+	{
+		SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
+
+		if (block->nfree != slab->chunksPerBlock)
+			elog(WARNING, "problem in slab %s: empty block %p should have %d free chunks but has %d chunks free",
+				 name, block, slab->chunksPerBlock, block->nfree);
+	}
+
+	/* walk the non-empty block lists */
+	for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
 	{
 		int			j,
 					nfree;
-		dlist_iter	iter;
 
-		/* walk all blocks on this freelist */
-		dlist_foreach(iter, &slab->freelist[i])
+		/* walk all blocks on this blocklist */
+		dlist_foreach(iter, &slab->blocklist[i])
 		{
-			int			idx;
 			SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
+			MemoryChunk *cur_chunk;
 
 			/*
			 * Make sure the number of free chunks (in the block header)
-			 * matches position in the freelist.
+			 * matches the position in the blocklist.
			 */
-			if (block->nfree != i)
-				elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match freelist %d",
-					 name, block->nfree, block, i);
+			if (SlabBlocklistIndex(slab, block->nfree) != i)
+				elog(WARNING, "problem in slab %s: block %p is on blocklist %d but should be on blocklist %d",
+					 name, block, i, SlabBlocklistIndex(slab, block->nfree));
+
+			/* make sure the block is not empty */
+			if (block->nfree >= slab->chunksPerBlock)
+				elog(WARNING, "problem in slab %s: empty block %p incorrectly stored on blocklist element %d",
+					 name, block, i);
 
 			/* make sure the slab pointer correctly points to this context */
 			if (block->slab != slab)
 				elog(WARNING, "problem in slab %s: bogus slab link in block %p",
 					 name, block);
 
-			/* reset the bitmap of free chunks for this block */
-			memset(slab->freechunks, 0, (slab->chunksPerBlock * sizeof(bool)));
-			idx = block->firstFreeChunk;
+			/* reset the array of free chunks for this block */
+			memset(slab->isChunkFree, 0, (slab->chunksPerBlock * sizeof(bool)));
+			nfree = 0;
+
+			/* walk through the block's free list chunks */
+			cur_chunk = block->freehead;
+			while (cur_chunk != NULL)
+			{
+				int			chunkidx = SlabChunkIndex(slab, block, cur_chunk);
+
+				/*
+				 * Ensure the free list link points to something on the block
+				 * at an address aligned according to the full chunk size.
+				 */
+				if (cur_chunk < SlabBlockGetChunk(slab, block, 0) ||
+					cur_chunk > SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1) ||
+					SlabChunkMod(slab, block, cur_chunk) != 0)
+					elog(WARNING, "problem in slab %s: bogus free list link %p in block %p",
+						 name, cur_chunk, block);
+
+				/* count the chunk and mark it free on the free chunk array */
+				nfree++;
+				slab->isChunkFree[chunkidx] = true;
+
+				/* read pointer of the next free chunk */
+				VALGRIND_MAKE_MEM_DEFINED(MemoryChunkGetPointer(cur_chunk), sizeof(MemoryChunk *));
+				cur_chunk = *(MemoryChunk **) SlabChunkGetPointer(cur_chunk);
+			}
+
+			/* check that the unused pointer matches what nunused claims */
+			if (SlabBlockGetChunk(slab, block, slab->chunksPerBlock - block->nunused) !=
+				block->unused)
+				elog(WARNING, "problem in slab %s: mismatch detected between nunused chunks and unused pointer in block %p",
+					 name, block);
 
 			/*
-			 * Now walk through the chunks, count the free ones and also
-			 * perform some additional checks for the used ones. As the chunk
-			 * freelist is stored within the chunks themselves, we have to
-			 * walk through the chunks and construct our own bitmap.
+			 * Count the remaining free chunks that have yet to make it onto
+			 * the block's free list.
			 */
-
-			nfree = 0;
-			while (idx < slab->chunksPerBlock)
+			cur_chunk = block->unused;
+			for (j = 0; j < block->nunused; j++)
 			{
-				MemoryChunk *chunk;
+				int			chunkidx = SlabChunkIndex(slab, block, cur_chunk);
 
-				/* count the chunk as free, add it to the bitmap */
+				/* count the chunk as free and mark it so in the array */
 				nfree++;
-				slab->freechunks[idx] = true;
+				if (chunkidx < slab->chunksPerBlock)
+					slab->isChunkFree[chunkidx] = true;
 
-				/* read index of the next free chunk */
-				chunk = SlabBlockGetChunk(slab, block, idx);
-				VALGRIND_MAKE_MEM_DEFINED(MemoryChunkGetPointer(chunk), sizeof(int32));
-				idx = *(int32 *) MemoryChunkGetPointer(chunk);
+				/* move forward 1 chunk */
+				cur_chunk = (MemoryChunk *) (((char *) cur_chunk) + slab->fullChunkSize);
 			}
 
 			for (j = 0; j < slab->chunksPerBlock; j++)
 			{
-				/* non-zero bit in the bitmap means chunk the chunk is used */
-				if (!slab->freechunks[j])
+				if (!slab->isChunkFree[j])
 				{
 					MemoryChunk *chunk = SlabBlockGetChunk(slab, block, j);
 					SlabBlock  *chunkblock = (SlabBlock *) MemoryChunkGetBlock(chunk);
@@ -793,12 +1036,17 @@
			 * in the block header).
			 */
 			if (nfree != block->nfree)
-				elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match bitmap %d",
-					 name, block->nfree, block, nfree);
+				elog(WARNING, "problem in slab %s: nfree in block %p is %d but %d chunks were found to be free",
+					 name, block, block->nfree, nfree);
+
+			nblocks++;
 		}
 	}
 
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	/* the stored empty blocks are tracked in mem_allocated too */
+	nblocks += dclist_count(&slab->emptyblocks);
+
+	Assert(nblocks * slab->blockSize == context->mem_allocated);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
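
(As a closing aside: the freelist-plus-watermark scheme the patch introduces
can be demonstrated outside PostgreSQL in a few lines of standalone C. The
sketch below is invented for illustration only and reuses no slab.c code or
names; it shows an intrusive freelist whose links live in the freed slots,
plus an "unused" watermark that is only consumed when the freelist is empty.)

	#include <assert.h>
	#include <stddef.h>

	#define NCHUNKS 8
	#define CHUNKSZ sizeof(void *)	/* just big enough for the freelist link */

	typedef struct MiniBlock
	{
		char		mem[NCHUNKS][CHUNKSZ];	/* chunk storage */
		void	   *freehead;	/* head of intrusive freelist, or NULL */
		int			nunused;	/* chunks beyond the high watermark */
		int			nfree;		/* free chunks: freelist plus unused */
	} MiniBlock;

	static void *
	mini_alloc(MiniBlock *b)
	{
		void	   *p;

		assert(b->nfree > 0);
		if (b->freehead != NULL)
		{
			/* prefer recently freed chunks: better CPU cache behavior */
			p = b->freehead;
			b->freehead = *(void **) p; /* pop; the link lives in the chunk */
		}
		else
		{
			/* consume the next never-used chunk at the watermark */
			p = b->mem[NCHUNKS - b->nunused];
			b->nunused--;
		}
		b->nfree--;
		return p;
	}

	static void
	mini_free(MiniBlock *b, void *p)
	{
		/* push onto the freelist head; the watermark never moves back */
		*(void **) p = b->freehead;
		b->freehead = p;
		b->nfree++;
	}

	int
	main(void)
	{
		MiniBlock	b = {.freehead = NULL, .nunused = NCHUNKS, .nfree = NCHUNKS};
		void	   *a = mini_alloc(&b);
		void	   *c = mini_alloc(&b);

		mini_free(&b, a);
		assert(mini_alloc(&b) == a);	/* freed chunk is handed out first */
		mini_free(&b, c);
		return 0;
	}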