diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 6b6a8289ad..f6500fec89 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/buf_init.c,v 1.70 2004/12/31 22:00:49 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/buf_init.c,v 1.71 2005/02/03 23:29:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -73,7 +73,6 @@ long int LocalBufferFlushCount; * aborts, it should only unpin the buffers exactly the number of times it * has pinned them, so that it will not blow away buffers of another * backend. - * */ @@ -120,14 +119,17 @@ InitBufferPool(void) block = BufferBlocks; /* - * link the buffers into a single linked list. This will become - * the LIFO list of unused buffers returned by - * StrategyGetBuffer(). + * Initialize all the buffer headers. */ for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++) { Assert(ShmemIsValid((unsigned long) block)); + /* + * The bufNext fields link together all totally-unused buffers. + * Subsequent management of this list is done by + * StrategyGetBuffer(). + */ buf->bufNext = i + 1; CLEAR_BUFFERTAG(buf->tag); @@ -142,7 +144,7 @@ InitBufferPool(void) buf->wait_backend_id = 0; } - /* Correct last entry */ + /* Correct last entry of linked list */ BufferDescriptors[NBuffers - 1].bufNext = -1; LWLockRelease(BufMgrLock); @@ -178,7 +180,8 @@ InitBufferPoolAccess(void) /* * Convert shmem offsets into addresses as seen by this process. This - * is just to speed up the BufferGetBlock() macro. + * is just to speed up the BufferGetBlock() macro. It is OK to do this + * without any lock since the data pointers never change. */ for (i = 0; i < NBuffers; i++) BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data); @@ -201,14 +204,8 @@ BufferShmemSize(void) /* size of data pages */ size += NBuffers * MAXALIGN(BLCKSZ); - /* size of buffer hash table */ - size += hash_estimate_size(NBuffers * 2, sizeof(BufferLookupEnt)); - - /* size of the shared replacement strategy control block */ - size += MAXALIGN(sizeof(BufferStrategyControl)); - - /* size of the ARC directory blocks */ - size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB)); + /* size of stuff controlled by freelist.c */ + size += StrategyShmemSize(); return size; } diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c index 12ac6aba88..ef79ae9c39 100644 --- a/src/backend/storage/buffer/buf_table.c +++ b/src/backend/storage/buffer/buf_table.c @@ -1,11 +1,11 @@ /*------------------------------------------------------------------------- * * buf_table.c - * routines for finding buffers in the buffer pool. + * routines for mapping BufferTags to buffer indexes. * - * NOTE: these days, what this table actually provides is a mapping from - * BufferTags to CDB indexes, not directly to buffers. The function names - * are thus slight misnomers. + * NOTE: this module is called only by freelist.c, and the "buffer IDs" + * it deals with are whatever freelist.c needs them to be; they may not be + * directly equivalent to Buffer numbers. * * Note: all routines in this file assume that the BufMgrLock is held * by the caller, so no synchronization is needed. @@ -16,7 +16,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/buf_table.c,v 1.38 2004/12/31 22:00:49 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/buf_table.c,v 1.39 2005/02/03 23:29:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,12 +26,29 @@ #include "storage/bufmgr.h" +/* entry for buffer lookup hashtable */ +typedef struct +{ + BufferTag key; /* Tag of a disk page */ + int id; /* Associated buffer ID */ +} BufferLookupEnt; + static HTAB *SharedBufHash; +/* + * Estimate space needed for mapping hashtable + * size is the desired hash table size (possibly more than NBuffers) + */ +int +BufTableShmemSize(int size) +{ + return hash_estimate_size(size, sizeof(BufferLookupEnt)); +} + /* * Initialize shmem hash table for mapping buffers - * size is the desired hash table size (2*NBuffers for ARC algorithm) + * size is the desired hash table size (possibly more than NBuffers) */ void InitBufTable(int size) @@ -56,7 +73,7 @@ InitBufTable(int size) /* * BufTableLookup - * Lookup the given BufferTag; return CDB index, or -1 if not found + * Lookup the given BufferTag; return buffer ID, or -1 if not found */ int BufTableLookup(BufferTag *tagPtr) @@ -76,10 +93,10 @@ BufTableLookup(BufferTag *tagPtr) /* * BufTableInsert - * Insert a hashtable entry for given tag and CDB index + * Insert a hashtable entry for given tag and buffer ID */ void -BufTableInsert(BufferTag *tagPtr, int cdb_id) +BufTableInsert(BufferTag *tagPtr, int buf_id) { BufferLookupEnt *result; bool found; @@ -92,15 +109,15 @@ BufTableInsert(BufferTag *tagPtr, int cdb_id) (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); - if (found) /* found something else in the table? */ + if (found) /* found something already in the table? */ elog(ERROR, "shared buffer hash table corrupted"); - result->id = cdb_id; + result->id = buf_id; } /* * BufTableDelete - * Delete the hashtable entry for given tag + * Delete the hashtable entry for given tag (which must exist) */ void BufTableDelete(BufferTag *tagPtr) diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index eb38fcf21a..b960208f74 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -3,6 +3,11 @@ * freelist.c * routines for manipulating the buffer pool's replacement strategy. * + * The name "freelist.c" is now a bit of a misnomer, since this module + * controls not only the list of free buffers per se, but the entire + * mechanism for looking up existing shared buffers and the strategy + * for choosing replacement victims when needed. + * * Note: all routines in this file assume that the BufMgrLock is held * by the caller, so no synchronization is needed. * @@ -12,7 +17,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.49 2004/12/31 22:00:49 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.50 2005/02/03 23:29:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,6 +30,51 @@ #include "storage/bufmgr.h" +/* + * Definitions for the buffer replacement strategy + */ +#define STRAT_LIST_UNUSED (-1) +#define STRAT_LIST_B1 0 +#define STRAT_LIST_T1 1 +#define STRAT_LIST_T2 2 +#define STRAT_LIST_B2 3 +#define STRAT_NUM_LISTS 4 + +/* + * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC) + */ +typedef struct +{ + int prev; /* list links */ + int next; + short list; /* ID of list it is currently in */ + bool t1_vacuum; /* t => present only because of VACUUM */ + TransactionId t1_xid; /* the xid this entry went onto T1 */ + BufferTag buf_tag; /* page identifier */ + int buf_id; /* currently assigned data buffer, or -1 */ +} BufferStrategyCDB; + +/* + * The shared ARC control information. + */ +typedef struct +{ + int target_T1_size; /* What T1 size are we aiming for */ + int listUnusedCDB; /* All unused StrategyCDB */ + int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2 + * and B2 */ + int listTail[STRAT_NUM_LISTS]; + int listSize[STRAT_NUM_LISTS]; + Buffer listFreeBuffers; /* List of unused buffers */ + + long num_lookup; /* Some hit statistics */ + long num_hit[STRAT_NUM_LISTS]; + time_t stat_report; + + /* Array of CDB's starts here */ + BufferStrategyCDB cdb[1]; /* VARIABLE SIZE ARRAY */ +} BufferStrategyControl; + /* GUC variable: time in seconds between statistics reports */ int DebugSharedBuffers = 0; @@ -812,6 +862,28 @@ StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags, } +/* + * StrategyShmemSize + * + * estimate the size of shared memory used by the freelist-related structures. + */ +int +StrategyShmemSize(void) +{ + int size = 0; + + /* size of CDB lookup hash table */ + size += BufTableShmemSize(NBuffers * 2); + + /* size of the shared replacement strategy control block */ + size += MAXALIGN(sizeof(BufferStrategyControl)); + + /* size of the ARC directory blocks */ + size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB)); + + return size; +} + /* * StrategyInitialize -- initialize the buffer cache replacement * strategy. diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index a89999f38f..2eee1415da 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.75 2004/12/31 22:03:42 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.76 2005/02/03 23:29:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,8 +17,9 @@ #include "storage/backendid.h" #include "storage/buf.h" -#include "storage/lmgr.h" #include "storage/lwlock.h" +#include "storage/shmem.h" +#include "utils/rel.h" /* @@ -40,10 +41,10 @@ typedef bits16 BufFlags; * Buffer tag identifies which disk block the buffer contains. * * Note: the BufferTag data must be sufficient to determine where to write the - * block, even during a "blind write" with no relcache entry. It's possible - * that the backend flushing the buffer doesn't even believe the relation is - * visible yet (its xact may have started before the xact that created the - * rel). The storage manager must be able to cope anyway. + * block, without reference to pg_class or pg_tablespace entries. It's + * possible that the backend flushing the buffer doesn't even believe the + * relation is visible yet (its xact may have started before the xact that + * created the rel). The storage manager must be able to cope anyway. * * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have * to be fixed to zero them, since this struct is used as a hash key. @@ -107,58 +108,12 @@ typedef struct sbufdesc #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) -/* entry for buffer lookup hashtable */ -typedef struct -{ - BufferTag key; /* Tag of a disk page */ - int id; /* CDB id of associated CDB */ -} BufferLookupEnt; -/* - * Definitions for the buffer replacement strategy - */ -#define STRAT_LIST_UNUSED (-1) -#define STRAT_LIST_B1 0 -#define STRAT_LIST_T1 1 -#define STRAT_LIST_T2 2 -#define STRAT_LIST_B2 3 -#define STRAT_NUM_LISTS 4 - -/* - * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC) - */ -typedef struct -{ - int prev; /* list links */ - int next; - short list; /* ID of list it is currently in */ - bool t1_vacuum; /* t => present only because of VACUUM */ - TransactionId t1_xid; /* the xid this entry went onto T1 */ - BufferTag buf_tag; /* page identifier */ - int buf_id; /* currently assigned data buffer, or -1 */ -} BufferStrategyCDB; - -/* - * The shared ARC control information. - */ -typedef struct -{ - int target_T1_size; /* What T1 size are we aiming for */ - int listUnusedCDB; /* All unused StrategyCDB */ - int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2 - * and B2 */ - int listTail[STRAT_NUM_LISTS]; - int listSize[STRAT_NUM_LISTS]; - Buffer listFreeBuffers; /* List of unused buffers */ - - long num_lookup; /* Some hit statistics */ - long num_hit[STRAT_NUM_LISTS]; - time_t stat_report; - - /* Array of CDB's starts here */ - BufferStrategyCDB cdb[1]; /* VARIABLE SIZE ARRAY */ -} BufferStrategyControl; +/* in bufmgr.c */ +extern BufferDesc *BufferDescriptors; +/* in localbuf.c */ +extern BufferDesc *LocalBufferDescriptors; /* counters in buf_init.c */ extern long int ReadBufferCount; @@ -170,11 +125,9 @@ extern long int LocalBufferFlushCount; /* - * Bufmgr Interface: + * Internal routines: only called by bufmgr */ -/* Internal routines: only called by bufmgr */ - /* freelist.c */ extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck, int *cdb_found_index); @@ -185,20 +138,17 @@ extern void StrategyInvalidateBuffer(BufferDesc *buf); extern void StrategyHintVacuum(bool vacuum_active); extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags, int max_buffers); +extern int StrategyShmemSize(void); extern void StrategyInitialize(bool init); /* buf_table.c */ +extern int BufTableShmemSize(int size); extern void InitBufTable(int size); extern int BufTableLookup(BufferTag *tagPtr); -extern void BufTableInsert(BufferTag *tagPtr, int cdb_id); +extern void BufTableInsert(BufferTag *tagPtr, int buf_id); extern void BufTableDelete(BufferTag *tagPtr); -/* bufmgr.c */ -extern BufferDesc *BufferDescriptors; - /* localbuf.c */ -extern BufferDesc *LocalBufferDescriptors; - extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); extern void WriteLocalBuffer(Buffer buffer, bool release);