Split the buffer mapping table into multiple separately lockable
partitions, as per discussion. Passes functionality checks, but I don't have any performance data yet.
This commit is contained in:
parent
51ee9fa157
commit
10b9ca3d05
|
@ -3,7 +3,7 @@
|
||||||
* pg_buffercache_pages.c
|
* pg_buffercache_pages.c
|
||||||
* display some contents of the buffer cache
|
* display some contents of the buffer cache
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/contrib/pg_buffercache/pg_buffercache_pages.c,v 1.7 2006/05/30 22:12:13 tgl Exp $
|
* $PostgreSQL: pgsql/contrib/pg_buffercache/pg_buffercache_pages.c,v 1.8 2006/07/23 03:07:57 tgl Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
@ -74,7 +74,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
|
||||||
|
|
||||||
if (SRF_IS_FIRSTCALL())
|
if (SRF_IS_FIRSTCALL())
|
||||||
{
|
{
|
||||||
uint32 i;
|
int i;
|
||||||
volatile BufferDesc *bufHdr;
|
volatile BufferDesc *bufHdr;
|
||||||
|
|
||||||
funcctx = SRF_FIRSTCALL_INIT();
|
funcctx = SRF_FIRSTCALL_INIT();
|
||||||
|
@ -108,7 +108,6 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
|
||||||
funcctx->max_calls = NBuffers;
|
funcctx->max_calls = NBuffers;
|
||||||
funcctx->user_fctx = fctx;
|
funcctx->user_fctx = fctx;
|
||||||
|
|
||||||
|
|
||||||
/* Allocate NBuffers worth of BufferCachePagesRec records. */
|
/* Allocate NBuffers worth of BufferCachePagesRec records. */
|
||||||
fctx->record = (BufferCachePagesRec *) palloc(sizeof(BufferCachePagesRec) * NBuffers);
|
fctx->record = (BufferCachePagesRec *) palloc(sizeof(BufferCachePagesRec) * NBuffers);
|
||||||
|
|
||||||
|
@ -120,17 +119,21 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
|
||||||
fctx->values[4] = (char *) palloc(3 * sizeof(uint32) + 1);
|
fctx->values[4] = (char *) palloc(3 * sizeof(uint32) + 1);
|
||||||
fctx->values[5] = (char *) palloc(2);
|
fctx->values[5] = (char *) palloc(2);
|
||||||
|
|
||||||
|
|
||||||
/* Return to original context when allocating transient memory */
|
/* Return to original context when allocating transient memory */
|
||||||
MemoryContextSwitchTo(oldcontext);
|
MemoryContextSwitchTo(oldcontext);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To get a consistent picture of the buffer state, we must lock
|
||||||
|
* all partitions of the buffer map. Needless to say, this is
|
||||||
|
* horrible for concurrency...
|
||||||
|
*/
|
||||||
|
for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
|
||||||
|
LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lock Buffer map and scan through all the buffers, saving the
|
* Scan through all the buffers, saving the relevant fields in the
|
||||||
* relevant fields in the fctx->record structure.
|
* fctx->record structure.
|
||||||
*/
|
*/
|
||||||
LWLockAcquire(BufMappingLock, LW_SHARED);
|
|
||||||
|
|
||||||
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
|
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
|
||||||
{
|
{
|
||||||
/* Lock each buffer header before inspecting. */
|
/* Lock each buffer header before inspecting. */
|
||||||
|
@ -157,7 +160,8 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Release Buffer map. */
|
/* Release Buffer map. */
|
||||||
LWLockRelease(BufMappingLock);
|
for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
|
||||||
|
LWLockRelease(FirstBufMappingLock + i);
|
||||||
}
|
}
|
||||||
|
|
||||||
funcctx = SRF_PERCALL_SETUP();
|
funcctx = SRF_PERCALL_SETUP();
|
||||||
|
@ -165,7 +169,6 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
|
||||||
/* Get the saved state */
|
/* Get the saved state */
|
||||||
fctx = funcctx->user_fctx;
|
fctx = funcctx->user_fctx;
|
||||||
|
|
||||||
|
|
||||||
if (funcctx->call_cntr < funcctx->max_calls)
|
if (funcctx->call_cntr < funcctx->max_calls)
|
||||||
{
|
{
|
||||||
uint32 i = funcctx->call_cntr;
|
uint32 i = funcctx->call_cntr;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.10 2006/06/08 14:58:33 tgl Exp $
|
$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.11 2006/07/23 03:07:58 tgl Exp $
|
||||||
|
|
||||||
Notes about shared buffer access rules
|
Notes about shared buffer access rules
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
@ -114,6 +114,14 @@ operation that needs exclusive lock is reading in a page that was not
|
||||||
in shared buffers already, which will require at least a kernel call
|
in shared buffers already, which will require at least a kernel call
|
||||||
and usually a wait for I/O, so it will be slow anyway.
|
and usually a wait for I/O, so it will be slow anyway.
|
||||||
|
|
||||||
|
* As of PG 8.2, the BufMappingLock has been split into NUM_BUFFER_PARTITIONS
|
||||||
|
separate locks, each guarding a portion of the buffer tag space. This allows
|
||||||
|
further reduction of contention in the normal code paths. The partition
|
||||||
|
that a particular buffer tag belongs to is determined from the low-order
|
||||||
|
bits of the tag's hash value. The rules stated above apply to each partition
|
||||||
|
independently. If it is necessary to lock more than one partition at a time,
|
||||||
|
they must be locked in partition-number order to avoid risk of deadlock.
|
||||||
|
|
||||||
* A separate system-wide LWLock, the BufFreelistLock, provides mutual
|
* A separate system-wide LWLock, the BufFreelistLock, provides mutual
|
||||||
exclusion for operations that access the buffer free list or select
|
exclusion for operations that access the buffer free list or select
|
||||||
buffers for replacement. This is always taken in exclusive mode since
|
buffers for replacement. This is always taken in exclusive mode since
|
||||||
|
|
|
@ -4,8 +4,10 @@
|
||||||
* routines for mapping BufferTags to buffer indexes.
|
* routines for mapping BufferTags to buffer indexes.
|
||||||
*
|
*
|
||||||
* Note: the routines in this file do no locking of their own. The caller
|
* Note: the routines in this file do no locking of their own. The caller
|
||||||
* must hold a suitable lock on the BufMappingLock, as specified in the
|
* must hold a suitable lock on the appropriate BufMappingLock, as specified
|
||||||
* comments.
|
* in the comments. We can't do the locking inside these functions because
|
||||||
|
* in most cases the caller needs to adjust the buffer header contents
|
||||||
|
* before the lock is released (see notes in README).
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||||
|
@ -13,7 +15,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/buf_table.c,v 1.46 2006/07/14 16:59:19 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/buffer/buf_table.c,v 1.47 2006/07/23 03:07:58 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -58,29 +60,49 @@ InitBufTable(int size)
|
||||||
info.keysize = sizeof(BufferTag);
|
info.keysize = sizeof(BufferTag);
|
||||||
info.entrysize = sizeof(BufferLookupEnt);
|
info.entrysize = sizeof(BufferLookupEnt);
|
||||||
info.hash = tag_hash;
|
info.hash = tag_hash;
|
||||||
|
info.num_partitions = NUM_BUFFER_PARTITIONS;
|
||||||
|
|
||||||
SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
|
SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
|
||||||
size, size,
|
size, size,
|
||||||
&info,
|
&info,
|
||||||
HASH_ELEM | HASH_FUNCTION);
|
HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
|
||||||
|
|
||||||
if (!SharedBufHash)
|
if (!SharedBufHash)
|
||||||
elog(FATAL, "could not initialize shared buffer hash table");
|
elog(FATAL, "could not initialize shared buffer hash table");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* BufTableHashCode
|
||||||
|
* Compute the hash code associated with a BufferTag
|
||||||
|
*
|
||||||
|
* This must be passed to the lookup/insert/delete routines along with the
|
||||||
|
* tag. We do it like this because the callers need to know the hash code
|
||||||
|
* in order to determine which buffer partition to lock, and we don't want
|
||||||
|
* to do the hash computation twice (hash_any is a bit slow).
|
||||||
|
*/
|
||||||
|
uint32
|
||||||
|
BufTableHashCode(BufferTag *tagPtr)
|
||||||
|
{
|
||||||
|
return get_hash_value(SharedBufHash, (void *) tagPtr);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BufTableLookup
|
* BufTableLookup
|
||||||
* Lookup the given BufferTag; return buffer ID, or -1 if not found
|
* Lookup the given BufferTag; return buffer ID, or -1 if not found
|
||||||
*
|
*
|
||||||
* Caller must hold at least share lock on BufMappingLock
|
* Caller must hold at least share lock on BufMappingLock for tag's partition
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
BufTableLookup(BufferTag *tagPtr)
|
BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
|
||||||
{
|
{
|
||||||
BufferLookupEnt *result;
|
BufferLookupEnt *result;
|
||||||
|
|
||||||
result = (BufferLookupEnt *)
|
result = (BufferLookupEnt *)
|
||||||
hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL);
|
hash_search_with_hash_value(SharedBufHash,
|
||||||
|
(void *) tagPtr,
|
||||||
|
hashcode,
|
||||||
|
HASH_FIND,
|
||||||
|
NULL);
|
||||||
|
|
||||||
if (!result)
|
if (!result)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -96,10 +118,10 @@ BufTableLookup(BufferTag *tagPtr)
|
||||||
* Returns -1 on successful insertion. If a conflicting entry exists
|
* Returns -1 on successful insertion. If a conflicting entry exists
|
||||||
* already, returns the buffer ID in that entry.
|
* already, returns the buffer ID in that entry.
|
||||||
*
|
*
|
||||||
* Caller must hold write lock on BufMappingLock
|
* Caller must hold exclusive lock on BufMappingLock for tag's partition
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
BufTableInsert(BufferTag *tagPtr, int buf_id)
|
BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
|
||||||
{
|
{
|
||||||
BufferLookupEnt *result;
|
BufferLookupEnt *result;
|
||||||
bool found;
|
bool found;
|
||||||
|
@ -108,7 +130,11 @@ BufTableInsert(BufferTag *tagPtr, int buf_id)
|
||||||
Assert(tagPtr->blockNum != P_NEW); /* invalid tag */
|
Assert(tagPtr->blockNum != P_NEW); /* invalid tag */
|
||||||
|
|
||||||
result = (BufferLookupEnt *)
|
result = (BufferLookupEnt *)
|
||||||
hash_search(SharedBufHash, (void *) tagPtr, HASH_ENTER, &found);
|
hash_search_with_hash_value(SharedBufHash,
|
||||||
|
(void *) tagPtr,
|
||||||
|
hashcode,
|
||||||
|
HASH_ENTER,
|
||||||
|
&found);
|
||||||
|
|
||||||
if (found) /* found something already in the table */
|
if (found) /* found something already in the table */
|
||||||
return result->id;
|
return result->id;
|
||||||
|
@ -122,15 +148,19 @@ BufTableInsert(BufferTag *tagPtr, int buf_id)
|
||||||
* BufTableDelete
|
* BufTableDelete
|
||||||
* Delete the hashtable entry for given tag (which must exist)
|
* Delete the hashtable entry for given tag (which must exist)
|
||||||
*
|
*
|
||||||
* Caller must hold write lock on BufMappingLock
|
* Caller must hold exclusive lock on BufMappingLock for tag's partition
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
BufTableDelete(BufferTag *tagPtr)
|
BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
|
||||||
{
|
{
|
||||||
BufferLookupEnt *result;
|
BufferLookupEnt *result;
|
||||||
|
|
||||||
result = (BufferLookupEnt *)
|
result = (BufferLookupEnt *)
|
||||||
hash_search(SharedBufHash, (void *) tagPtr, HASH_REMOVE, NULL);
|
hash_search_with_hash_value(SharedBufHash,
|
||||||
|
(void *) tagPtr,
|
||||||
|
hashcode,
|
||||||
|
HASH_REMOVE,
|
||||||
|
NULL);
|
||||||
|
|
||||||
if (!result) /* shouldn't happen */
|
if (!result) /* shouldn't happen */
|
||||||
elog(ERROR, "shared buffer hash table corrupted");
|
elog(ERROR, "shared buffer hash table corrupted");
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.208 2006/07/14 14:52:22 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.209 2006/07/23 03:07:58 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -293,7 +293,11 @@ BufferAlloc(Relation reln,
|
||||||
bool *foundPtr)
|
bool *foundPtr)
|
||||||
{
|
{
|
||||||
BufferTag newTag; /* identity of requested block */
|
BufferTag newTag; /* identity of requested block */
|
||||||
BufferTag oldTag;
|
uint32 newHash; /* hash value for newTag */
|
||||||
|
LWLockId newPartitionLock; /* buffer partition lock for it */
|
||||||
|
BufferTag oldTag; /* previous identity of selected buffer */
|
||||||
|
uint32 oldHash; /* hash value for oldTag */
|
||||||
|
LWLockId oldPartitionLock; /* buffer partition lock for it */
|
||||||
BufFlags oldFlags;
|
BufFlags oldFlags;
|
||||||
int buf_id;
|
int buf_id;
|
||||||
volatile BufferDesc *buf;
|
volatile BufferDesc *buf;
|
||||||
|
@ -302,9 +306,13 @@ BufferAlloc(Relation reln,
|
||||||
/* create a tag so we can lookup the buffer */
|
/* create a tag so we can lookup the buffer */
|
||||||
INIT_BUFFERTAG(newTag, reln, blockNum);
|
INIT_BUFFERTAG(newTag, reln, blockNum);
|
||||||
|
|
||||||
|
/* determine its hash code and partition lock ID */
|
||||||
|
newHash = BufTableHashCode(&newTag);
|
||||||
|
newPartitionLock = BufMappingPartitionLock(newHash);
|
||||||
|
|
||||||
/* see if the block is in the buffer pool already */
|
/* see if the block is in the buffer pool already */
|
||||||
LWLockAcquire(BufMappingLock, LW_SHARED);
|
LWLockAcquire(newPartitionLock, LW_SHARED);
|
||||||
buf_id = BufTableLookup(&newTag);
|
buf_id = BufTableLookup(&newTag, newHash);
|
||||||
if (buf_id >= 0)
|
if (buf_id >= 0)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -317,7 +325,7 @@ BufferAlloc(Relation reln,
|
||||||
valid = PinBuffer(buf);
|
valid = PinBuffer(buf);
|
||||||
|
|
||||||
/* Can release the mapping lock as soon as we've pinned it */
|
/* Can release the mapping lock as soon as we've pinned it */
|
||||||
LWLockRelease(BufMappingLock);
|
LWLockRelease(newPartitionLock);
|
||||||
|
|
||||||
*foundPtr = TRUE;
|
*foundPtr = TRUE;
|
||||||
|
|
||||||
|
@ -345,9 +353,9 @@ BufferAlloc(Relation reln,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Didn't find it in the buffer pool. We'll have to initialize a new
|
* Didn't find it in the buffer pool. We'll have to initialize a new
|
||||||
* buffer. Remember to unlock BufMappingLock while doing the work.
|
* buffer. Remember to unlock the mapping lock while doing the work.
|
||||||
*/
|
*/
|
||||||
LWLockRelease(BufMappingLock);
|
LWLockRelease(newPartitionLock);
|
||||||
|
|
||||||
/* Loop here in case we have to try another victim buffer */
|
/* Loop here in case we have to try another victim buffer */
|
||||||
for (;;)
|
for (;;)
|
||||||
|
@ -412,10 +420,48 @@ BufferAlloc(Relation reln,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Acquire exclusive mapping lock in preparation for changing the
|
* To change the association of a valid buffer, we'll need to have
|
||||||
* buffer's association.
|
* exclusive lock on both the old and new mapping partitions.
|
||||||
*/
|
*/
|
||||||
LWLockAcquire(BufMappingLock, LW_EXCLUSIVE);
|
if (oldFlags & BM_TAG_VALID)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Need to compute the old tag's hashcode and partition lock ID.
|
||||||
|
* XXX is it worth storing the hashcode in BufferDesc so we need
|
||||||
|
* not recompute it here? Probably not.
|
||||||
|
*/
|
||||||
|
oldTag = buf->tag;
|
||||||
|
oldHash = BufTableHashCode(&oldTag);
|
||||||
|
oldPartitionLock = BufMappingPartitionLock(oldHash);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Must lock the lower-numbered partition first to avoid
|
||||||
|
* deadlocks.
|
||||||
|
*/
|
||||||
|
if (oldPartitionLock < newPartitionLock)
|
||||||
|
{
|
||||||
|
LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
|
||||||
|
LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
|
||||||
|
}
|
||||||
|
else if (oldPartitionLock > newPartitionLock)
|
||||||
|
{
|
||||||
|
LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
|
||||||
|
LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* only one partition, only one lock */
|
||||||
|
LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* if it wasn't valid, we need only the new partition */
|
||||||
|
LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
|
||||||
|
/* these just keep the compiler quiet about uninitialized variables */
|
||||||
|
oldHash = 0;
|
||||||
|
oldPartitionLock = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try to make a hashtable entry for the buffer under its new tag.
|
* Try to make a hashtable entry for the buffer under its new tag.
|
||||||
|
@ -424,7 +470,7 @@ BufferAlloc(Relation reln,
|
||||||
* Note that we have not yet removed the hashtable entry for the old
|
* Note that we have not yet removed the hashtable entry for the old
|
||||||
* tag.
|
* tag.
|
||||||
*/
|
*/
|
||||||
buf_id = BufTableInsert(&newTag, buf->buf_id);
|
buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
|
||||||
|
|
||||||
if (buf_id >= 0)
|
if (buf_id >= 0)
|
||||||
{
|
{
|
||||||
|
@ -433,10 +479,15 @@ BufferAlloc(Relation reln,
|
||||||
* do. We'll just handle this as if it were found in the buffer
|
* do. We'll just handle this as if it were found in the buffer
|
||||||
* pool in the first place. First, give up the buffer we were
|
* pool in the first place. First, give up the buffer we were
|
||||||
* planning to use. Don't allow it to be thrown in the free list
|
* planning to use. Don't allow it to be thrown in the free list
|
||||||
* (we don't want to hold both global locks at once).
|
* (we don't want to hold freelist and mapping locks at once).
|
||||||
*/
|
*/
|
||||||
UnpinBuffer(buf, true, false);
|
UnpinBuffer(buf, true, false);
|
||||||
|
|
||||||
|
/* Can give up that buffer's mapping partition lock now */
|
||||||
|
if ((oldFlags & BM_TAG_VALID) &&
|
||||||
|
oldPartitionLock != newPartitionLock)
|
||||||
|
LWLockRelease(oldPartitionLock);
|
||||||
|
|
||||||
/* remaining code should match code at top of routine */
|
/* remaining code should match code at top of routine */
|
||||||
|
|
||||||
buf = &BufferDescriptors[buf_id];
|
buf = &BufferDescriptors[buf_id];
|
||||||
|
@ -444,7 +495,7 @@ BufferAlloc(Relation reln,
|
||||||
valid = PinBuffer(buf);
|
valid = PinBuffer(buf);
|
||||||
|
|
||||||
/* Can release the mapping lock as soon as we've pinned it */
|
/* Can release the mapping lock as soon as we've pinned it */
|
||||||
LWLockRelease(BufMappingLock);
|
LWLockRelease(newPartitionLock);
|
||||||
|
|
||||||
*foundPtr = TRUE;
|
*foundPtr = TRUE;
|
||||||
|
|
||||||
|
@ -481,12 +532,16 @@ BufferAlloc(Relation reln,
|
||||||
* recycle this buffer; we must undo everything we've done and start
|
* recycle this buffer; we must undo everything we've done and start
|
||||||
* over with a new victim buffer.
|
* over with a new victim buffer.
|
||||||
*/
|
*/
|
||||||
if (buf->refcount == 1 && !(buf->flags & BM_DIRTY))
|
oldFlags = buf->flags;
|
||||||
|
if (buf->refcount == 1 && !(oldFlags & BM_DIRTY))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
UnlockBufHdr(buf);
|
UnlockBufHdr(buf);
|
||||||
BufTableDelete(&newTag);
|
BufTableDelete(&newTag, newHash);
|
||||||
LWLockRelease(BufMappingLock);
|
if ((oldFlags & BM_TAG_VALID) &&
|
||||||
|
oldPartitionLock != newPartitionLock)
|
||||||
|
LWLockRelease(oldPartitionLock);
|
||||||
|
LWLockRelease(newPartitionLock);
|
||||||
UnpinBuffer(buf, true, false /* evidently recently used */ );
|
UnpinBuffer(buf, true, false /* evidently recently used */ );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -497,8 +552,6 @@ BufferAlloc(Relation reln,
|
||||||
* paranoia. We also clear the usage_count since any recency of use of
|
* paranoia. We also clear the usage_count since any recency of use of
|
||||||
* the old content is no longer relevant.
|
* the old content is no longer relevant.
|
||||||
*/
|
*/
|
||||||
oldTag = buf->tag;
|
|
||||||
oldFlags = buf->flags;
|
|
||||||
buf->tag = newTag;
|
buf->tag = newTag;
|
||||||
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
|
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
|
||||||
buf->flags |= BM_TAG_VALID;
|
buf->flags |= BM_TAG_VALID;
|
||||||
|
@ -507,9 +560,13 @@ BufferAlloc(Relation reln,
|
||||||
UnlockBufHdr(buf);
|
UnlockBufHdr(buf);
|
||||||
|
|
||||||
if (oldFlags & BM_TAG_VALID)
|
if (oldFlags & BM_TAG_VALID)
|
||||||
BufTableDelete(&oldTag);
|
{
|
||||||
|
BufTableDelete(&oldTag, oldHash);
|
||||||
|
if (oldPartitionLock != newPartitionLock)
|
||||||
|
LWLockRelease(oldPartitionLock);
|
||||||
|
}
|
||||||
|
|
||||||
LWLockRelease(BufMappingLock);
|
LWLockRelease(newPartitionLock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Buffer contents are currently invalid. Try to get the io_in_progress
|
* Buffer contents are currently invalid. Try to get the io_in_progress
|
||||||
|
@ -545,6 +602,8 @@ static void
|
||||||
InvalidateBuffer(volatile BufferDesc *buf)
|
InvalidateBuffer(volatile BufferDesc *buf)
|
||||||
{
|
{
|
||||||
BufferTag oldTag;
|
BufferTag oldTag;
|
||||||
|
uint32 oldHash; /* hash value for oldTag */
|
||||||
|
LWLockId oldPartitionLock; /* buffer partition lock for it */
|
||||||
BufFlags oldFlags;
|
BufFlags oldFlags;
|
||||||
|
|
||||||
/* Save the original buffer tag before dropping the spinlock */
|
/* Save the original buffer tag before dropping the spinlock */
|
||||||
|
@ -552,13 +611,21 @@ InvalidateBuffer(volatile BufferDesc *buf)
|
||||||
|
|
||||||
UnlockBufHdr(buf);
|
UnlockBufHdr(buf);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Need to compute the old tag's hashcode and partition lock ID.
|
||||||
|
* XXX is it worth storing the hashcode in BufferDesc so we need
|
||||||
|
* not recompute it here? Probably not.
|
||||||
|
*/
|
||||||
|
oldHash = BufTableHashCode(&oldTag);
|
||||||
|
oldPartitionLock = BufMappingPartitionLock(oldHash);
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Acquire exclusive mapping lock in preparation for changing the buffer's
|
* Acquire exclusive mapping lock in preparation for changing the buffer's
|
||||||
* association.
|
* association.
|
||||||
*/
|
*/
|
||||||
LWLockAcquire(BufMappingLock, LW_EXCLUSIVE);
|
LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
/* Re-lock the buffer header */
|
/* Re-lock the buffer header */
|
||||||
LockBufHdr(buf);
|
LockBufHdr(buf);
|
||||||
|
@ -567,7 +634,7 @@ retry:
|
||||||
if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
|
if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
|
||||||
{
|
{
|
||||||
UnlockBufHdr(buf);
|
UnlockBufHdr(buf);
|
||||||
LWLockRelease(BufMappingLock);
|
LWLockRelease(oldPartitionLock);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -583,7 +650,7 @@ retry:
|
||||||
if (buf->refcount != 0)
|
if (buf->refcount != 0)
|
||||||
{
|
{
|
||||||
UnlockBufHdr(buf);
|
UnlockBufHdr(buf);
|
||||||
LWLockRelease(BufMappingLock);
|
LWLockRelease(oldPartitionLock);
|
||||||
/* safety check: should definitely not be our *own* pin */
|
/* safety check: should definitely not be our *own* pin */
|
||||||
if (PrivateRefCount[buf->buf_id] != 0)
|
if (PrivateRefCount[buf->buf_id] != 0)
|
||||||
elog(ERROR, "buffer is pinned in InvalidateBuffer");
|
elog(ERROR, "buffer is pinned in InvalidateBuffer");
|
||||||
|
@ -606,7 +673,7 @@ retry:
|
||||||
* Remove the buffer from the lookup hashtable, if it was in there.
|
* Remove the buffer from the lookup hashtable, if it was in there.
|
||||||
*/
|
*/
|
||||||
if (oldFlags & BM_TAG_VALID)
|
if (oldFlags & BM_TAG_VALID)
|
||||||
BufTableDelete(&oldTag);
|
BufTableDelete(&oldTag, oldHash);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Avoid accepting a cancel interrupt when we release the mapping lock;
|
* Avoid accepting a cancel interrupt when we release the mapping lock;
|
||||||
|
@ -616,7 +683,7 @@ retry:
|
||||||
*/
|
*/
|
||||||
HOLD_INTERRUPTS();
|
HOLD_INTERRUPTS();
|
||||||
|
|
||||||
LWLockRelease(BufMappingLock);
|
LWLockRelease(oldPartitionLock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Insert the buffer at the head of the list of free buffers.
|
* Insert the buffer at the head of the list of free buffers.
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.40 2006/07/14 14:52:23 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.41 2006/07/23 03:07:58 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -157,10 +157,7 @@ NumLWLocks(void)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Predefined LWLocks */
|
/* Predefined LWLocks */
|
||||||
numLocks = (int) FirstLockMgrLock;
|
numLocks = (int) NumFixedLWLocks;
|
||||||
|
|
||||||
/* lock.c gets the ones starting at FirstLockMgrLock */
|
|
||||||
numLocks += NUM_LOCK_PARTITIONS;
|
|
||||||
|
|
||||||
/* bufmgr.c needs two for each shared buffer */
|
/* bufmgr.c needs two for each shared buffer */
|
||||||
numLocks += 2 * NBuffers;
|
numLocks += 2 * NBuffers;
|
||||||
|
@ -239,11 +236,10 @@ CreateLWLocks(void)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize the dynamic-allocation counter, which is stored just before
|
* Initialize the dynamic-allocation counter, which is stored just before
|
||||||
* the first LWLock. The LWLocks used by lock.c are not dynamically
|
* the first LWLock.
|
||||||
* allocated, it just assumes it has them.
|
|
||||||
*/
|
*/
|
||||||
LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
|
LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
|
||||||
LWLockCounter[0] = (int) FirstLockMgrLock + NUM_LOCK_PARTITIONS;
|
LWLockCounter[0] = (int) NumFixedLWLocks;
|
||||||
LWLockCounter[1] = numLocks;
|
LWLockCounter[1] = numLocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.86 2006/03/31 23:32:07 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.87 2006/07/23 03:07:58 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -86,6 +86,17 @@ typedef struct buftag
|
||||||
(a).blockNum == (b).blockNum \
|
(a).blockNum == (b).blockNum \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The shared buffer mapping table is partitioned to reduce contention.
|
||||||
|
* To determine which partition lock a given tag requires, compute the tag's
|
||||||
|
* hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
|
||||||
|
* NB: NUM_BUFFER_PARTITIONS must be a power of 2!
|
||||||
|
*/
|
||||||
|
#define BufTableHashPartition(hashcode) \
|
||||||
|
((hashcode) % NUM_BUFFER_PARTITIONS)
|
||||||
|
#define BufMappingPartitionLock(hashcode) \
|
||||||
|
((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode)))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BufferDesc -- shared descriptor/state data for a single shared buffer.
|
* BufferDesc -- shared descriptor/state data for a single shared buffer.
|
||||||
*
|
*
|
||||||
|
@ -182,9 +193,10 @@ extern void StrategyInitialize(bool init);
|
||||||
/* buf_table.c */
|
/* buf_table.c */
|
||||||
extern Size BufTableShmemSize(int size);
|
extern Size BufTableShmemSize(int size);
|
||||||
extern void InitBufTable(int size);
|
extern void InitBufTable(int size);
|
||||||
extern int BufTableLookup(BufferTag *tagPtr);
|
extern uint32 BufTableHashCode(BufferTag *tagPtr);
|
||||||
extern int BufTableInsert(BufferTag *tagPtr, int buf_id);
|
extern int BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
|
||||||
extern void BufTableDelete(BufferTag *tagPtr);
|
extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
|
||||||
|
extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
|
||||||
|
|
||||||
/* localbuf.c */
|
/* localbuf.c */
|
||||||
extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
|
extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.94 2006/03/05 15:58:59 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.95 2006/07/23 03:07:58 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -19,23 +19,15 @@
|
||||||
#include "storage/shmem.h"
|
#include "storage/shmem.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
/* struct PGPROC is declared in proc.h, but must forward-reference it */
|
||||||
* Number of partitions the shared lock tables are divided into.
|
typedef struct PGPROC PGPROC;
|
||||||
*
|
|
||||||
* See LockTagToPartition() if you change this.
|
|
||||||
*/
|
|
||||||
#define NUM_LOCK_PARTITIONS 16
|
|
||||||
|
|
||||||
/* originally in procq.h */
|
|
||||||
typedef struct PROC_QUEUE
|
typedef struct PROC_QUEUE
|
||||||
{
|
{
|
||||||
SHM_QUEUE links; /* head of list of PGPROC objects */
|
SHM_QUEUE links; /* head of list of PGPROC objects */
|
||||||
int size; /* number of entries in list */
|
int size; /* number of entries in list */
|
||||||
} PROC_QUEUE;
|
} PROC_QUEUE;
|
||||||
|
|
||||||
/* struct PGPROC is declared in proc.h, but must forward-reference it */
|
|
||||||
typedef struct PGPROC PGPROC;
|
|
||||||
|
|
||||||
/* GUC variables */
|
/* GUC variables */
|
||||||
extern int max_locks_per_xact;
|
extern int max_locks_per_xact;
|
||||||
|
|
||||||
|
|
|
@ -7,13 +7,25 @@
|
||||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.28 2006/05/08 00:00:17 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.29 2006/07/23 03:07:58 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
#ifndef LWLOCK_H
|
#ifndef LWLOCK_H
|
||||||
#define LWLOCK_H
|
#define LWLOCK_H
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS
|
||||||
|
* here, but we need them to set up enum LWLockId correctly, and having
|
||||||
|
* this file include lock.h or bufmgr.h would be backwards.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Number of partitions of the shared buffer mapping hashtable */
|
||||||
|
#define NUM_BUFFER_PARTITIONS 16
|
||||||
|
|
||||||
|
/* Number of partitions the shared lock tables are divided into */
|
||||||
|
#define NUM_LOCK_PARTITIONS 16
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have a number of predefined LWLocks, plus a bunch of LWLocks that are
|
* We have a number of predefined LWLocks, plus a bunch of LWLocks that are
|
||||||
* dynamically assigned (e.g., for shared buffers). The LWLock structures
|
* dynamically assigned (e.g., for shared buffers). The LWLock structures
|
||||||
|
@ -25,7 +37,6 @@
|
||||||
*/
|
*/
|
||||||
typedef enum LWLockId
|
typedef enum LWLockId
|
||||||
{
|
{
|
||||||
BufMappingLock,
|
|
||||||
BufFreelistLock,
|
BufFreelistLock,
|
||||||
ShmemIndexLock,
|
ShmemIndexLock,
|
||||||
OidGenLock,
|
OidGenLock,
|
||||||
|
@ -48,7 +59,11 @@ typedef enum LWLockId
|
||||||
TwoPhaseStateLock,
|
TwoPhaseStateLock,
|
||||||
TablespaceCreateLock,
|
TablespaceCreateLock,
|
||||||
BtreeVacuumLock,
|
BtreeVacuumLock,
|
||||||
FirstLockMgrLock, /* must be last except for MaxDynamicLWLock */
|
FirstBufMappingLock,
|
||||||
|
FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
|
||||||
|
|
||||||
|
/* must be last except for MaxDynamicLWLock: */
|
||||||
|
NumFixedLWLocks = FirstLockMgrLock + NUM_LOCK_PARTITIONS,
|
||||||
|
|
||||||
MaxDynamicLWLock = 1000000000
|
MaxDynamicLWLock = 1000000000
|
||||||
} LWLockId;
|
} LWLockId;
|
||||||
|
|
Loading…
Reference in New Issue