/*-------------------------------------------------------------------------
 *
 * localbuf.c
 *	  local buffer manager. Fast buffer manager for temporary tables,
 *	  which never need to be WAL-logged or checkpointed, etc.
 *
 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994-5, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/localbuf.c
 *
 *-------------------------------------------------------------------------
 */
|
2000-11-20 17:47:32 +01:00
|
|
|
#include "postgres.h"
|
2000-10-28 18:21:00 +02:00
|
|
|
|
2008-11-11 15:17:02 +01:00
|
|
|
#include "catalog/catalog.h"
|
2009-12-15 05:57:48 +01:00
|
|
|
#include "executor/instrument.h"
|
2000-11-30 02:39:08 +01:00
|
|
|
#include "storage/buf_internals.h"
|
|
|
|
#include "storage/bufmgr.h"
|
2005-03-20 00:27:11 +01:00
|
|
|
#include "utils/guc.h"
|
2005-03-19 18:39:43 +01:00
|
|
|
#include "utils/memutils.h"
|
2012-08-29 00:02:07 +02:00
|
|
|
#include "utils/resowner_private.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-04-18 04:53:37 +02:00
|
|
|
|
2002-08-06 04:36:35 +02:00
|
|
|
/*#define LBDEBUG*/

/* entry for buffer lookup hashtable */
typedef struct
{
	BufferTag	key;			/* Tag of a disk page */
	int			id;				/* Associated local buffer's index */
} LocalBufferLookupEnt;

/*
 * Note: this macro only works on local buffers, not shared ones!
 * Local descriptors carry buf_id = -i - 2 (see InitLocalBuffers), so
 * -(buf_id + 2) recovers the 0-based index into the pointer array.
 */
#define LocalBufHdrGetBlock(bufHdr) \
	LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

int			NLocBuffer = 0;		/* until buffers are initialized */

/* Parallel arrays, each NLocBuffer long, allocated in InitLocalBuffers */
BufferDesc *LocalBufferDescriptors = NULL;
Block	   *LocalBufferBlockPointers = NULL;
int32	   *LocalRefCount = NULL;		/* per-buffer local pin counts */

/* clock-sweep hand for LocalBufferAlloc's victim search */
static int	nextFreeLocalBuf = 0;

/* maps BufferTag -> local buffer index; NULL until first use */
static HTAB *LocalBufHash = NULL;


static void InitLocalBuffers(void);
static Block GetLocalBufferStorage(void);
2009-01-12 06:10:45 +01:00
|
|
|
/*
 * LocalPrefetchBuffer -
 *	  initiate asynchronous read of a block of a relation
 *
 * Do PrefetchBuffer's work for temporary relations.
 * No-op if prefetching isn't compiled in.
 */
void
LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
					BlockNumber blockNum)
{
#ifdef USE_PREFETCH
	BufferTag	newTag;			/* identity of requested block */
	LocalBufferLookupEnt *hresult;

	INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);

	/* Initialize local buffers if first request in this session */
	if (LocalBufHash == NULL)
		InitLocalBuffers();

	/* See if the desired buffer already exists */
	hresult = (LocalBufferLookupEnt *)
		hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);

	if (hresult)
	{
		/* Yes, so nothing to do */
		return;
	}

	/* Not in buffers, so initiate prefetch */
	smgrprefetch(smgr, forkNum, blockNum);
#endif   /* USE_PREFETCH */
}
|
|
|
|
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * LocalBufferAlloc -
 *	  Find or create a local buffer for the given page of the given relation.
 *
 * API is similar to bufmgr.c's BufferAlloc, except that we do not need
 * to do any locking since this is all local.  Also, IO_IN_PROGRESS
 * does not get set.  Lastly, we support only default access strategy
 * (hence, usage_count is always advanced).
 */
BufferDesc *
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
				 bool *foundPtr)
{
	BufferTag	newTag;			/* identity of requested block */
	LocalBufferLookupEnt *hresult;
	BufferDesc *bufHdr;
	int			b;
	int			trycounter;
	bool		found;

	INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);

	/* Initialize local buffers if first request in this session */
	if (LocalBufHash == NULL)
		InitLocalBuffers();

	/* See if the desired buffer already exists */
	hresult = (LocalBufferLookupEnt *)
		hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);

	if (hresult)
	{
		b = hresult->id;
		bufHdr = GetLocalBufferDescriptor(b);
		Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag));
#ifdef LBDEBUG
		fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
				smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1);
#endif
		/* this part is equivalent to PinBuffer for a shared buffer */
		if (LocalRefCount[b] == 0)
		{
			if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
				bufHdr->usage_count++;
		}
		LocalRefCount[b]++;
		ResourceOwnerRememberBuffer(CurrentResourceOwner,
									BufferDescriptorGetBuffer(bufHdr));
		if (bufHdr->flags & BM_VALID)
			*foundPtr = TRUE;
		else
		{
			/* Previous read attempt must have failed; try again */
			*foundPtr = FALSE;
		}
		return bufHdr;
	}

#ifdef LBDEBUG
	fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
			smgr->smgr_rnode.node.relNode, forkNum, blockNum,
			-nextFreeLocalBuf - 1);
#endif

	/*
	 * Need to get a new buffer.  We use a clock sweep algorithm (essentially
	 * the same as what freelist.c does now...)
	 */
	trycounter = NLocBuffer;
	for (;;)
	{
		b = nextFreeLocalBuf;

		/* Advance the clock hand, wrapping around at the end of the array */
		if (++nextFreeLocalBuf >= NLocBuffer)
			nextFreeLocalBuf = 0;

		bufHdr = GetLocalBufferDescriptor(b);

		if (LocalRefCount[b] == 0)
		{
			if (bufHdr->usage_count > 0)
			{
				/* Recently used: decay its count and restart the countdown */
				bufHdr->usage_count--;
				trycounter = NLocBuffer;
			}
			else
			{
				/* Found a usable buffer */
				LocalRefCount[b]++;
				ResourceOwnerRememberBuffer(CurrentResourceOwner,
										  BufferDescriptorGetBuffer(bufHdr));
				break;
			}
		}
		else if (--trycounter == 0)
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
					 errmsg("no empty local buffer available")));
	}

	/*
	 * this buffer is not referenced but it might still be dirty. if that's
	 * the case, write it out before reusing it!
	 */
	if (bufHdr->flags & BM_DIRTY)
	{
		SMgrRelation oreln;
		Page		localpage = (char *) LocalBufHdrGetBlock(bufHdr);

		/* Find smgr relation for buffer */
		oreln = smgropen(bufHdr->tag.rnode, MyBackendId);

		PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);

		/* And write... */
		smgrwrite(oreln,
				  bufHdr->tag.forkNum,
				  bufHdr->tag.blockNum,
				  localpage,
				  false);

		/* Mark not-dirty now in case we error out below */
		bufHdr->flags &= ~BM_DIRTY;

		pgBufferUsage.local_blks_written++;
	}

	/*
	 * lazy memory allocation: allocate space on first use of a buffer.
	 */
	if (LocalBufHdrGetBlock(bufHdr) == NULL)
	{
		/* Set pointer for use by BufferGetBlock() macro */
		LocalBufHdrGetBlock(bufHdr) = GetLocalBufferStorage();
	}

	/*
	 * Update the hash table: remove old entry, if any, and make new one.
	 */
	if (bufHdr->flags & BM_TAG_VALID)
	{
		hresult = (LocalBufferLookupEnt *)
			hash_search(LocalBufHash, (void *) &bufHdr->tag,
						HASH_REMOVE, NULL);
		if (!hresult)			/* shouldn't happen */
			elog(ERROR, "local buffer hash table corrupted");
		/* mark buffer invalid just in case hash insert fails */
		CLEAR_BUFFERTAG(bufHdr->tag);
		bufHdr->flags &= ~(BM_VALID | BM_TAG_VALID);
	}

	hresult = (LocalBufferLookupEnt *)
		hash_search(LocalBufHash, (void *) &newTag, HASH_ENTER, &found);
	if (found)					/* shouldn't happen */
		elog(ERROR, "local buffer hash table corrupted");
	hresult->id = b;

	/*
	 * it's all ours now.
	 */
	bufHdr->tag = newTag;
	bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
	bufHdr->flags |= BM_TAG_VALID;
	bufHdr->usage_count = 1;

	*foundPtr = FALSE;
	return bufHdr;
}
|
|
|
|
|
|
|
|
/*
 * MarkLocalBufferDirty -
 *	  mark a local buffer dirty
 */
void
MarkLocalBufferDirty(Buffer buffer)
{
	int			bufid;
	BufferDesc *bufHdr;

	Assert(BufferIsLocal(buffer));

#ifdef LBDEBUG
	fprintf(stderr, "LB DIRTY %d\n", buffer);
#endif

	/* Local buffer IDs are negative (-1, -2, ...); map to 0-based index */
	bufid = -(buffer + 1);

	/* Caller must already hold a pin on the buffer */
	Assert(LocalRefCount[bufid] > 0);

	bufHdr = GetLocalBufferDescriptor(bufid);

	/* Count the page only on the clean->dirty transition */
	if (!(bufHdr->flags & BM_DIRTY))
		pgBufferUsage.local_blks_dirtied++;

	bufHdr->flags |= BM_DIRTY;
}
|
|
|
|
|
2005-11-17 18:42:02 +01:00
|
|
|
/*
 * DropRelFileNodeLocalBuffers
 *		This function removes from the buffer pool all the pages of the
 *		specified relation that have block numbers >= firstDelBlock.
 *		(In particular, with firstDelBlock = 0, all pages are removed.)
 *		Dirty pages are simply dropped, without bothering to write them
 *		out first.  Therefore, this is NOT rollback-able, and so should be
 *		used only with extreme caution!
 *
 * See DropRelFileNodeBuffers in bufmgr.c for more notes.
 */
void
DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
							BlockNumber firstDelBlock)
{
	int			i;

	/* Linear scan of all local buffers; NLocBuffer is small enough */
	for (i = 0; i < NLocBuffer; i++)
	{
		BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
		LocalBufferLookupEnt *hresult;

		if ((bufHdr->flags & BM_TAG_VALID) &&
			RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
			bufHdr->tag.forkNum == forkNum &&
			bufHdr->tag.blockNum >= firstDelBlock)
		{
			/* A pinned page here means a missing unpin somewhere — bail out */
			if (LocalRefCount[i] != 0)
				elog(ERROR, "block %u of %s is still referenced (local %u)",
					 bufHdr->tag.blockNum,
					 relpathbackend(bufHdr->tag.rnode, MyBackendId,
									bufHdr->tag.forkNum),
					 LocalRefCount[i]);
			/* Remove entry from hashtable */
			hresult = (LocalBufferLookupEnt *)
				hash_search(LocalBufHash, (void *) &bufHdr->tag,
							HASH_REMOVE, NULL);
			if (!hresult)		/* shouldn't happen */
				elog(ERROR, "local buffer hash table corrupted");
			/* Mark buffer invalid */
			CLEAR_BUFFERTAG(bufHdr->tag);
			bufHdr->flags = 0;
			bufHdr->usage_count = 0;
		}
	}
}
|
|
|
|
|
2012-06-07 23:42:27 +02:00
|
|
|
/*
 * DropRelFileNodeAllLocalBuffers
 *		This function removes from the buffer pool all pages of all forks
 *		of the specified relation.
 *
 * See DropRelFileNodeAllBuffers in bufmgr.c for more notes.
 */
void
DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
{
	int			i;

	/* Linear scan, same pattern as DropRelFileNodeLocalBuffers but no
	 * fork/block filtering: every page of the relation goes. */
	for (i = 0; i < NLocBuffer; i++)
	{
		BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
		LocalBufferLookupEnt *hresult;

		if ((bufHdr->flags & BM_TAG_VALID) &&
			RelFileNodeEquals(bufHdr->tag.rnode, rnode))
		{
			/* A remaining pin indicates a bug in the caller */
			if (LocalRefCount[i] != 0)
				elog(ERROR, "block %u of %s is still referenced (local %u)",
					 bufHdr->tag.blockNum,
					 relpathbackend(bufHdr->tag.rnode, MyBackendId,
									bufHdr->tag.forkNum),
					 LocalRefCount[i]);
			/* Remove entry from hashtable */
			hresult = (LocalBufferLookupEnt *)
				hash_search(LocalBufHash, (void *) &bufHdr->tag,
							HASH_REMOVE, NULL);
			if (!hresult)		/* shouldn't happen */
				elog(ERROR, "local buffer hash table corrupted");
			/* Mark buffer invalid */
			CLEAR_BUFFERTAG(bufHdr->tag);
			bufHdr->flags = 0;
			bufHdr->usage_count = 0;
		}
	}
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * InitLocalBuffers -
 *	  init the local buffer cache. Since most queries (esp. multi-user ones)
 *	  don't involve local buffers, we delay allocating actual memory for the
 *	  buffers until we need them; just make the buffer headers here.
 */
static void
InitLocalBuffers(void)
{
	int			nbufs = num_temp_buffers;
	HASHCTL		info;
	int			i;

	/* Allocate and zero buffer headers and auxiliary arrays */
	LocalBufferDescriptors = (BufferDesc *) calloc(nbufs, sizeof(BufferDesc));
	LocalBufferBlockPointers = (Block *) calloc(nbufs, sizeof(Block));
	LocalRefCount = (int32 *) calloc(nbufs, sizeof(int32));
	if (!LocalBufferDescriptors || !LocalBufferBlockPointers || !LocalRefCount)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	nextFreeLocalBuf = 0;

	/* initialize fields that need to start off nonzero */
	for (i = 0; i < nbufs; i++)
	{
		BufferDesc *buf = GetLocalBufferDescriptor(i);

		/*
		 * negative to indicate local buffer. This is tricky: shared buffers
		 * start with 0. We have to start with -2. (Note that the routine
		 * BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id
		 * is -1.)
		 */
		buf->buf_id = -i - 2;
	}

	/* Create the lookup hash table */
	MemSet(&info, 0, sizeof(info));
	info.keysize = sizeof(BufferTag);
	info.entrysize = sizeof(LocalBufferLookupEnt);

	/* HASH_BLOBS: BufferTag is plain binary data, use the built-in hasher */
	LocalBufHash = hash_create("Local Buffer Lookup Table",
							   nbufs,
							   &info,
							   HASH_ELEM | HASH_BLOBS);

	if (!LocalBufHash)
		elog(ERROR, "could not initialize local buffer hash table");

	/* Initialization done, mark buffers allocated */
	NLocBuffer = nbufs;
}
|
|
|
|
|
2006-12-27 23:31:54 +01:00
|
|
|
/*
 * GetLocalBufferStorage - allocate memory for a local buffer
 *
 * The idea of this function is to aggregate our requests for storage
 * so that the memory manager doesn't see a whole lot of relatively small
 * requests.  Since we'll never give back a local buffer once it's created
 * within a particular process, no point in burdening memmgr with separately
 * managed chunks.
 */
static Block
GetLocalBufferStorage(void)
{
	/* State persists across calls: current chunk and allocation progress */
	static char *cur_block = NULL;
	static int	next_buf_in_block = 0;
	static int	num_bufs_in_block = 0;
	static int	total_bufs_allocated = 0;
	static MemoryContext LocalBufferContext = NULL;

	char	   *this_buf;

	Assert(total_bufs_allocated < NLocBuffer);

	if (next_buf_in_block >= num_bufs_in_block)
	{
		/* Need to make a new request to memmgr */
		int			num_bufs;

		/*
		 * We allocate local buffers in a context of their own, so that the
		 * space eaten for them is easily recognizable in MemoryContextStats
		 * output.  Create the context on first use.
		 */
		if (LocalBufferContext == NULL)
			LocalBufferContext =
				AllocSetContextCreate(TopMemoryContext,
									  "LocalBufferContext",
									  ALLOCSET_DEFAULT_MINSIZE,
									  ALLOCSET_DEFAULT_INITSIZE,
									  ALLOCSET_DEFAULT_MAXSIZE);

		/* Start with a 16-buffer request; subsequent ones double each time */
		num_bufs = Max(num_bufs_in_block * 2, 16);
		/* But not more than what we need for all remaining local bufs */
		num_bufs = Min(num_bufs, NLocBuffer - total_bufs_allocated);
		/* And don't overflow MaxAllocSize, either */
		num_bufs = Min(num_bufs, MaxAllocSize / BLCKSZ);

		cur_block = (char *) MemoryContextAlloc(LocalBufferContext,
												num_bufs * BLCKSZ);
		next_buf_in_block = 0;
		num_bufs_in_block = num_bufs;
	}

	/* Allocate next buffer in current memory block */
	this_buf = cur_block + next_buf_in_block * BLCKSZ;
	next_buf_in_block++;
	total_bufs_allocated++;

	return (Block) this_buf;
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * CheckForLocalBufferLeaks - ensure this backend holds no local buffer pins
 *
 * This is just like CheckBufferLeaks(), but for local buffers.
 */
static void
CheckForLocalBufferLeaks(void)
{
#ifdef USE_ASSERT_CHECKING
	/* LocalRefCount is NULL until InitLocalBuffers has run */
	if (LocalRefCount)
	{
		int			RefCountErrors = 0;
		int			i;

		for (i = 0; i < NLocBuffer; i++)
		{
			if (LocalRefCount[i] != 0)
			{
				/* Reconstruct the (negative) local buffer ID for reporting */
				Buffer		b = -i - 1;

				PrintBufferLeakWarning(b);
				RefCountErrors++;
			}
		}
		/* Report every leak before failing the assertion */
		Assert(RefCountErrors == 0);
	}
#endif
}
|
2005-03-18 17:16:09 +01:00
|
|
|
|
2014-06-20 11:06:42 +02:00
|
|
|
/*
 * AtEOXact_LocalBuffers - clean up at end of transaction.
 *
 * This is just like AtEOXact_Buffers, but for local buffers.
 */
void
AtEOXact_LocalBuffers(bool isCommit)
{
	CheckForLocalBufferLeaks();
}
|
|
|
|
|
2005-03-18 17:16:09 +01:00
|
|
|
/*
 * AtProcExit_LocalBuffers - ensure we have dropped pins during backend exit.
 *
 * This is just like AtProcExit_Buffers, but for local buffers.
 */
void
AtProcExit_LocalBuffers(void)
{
	/*
	 * We shouldn't be holding any remaining pins; if we are, and assertions
	 * aren't enabled, we'll fail later in DropRelFileNodeBuffers while trying
	 * to drop the temp rels.
	 */
	CheckForLocalBufferLeaks();
}
|