diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index db0bda6699..583bb20933 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.55 2005/04/29 22:28:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.56 2005/05/07 21:32:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -241,13 +241,6 @@ RelationGetBufferForTuple(Relation relation, Size len, */ buffer = ReadBuffer(relation, P_NEW); - /* - * Release the file-extension lock; it's now OK for someone else to - * extend the relation some more. - */ - if (needLock) - UnlockRelationForExtension(relation, ExclusiveLock); - /* * We can be certain that locking the otherBuffer first is OK, since * it must have a lower page number. @@ -256,9 +249,22 @@ RelationGetBufferForTuple(Relation relation, Size len, LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); /* - * We need to initialize the empty new page. + * Now acquire lock on the new page. */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + /* + * Release the file-extension lock; it's now OK for someone else to + * extend the relation some more. Note that we cannot release this + * lock before we have buffer lock on the new page, or we risk a + * race condition against vacuumlazy.c --- see comments therein. + */ + if (needLock) + UnlockRelationForExtension(relation, ExclusiveLock); + + /* + * We need to initialize the empty new page. + */ pageHeader = (Page) BufferGetPage(buffer); Assert(PageIsNew((PageHeader) pageHeader)); PageInit(pageHeader, BufferGetPageSize(buffer), 0); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 222ed76435..ea02325318 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.83 2005/04/29 22:28:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.84 2005/05/07 21:32:23 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -491,18 +491,21 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) buf = ReadBuffer(rel, P_NEW); + /* Acquire buffer lock on new page */ + LockBuffer(buf, BT_WRITE); + /* - * Release the file-extension lock; it's now OK for someone else - * to extend the relation some more. + * Release the file-extension lock; it's now OK for someone else to + * extend the relation some more. Note that we cannot release this + * lock before we have buffer lock on the new page, or we risk a + * race condition against btvacuumcleanup --- see comments therein. */ if (needLock) UnlockRelationForExtension(rel, ExclusiveLock); - /* Acquire appropriate buffer lock on new page */ - LockBuffer(buf, access); - /* Initialize the new page before returning it */ page = BufferGetPage(buf); + Assert(PageIsNew((PageHeader) page)); _bt_pageinit(page, BufferGetPageSize(buf)); } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 010af6dd5c..b57ab37a5b 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.128 2005/05/06 17:24:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.129 2005/05/07 21:32:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -739,11 +739,35 @@ btvacuumcleanup(PG_FUNCTION_ARGS) BlockNumber pages_deleted = 0; MemoryContext mycontext; MemoryContext oldcontext; + bool needLock; Assert(stats != NULL); + /* + * First find out the number of pages in the index. We must acquire + * the relation-extension lock while doing this to avoid a race + * condition: if someone else is extending the relation, there is + * a window where bufmgr/smgr have created a new all-zero page but + * it hasn't yet been write-locked by _bt_getbuf(). If we manage to + * scan such a page here, we'll improperly assume it can be recycled. + * Taking the lock synchronizes things enough to prevent a problem: + * either num_pages won't include the new page, or _bt_getbuf already + * has write lock on the buffer and it will be fully initialized before + * we can examine it. (See also vacuumlazy.c, which has the same issue.) + * + * We can skip locking for new or temp relations, + * however, since no one else could be accessing them. + */ + needLock = !RELATION_IS_LOCAL(rel); + + if (needLock) + LockRelationForExtension(rel, ExclusiveLock); + num_pages = RelationGetNumberOfBlocks(rel); + if (needLock) + UnlockRelationForExtension(rel, ExclusiveLock); + /* No point in remembering more than MaxFSMPages pages */ maxFreePages = MaxFSMPages; if ((BlockNumber) maxFreePages > num_pages) diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index d6861be715..d2b6da9ef8 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.52 2005/03/25 22:51:31 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.53 2005/05/07 21:32:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -280,8 +280,30 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, if (PageIsNew(page)) { - /* Not sure we still need to handle this case, but... */ + /* + * An all-zeroes page could be left over if a backend extends + * the relation but crashes before initializing the page. + * Reclaim such pages for use. + * + * We have to be careful here because we could be looking at + * a page that someone has just added to the relation and not + * yet been able to initialize (see RelationGetBufferForTuple). + * To interlock against that, release the buffer read lock + * (which we must do anyway) and grab the relation extension + * lock before re-locking in exclusive mode. If the page is + * still uninitialized by then, it must be left over from a + * crashed backend, and we can initialize it. + * + * We don't really need the relation lock when this is a new + * or temp relation, but it's probably not worth the code space + * to check that, since this surely isn't a critical path. + * + * Note: the comparable code in vacuum.c need not do all this + * because it's got exclusive lock on the whole relation. + */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); + LockRelationForExtension(onerel, ExclusiveLock); + UnlockRelationForExtension(onerel, ExclusiveLock); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); if (PageIsNew(page)) {