It turns out that TablespaceCreateDbspace fails badly if a relcache flush

occurs when it tries to heap_open pg_tablespace.  When control returns to
smgrcreate, that routine will be holding a dangling pointer to a closed
SMgrRelation, resulting in mayhem.  This is of course a consequence of
the violation of proper module layering inherent in having smgr.c call
a tablespace command routine, but the simplest fix seems to be to change
the locking mechanism.  There's no real need for TablespaceCreateDbspace
to touch pg_tablespace at all --- it's only opening it as a way of locking
against a parallel DROP TABLESPACE command.  A much better answer is to
create a special-purpose LWLock to interlock these two operations.
This drops TablespaceCreateDbspace quite a few layers down the food chain
and makes it something reasonably safe for smgr to call.
This commit is contained in:
Tom Lane 2006-01-19 04:45:38 +00:00
parent b0be247e38
commit 4513d9deda
2 changed files with 27 additions and 30 deletions

View File

@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.28 2005/10/15 02:49:15 momjian Exp $
* $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.29 2006/01/19 04:45:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -83,12 +83,9 @@ static void set_short_version(const char *path);
* If tablespaces are not supported, this is just a no-op; CREATE DATABASE
* is expected to create the default subdirectory for the database.
*
* isRedo indicates that we are creating an object during WAL replay;
* we can skip doing locking in that case (and should do so to avoid
* any possible problems with pg_tablespace not being valid).
*
* Also, when isRedo is true, we will cope with the possibility of the
* tablespace not being there either --- this could happen if we are
* isRedo indicates that we are creating an object during WAL replay.
* In this case we will cope with the possibility of the tablespace
* directory not being there either --- this could happen if we are
* replaying an operation on a table in a subsequently-dropped tablespace.
* We handle this by making a directory in the place where the tablespace
* symlink would normally be. This isn't an exact replay of course, but
@ -118,16 +115,10 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
if (errno == ENOENT)
{
/*
* Acquire ExclusiveLock on pg_tablespace to ensure that no DROP
* TABLESPACE or TablespaceCreateDbspace is running concurrently.
* Simple reads from pg_tablespace are OK.
* Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE
* or TablespaceCreateDbspace is running concurrently.
*/
Relation rel;
if (!isRedo)
rel = heap_open(TableSpaceRelationId, ExclusiveLock);
else
rel = NULL;
LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
/*
* Recheck to see if someone created the directory while we were
@ -166,9 +157,7 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
}
}
/* OK to drop the exclusive lock */
if (!isRedo)
heap_close(rel, ExclusiveLock);
LWLockRelease(TablespaceCreateLock);
}
else
{
@ -401,15 +390,11 @@ DropTableSpace(DropTableSpaceStmt *stmt)
/* don't call this in a transaction block */
PreventTransactionChain((void *) stmt, "DROP TABLESPACE");
/*
* Acquire ExclusiveLock on pg_tablespace to ensure that no one else is
* trying to do DROP TABLESPACE or TablespaceCreateDbspace concurrently.
*/
rel = heap_open(TableSpaceRelationId, ExclusiveLock);
/*
* Find the target tuple
*/
rel = heap_open(TableSpaceRelationId, RowExclusiveLock);
ScanKeyInit(&entry[0],
Anum_pg_tablespace_spcname,
BTEqualStrategyNumber, F_NAMEEQ,
@ -448,6 +433,12 @@ DropTableSpace(DropTableSpaceStmt *stmt)
*/
deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid);
/*
* Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace
* is running concurrently.
*/
LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
/*
* Try to remove the physical infrastructure
*/
@ -471,6 +462,11 @@ DropTableSpace(DropTableSpaceStmt *stmt)
(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
}
/*
* Allow TablespaceCreateDbspace again.
*/
LWLockRelease(TablespaceCreateLock);
/* We keep the lock on pg_tablespace until commit */
heap_close(rel, NoLock);
#else /* !HAVE_SYMLINK */
@ -507,10 +503,10 @@ remove_tablespace_directories(Oid tablespaceoid, bool redo)
* use the tablespace from that database will simply recreate the
* subdirectory via TablespaceCreateDbspace.)
*
* Since we hold exclusive lock, no one else should be creating any fresh
* subdirectories in parallel. It is possible that new files are being
* created within subdirectories, though, so the rmdir call could fail.
* Worst consequence is a less friendly error message.
* Since we hold TablespaceCreateLock, no one else should be creating any
* fresh subdirectories in parallel. It is possible that new files are
* being created within subdirectories, though, so the rmdir call could
* fail. Worst consequence is a less friendly error message.
*/
dirdesc = AllocateDir(location);
if (dirdesc == NULL)

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.25 2006/01/04 21:06:32 tgl Exp $
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.26 2006/01/19 04:45:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,6 +46,7 @@ typedef enum LWLockId
RelCacheInitLock,
BgWriterCommLock,
TwoPhaseStateLock,
TablespaceCreateLock,
FirstLockMgrLock, /* must be last except for MaxDynamicLWLock */
MaxDynamicLWLock = 1000000000