It turns out that TablespaceCreateDbspace fails badly if a relcache flush

occurs when it tries to heap_open pg_tablespace.  When control returns to
smgrcreate, that routine will be holding a dangling pointer to a closed
SMgrRelation, resulting in mayhem.  This is of course a consequence of
the violation of proper module layering inherent in having smgr.c call
a tablespace command routine, but the simplest fix seems to be to change
the locking mechanism.  There's no real need for TablespaceCreateDbspace
to touch pg_tablespace at all --- it's only opening it as a way of locking
against a parallel DROP TABLESPACE command.  A much better answer is to
create a special-purpose LWLock to interlock these two operations.
This drops TablespaceCreateDbspace quite a few layers down the food chain
and makes it something reasonably safe for smgr to call.
This commit is contained in:
Tom Lane 2006-01-19 04:45:38 +00:00
parent b0be247e38
commit 4513d9deda
2 changed files with 27 additions and 30 deletions

View File

@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.28 2005/10/15 02:49:15 momjian Exp $ * $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.29 2006/01/19 04:45:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -83,12 +83,9 @@ static void set_short_version(const char *path);
* If tablespaces are not supported, this is just a no-op; CREATE DATABASE * If tablespaces are not supported, this is just a no-op; CREATE DATABASE
* is expected to create the default subdirectory for the database. * is expected to create the default subdirectory for the database.
* *
* isRedo indicates that we are creating an object during WAL replay; * isRedo indicates that we are creating an object during WAL replay.
* we can skip doing locking in that case (and should do so to avoid * In this case we will cope with the possibility of the tablespace
* any possible problems with pg_tablespace not being valid). * directory not being there either --- this could happen if we are
*
* Also, when isRedo is true, we will cope with the possibility of the
* tablespace not being there either --- this could happen if we are
* replaying an operation on a table in a subsequently-dropped tablespace. * replaying an operation on a table in a subsequently-dropped tablespace.
* We handle this by making a directory in the place where the tablespace * We handle this by making a directory in the place where the tablespace
* symlink would normally be. This isn't an exact replay of course, but * symlink would normally be. This isn't an exact replay of course, but
@ -118,16 +115,10 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
if (errno == ENOENT) if (errno == ENOENT)
{ {
/* /*
* Acquire ExclusiveLock on pg_tablespace to ensure that no DROP * Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE
* TABLESPACE or TablespaceCreateDbspace is running concurrently. * or TablespaceCreateDbspace is running concurrently.
* Simple reads from pg_tablespace are OK.
*/ */
Relation rel; LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
if (!isRedo)
rel = heap_open(TableSpaceRelationId, ExclusiveLock);
else
rel = NULL;
/* /*
* Recheck to see if someone created the directory while we were * Recheck to see if someone created the directory while we were
@ -166,9 +157,7 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
} }
} }
/* OK to drop the exclusive lock */ LWLockRelease(TablespaceCreateLock);
if (!isRedo)
heap_close(rel, ExclusiveLock);
} }
else else
{ {
@ -401,15 +390,11 @@ DropTableSpace(DropTableSpaceStmt *stmt)
/* don't call this in a transaction block */ /* don't call this in a transaction block */
PreventTransactionChain((void *) stmt, "DROP TABLESPACE"); PreventTransactionChain((void *) stmt, "DROP TABLESPACE");
/*
* Acquire ExclusiveLock on pg_tablespace to ensure that no one else is
* trying to do DROP TABLESPACE or TablespaceCreateDbspace concurrently.
*/
rel = heap_open(TableSpaceRelationId, ExclusiveLock);
/* /*
* Find the target tuple * Find the target tuple
*/ */
rel = heap_open(TableSpaceRelationId, RowExclusiveLock);
ScanKeyInit(&entry[0], ScanKeyInit(&entry[0],
Anum_pg_tablespace_spcname, Anum_pg_tablespace_spcname,
BTEqualStrategyNumber, F_NAMEEQ, BTEqualStrategyNumber, F_NAMEEQ,
@ -448,6 +433,12 @@ DropTableSpace(DropTableSpaceStmt *stmt)
*/ */
deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid); deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid);
/*
* Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace
* is running concurrently.
*/
LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
/* /*
* Try to remove the physical infrastructure * Try to remove the physical infrastructure
*/ */
@ -471,6 +462,11 @@ DropTableSpace(DropTableSpaceStmt *stmt)
(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata); (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
} }
/*
* Allow TablespaceCreateDbspace again.
*/
LWLockRelease(TablespaceCreateLock);
/* We keep the lock on pg_tablespace until commit */ /* We keep the lock on pg_tablespace until commit */
heap_close(rel, NoLock); heap_close(rel, NoLock);
#else /* !HAVE_SYMLINK */ #else /* !HAVE_SYMLINK */
@ -507,10 +503,10 @@ remove_tablespace_directories(Oid tablespaceoid, bool redo)
* use the tablespace from that database will simply recreate the * use the tablespace from that database will simply recreate the
* subdirectory via TablespaceCreateDbspace.) * subdirectory via TablespaceCreateDbspace.)
* *
* Since we hold exclusive lock, no one else should be creating any fresh * Since we hold TablespaceCreateLock, no one else should be creating any
* subdirectories in parallel. It is possible that new files are being * fresh subdirectories in parallel. It is possible that new files are
* created within subdirectories, though, so the rmdir call could fail. * being created within subdirectories, though, so the rmdir call could
* Worst consequence is a less friendly error message. * fail. Worst consequence is a less friendly error message.
*/ */
dirdesc = AllocateDir(location); dirdesc = AllocateDir(location);
if (dirdesc == NULL) if (dirdesc == NULL)

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.25 2006/01/04 21:06:32 tgl Exp $ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.26 2006/01/19 04:45:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -46,6 +46,7 @@ typedef enum LWLockId
RelCacheInitLock, RelCacheInitLock,
BgWriterCommLock, BgWriterCommLock,
TwoPhaseStateLock, TwoPhaseStateLock,
TablespaceCreateLock,
FirstLockMgrLock, /* must be last except for MaxDynamicLWLock */ FirstLockMgrLock, /* must be last except for MaxDynamicLWLock */
MaxDynamicLWLock = 1000000000 MaxDynamicLWLock = 1000000000