diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index dc5872f988..ab9a53b27c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -295,12 +295,18 @@ static const struct dropmsgstrings dropmsgstringarray[] = { {'\0', 0, NULL, NULL, NULL, NULL} }; +/* communication between RemoveRelations and RangeVarCallbackForDropRelation */ struct DropRelationCallbackState { - char relkind; + /* These fields are set by RemoveRelations: */ + char expected_relkind; + LOCKMODE heap_lockmode; + /* These fields are state to track which subsidiary locks are held: */ Oid heapOid; Oid partParentOid; - bool concurrent; + /* These fields are passed back by RangeVarCallbackForDropRelation: */ + char actual_relkind; + char actual_relpersistence; }; /* Alter table target-type flags for ATSimplePermissions */ @@ -1416,10 +1422,13 @@ RemoveRelations(DropStmt *drop) AcceptInvalidationMessages(); /* Look up the appropriate relation using namespace search. */ - state.relkind = relkind; + state.expected_relkind = relkind; + state.heap_lockmode = drop->concurrent ? + ShareUpdateExclusiveLock : AccessExclusiveLock; + /* We must initialize these fields to show that no locks are held: */ state.heapOid = InvalidOid; state.partParentOid = InvalidOid; - state.concurrent = drop->concurrent; + relOid = RangeVarGetRelidExtended(rel, lockmode, RVR_MISSING_OK, RangeVarCallbackForDropRelation, (void *) &state); @@ -1433,10 +1442,10 @@ RemoveRelations(DropStmt *drop) /* * Decide if concurrent mode needs to be used here or not. The - * relation persistence cannot be known without its OID. + * callback retrieved the rel's persistence for us. */ if (drop->concurrent && - get_rel_persistence(relOid) != RELPERSISTENCE_TEMP) + state.actual_relpersistence != RELPERSISTENCE_TEMP) { Assert(list_length(drop->objects) == 1 && drop->removeType == OBJECT_INDEX); @@ -1448,12 +1457,24 @@ RemoveRelations(DropStmt *drop) * either. */ if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 && - get_rel_relkind(relOid) == RELKIND_PARTITIONED_INDEX) + state.actual_relkind == RELKIND_PARTITIONED_INDEX) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot drop partitioned index \"%s\" concurrently", rel->relname))); + /* + * If we're told to drop a partitioned index, we must acquire lock on + * all the children of its parent partitioned table before proceeding. + * Otherwise we'd try to lock the child index partitions before their + * tables, leading to potential deadlock against other sessions that + * will lock those objects in the other order. + */ + if (state.actual_relkind == RELKIND_PARTITIONED_INDEX) + (void) find_all_inheritors(state.heapOid, + state.heap_lockmode, + NULL); + /* OK, we're ready to delete this one */ obj.classId = RelationRelationId; obj.objectId = relOid; @@ -1479,7 +1500,6 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, { HeapTuple tuple; struct DropRelationCallbackState *state; - char relkind; char expected_relkind; bool is_partition; Form_pg_class classform; @@ -1487,9 +1507,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, bool invalid_system_index = false; state = (struct DropRelationCallbackState *) arg; - relkind = state->relkind; - heap_lockmode = state->concurrent ? - ShareUpdateExclusiveLock : AccessExclusiveLock; + heap_lockmode = state->heap_lockmode; /* * If we previously locked some other index's heap, and the name we're @@ -1523,6 +1541,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, classform = (Form_pg_class) GETSTRUCT(tuple); is_partition = classform->relispartition; + /* Pass back some data to save lookups in RemoveRelations */ + state->actual_relkind = classform->relkind; + state->actual_relpersistence = classform->relpersistence; + /* * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE, * but RemoveRelations() can only pass one relkind for a given relation. @@ -1538,13 +1560,15 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, else expected_relkind = classform->relkind; - if (relkind != expected_relkind) - DropErrorMsgWrongType(rel->relname, classform->relkind, relkind); + if (state->expected_relkind != expected_relkind) + DropErrorMsgWrongType(rel->relname, classform->relkind, + state->expected_relkind); /* Allow DROP to either table owner or schema owner */ if (!pg_class_ownercheck(relOid, GetUserId()) && !pg_namespace_ownercheck(classform->relnamespace, GetUserId())) - aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)), + aclcheck_error(ACLCHECK_NOT_OWNER, + get_relkind_objtype(classform->relkind), rel->relname); /* @@ -1553,7 +1577,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, * only concerns indexes of toast relations that became invalid during a * REINDEX CONCURRENTLY process. */ - if (IsSystemClass(relOid, classform) && relkind == RELKIND_INDEX) + if (IsSystemClass(relOid, classform) && classform->relkind == RELKIND_INDEX) { HeapTuple locTuple; Form_pg_index indexform; @@ -1589,9 +1613,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, * locking the index. index_drop() will need this anyway, and since * regular queries lock tables before their indexes, we risk deadlock if * we do it the other way around. No error if we don't find a pg_index - * entry, though --- the relation may have been dropped. + * entry, though --- the relation may have been dropped. Note that this + * code will execute for either plain or partitioned indexes. */ - if ((relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX) && + if (expected_relkind == RELKIND_INDEX && relOid != oldRelOid) { state->heapOid = IndexGetRelation(relOid, true); @@ -1602,7 +1627,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, /* * Similarly, if the relation is a partition, we must acquire lock on its * parent before locking the partition. That's because queries lock the - * parent before its partitions, so we risk deadlock it we do it the other + * parent before its partitions, so we risk deadlock if we do it the other * way around. */ if (is_partition && relOid != oldRelOid) diff --git a/src/test/isolation/expected/partition-drop-index-locking.out b/src/test/isolation/expected/partition-drop-index-locking.out new file mode 100644 index 0000000000..9acd51dfde --- /dev/null +++ b/src/test/isolation/expected/partition-drop-index-locking.out @@ -0,0 +1,100 @@ +Parsed test spec with 3 sessions + +starting permutation: s1begin s1lock s2begin s2drop s1select s3getlocks s1commit s3getlocks s2commit +step s1begin: BEGIN; +step s1lock: LOCK TABLE part_drop_index_locking_subpart_child IN ACCESS SHARE MODE; +step s2begin: BEGIN; +step s2drop: DROP INDEX part_drop_index_locking_idx; +step s1select: SELECT * FROM part_drop_index_locking_subpart_child; +id +-- +(0 rows) + +step s3getlocks: + SELECT s.query, c.relname, l.mode, l.granted + FROM pg_locks l + JOIN pg_class c ON l.relation = c.oid + JOIN pg_stat_activity s ON l.pid = s.pid + WHERE c.relname LIKE 'part_drop_index_locking%' + ORDER BY s.query, c.relname, l.mode, l.granted; + +query |relname |mode |granted +----------------------------------------------------+---------------------------------------------+-------------------+------- +DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_idx |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_subpart |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_subpart_child |AccessExclusiveLock|f +SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child |AccessShareLock |t +SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx |AccessShareLock |t +SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx1|AccessShareLock |t +(7 rows) + +step s1commit: COMMIT; +step s2drop: <... completed> +step s3getlocks: + SELECT s.query, c.relname, l.mode, l.granted + FROM pg_locks l + JOIN pg_class c ON l.relation = c.oid + JOIN pg_stat_activity s ON l.pid = s.pid + WHERE c.relname LIKE 'part_drop_index_locking%' + ORDER BY s.query, c.relname, l.mode, l.granted; + +query |relname |mode |granted +---------------------------------------+--------------------------------------------+-------------------+------- +DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_idx |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_child |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_child_id_idx|AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_id_idx |AccessExclusiveLock|t +(6 rows) + +step s2commit: COMMIT; + +starting permutation: s1begin s1lock s2begin s2dropsub s1select s3getlocks s1commit s3getlocks s2commit +step s1begin: BEGIN; +step s1lock: LOCK TABLE part_drop_index_locking_subpart_child IN ACCESS SHARE MODE; +step s2begin: BEGIN; +step s2dropsub: DROP INDEX part_drop_index_locking_subpart_idx; +step s1select: SELECT * FROM part_drop_index_locking_subpart_child; +id +-- +(0 rows) + +step s3getlocks: + SELECT s.query, c.relname, l.mode, l.granted + FROM pg_locks l + JOIN pg_class c ON l.relation = c.oid + JOIN pg_stat_activity s ON l.pid = s.pid + WHERE c.relname LIKE 'part_drop_index_locking%' + ORDER BY s.query, c.relname, l.mode, l.granted; + +query |relname |mode |granted +----------------------------------------------------+---------------------------------------------+-------------------+------- +DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart_child |AccessExclusiveLock|f +DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart_idx |AccessExclusiveLock|t +SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child |AccessShareLock |t +SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx |AccessShareLock |t +SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx1|AccessShareLock |t +(6 rows) + +step s1commit: COMMIT; +step s2dropsub: <... completed> +step s3getlocks: + SELECT s.query, c.relname, l.mode, l.granted + FROM pg_locks l + JOIN pg_class c ON l.relation = c.oid + JOIN pg_stat_activity s ON l.pid = s.pid + WHERE c.relname LIKE 'part_drop_index_locking%' + ORDER BY s.query, c.relname, l.mode, l.granted; + +query |relname |mode |granted +-----------------------------------------------+---------------------------------------------+-------------------+------- +DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_child |AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_child_id_idx1|AccessExclusiveLock|t +DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_idx |AccessExclusiveLock|t +(4 rows) + +step s2commit: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 0dae483e82..8e87098150 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -90,6 +90,7 @@ test: predicate-hash test: predicate-gist test: predicate-gin test: partition-concurrent-attach +test: partition-drop-index-locking test: partition-key-update-1 test: partition-key-update-2 test: partition-key-update-3 diff --git a/src/test/isolation/specs/partition-drop-index-locking.spec b/src/test/isolation/specs/partition-drop-index-locking.spec new file mode 100644 index 0000000000..34e8b528b8 --- /dev/null +++ b/src/test/isolation/specs/partition-drop-index-locking.spec @@ -0,0 +1,47 @@ +# Verify that DROP INDEX properly locks all downward sub-partitions +# and partitions before locking the indexes. + +setup +{ + CREATE TABLE part_drop_index_locking (id int) PARTITION BY RANGE(id); + CREATE TABLE part_drop_index_locking_subpart PARTITION OF part_drop_index_locking FOR VALUES FROM (1) TO (100) PARTITION BY RANGE(id); + CREATE TABLE part_drop_index_locking_subpart_child PARTITION OF part_drop_index_locking_subpart FOR VALUES FROM (1) TO (100); + CREATE INDEX part_drop_index_locking_idx ON part_drop_index_locking(id); + CREATE INDEX part_drop_index_locking_subpart_idx ON part_drop_index_locking_subpart(id); +} + +teardown +{ + DROP TABLE part_drop_index_locking; +} + +# SELECT will take AccessShare lock first on the table and then on its index. +# We can simulate the case where DROP INDEX starts between those steps +# by manually taking the table lock beforehand. +session s1 +step s1begin { BEGIN; } +step s1lock { LOCK TABLE part_drop_index_locking_subpart_child IN ACCESS SHARE MODE; } +step s1select { SELECT * FROM part_drop_index_locking_subpart_child; } +step s1commit { COMMIT; } + +session s2 +step s2begin { BEGIN; } +step s2drop { DROP INDEX part_drop_index_locking_idx; } +step s2dropsub { DROP INDEX part_drop_index_locking_subpart_idx; } +step s2commit { COMMIT; } + +session s3 +step s3getlocks { + SELECT s.query, c.relname, l.mode, l.granted + FROM pg_locks l + JOIN pg_class c ON l.relation = c.oid + JOIN pg_stat_activity s ON l.pid = s.pid + WHERE c.relname LIKE 'part_drop_index_locking%' + ORDER BY s.query, c.relname, l.mode, l.granted; +} + +# Run DROP INDEX on top partitioned table +permutation s1begin s1lock s2begin s2drop(s1commit) s1select s3getlocks s1commit s3getlocks s2commit + +# Run DROP INDEX on top sub-partition table +permutation s1begin s1lock s2begin s2dropsub(s1commit) s1select s3getlocks s1commit s3getlocks s2commit