Allow a partdesc-omitting-partitions to be cached

Makes partition descriptor acquisition faster during the transient
period in which a partition is in the process of being detached.

This also adds the restriction that only one partition can be in
pending-detach state for a partitioned table.

While at it, return find_inheritance_children() API to what it was
before 71f4c8c6f7, and create a separate
find_inheritance_children_extended() that returns detailed info about
detached partitions.

(This incidentally fixes a bug in 8aba932251 whereby a memory context
holding a transient partdesc is reparented to a NULL PortalContext,
leading to permanent leak of that memory.  The fix is to no longer rely
on reparenting contexts to PortalContext.   Reported by Amit Langote.)

Per gripe from Amit Langote
Discussion: https://postgr.es/m/CA+HiwqFgpP1LxJZOBYGt9rpvTjXXkg5qG2+Xch2Z1Q7KrqZR1A@mail.gmail.com
This commit is contained in:
Alvaro Herrera 2021-04-28 15:44:35 -04:00
parent c93f8f3b8d
commit d6b8d29419
No known key found for this signature in database
GPG Key ID: 1C20ACB9D5C564AE
10 changed files with 330 additions and 74 deletions

View File

@ -986,6 +986,8 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
If <literal>FINALIZE</literal> is specified, a previous If <literal>FINALIZE</literal> is specified, a previous
<literal>DETACH CONCURRENTLY</literal> invocation that was cancelled or <literal>DETACH CONCURRENTLY</literal> invocation that was cancelled or
interrupted is completed. interrupted is completed.
At most one partition in a partitioned table can be pending detach at
a time.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>

View File

@ -52,6 +52,22 @@ typedef struct SeenRelsEntry
* then no locks are acquired, but caller must beware of race conditions * then no locks are acquired, but caller must beware of race conditions
* against possible DROPs of child relations. * against possible DROPs of child relations.
* *
* Partitions marked as being detached are omitted; see
* find_inheritance_children_extended for details.
*/
List *
find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
{
return find_inheritance_children_extended(parentrelId, true, lockmode,
NULL, NULL);
}
/*
* find_inheritance_children_extended
*
* As find_inheritance_children, with more options regarding detached
* partitions.
*
* If a partition's pg_inherits row is marked "detach pending", * If a partition's pg_inherits row is marked "detach pending",
* *detached_exist (if not null) is set true. * *detached_exist (if not null) is set true.
* *
@ -60,11 +76,13 @@ typedef struct SeenRelsEntry
* marked "detach pending" is visible to that snapshot, then that partition is * marked "detach pending" is visible to that snapshot, then that partition is
* omitted from the output list. This makes partitions invisible depending on * omitted from the output list. This makes partitions invisible depending on
* whether the transaction that marked those partitions as detached appears * whether the transaction that marked those partitions as detached appears
* committed to the active snapshot. * committed to the active snapshot. In addition, *detached_xmin (if not null)
* is set to the xmin of the row of the detached partition.
*/ */
List * List *
find_inheritance_children(Oid parentrelId, bool omit_detached, find_inheritance_children_extended(Oid parentrelId, bool omit_detached,
LOCKMODE lockmode, bool *detached_exist) LOCKMODE lockmode, bool *detached_exist,
TransactionId *detached_xmin)
{ {
List *list = NIL; List *list = NIL;
Relation relation; Relation relation;
@ -132,9 +150,34 @@ find_inheritance_children(Oid parentrelId, bool omit_detached,
snap = GetActiveSnapshot(); snap = GetActiveSnapshot();
if (!XidInMVCCSnapshot(xmin, snap)) if (!XidInMVCCSnapshot(xmin, snap))
{
if (detached_xmin)
{
/*
* Two detached partitions should not occur (see
* checks in MarkInheritDetached), but if they do,
* track the newer of the two. Make sure to warn the
* user, so that they can clean up. Since this is
* just a cross-check against potentially corrupt
* catalogs, we don't make it a full-fledged error
* message.
*/
if (*detached_xmin != InvalidTransactionId)
{
elog(WARNING, "more than one partition pending detach found for table with OID %u",
parentrelId);
if (TransactionIdFollows(xmin, *detached_xmin))
*detached_xmin = xmin;
}
else
*detached_xmin = xmin;
}
/* Don't add the partition to the output list */
continue; continue;
} }
} }
}
inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid; inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid;
if (numoids >= maxoids) if (numoids >= maxoids)
@ -247,8 +290,7 @@ find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
ListCell *lc; ListCell *lc;
/* Get the direct children of this rel */ /* Get the direct children of this rel */
currentchildren = find_inheritance_children(currentrel, true, currentchildren = find_inheritance_children(currentrel, lockmode);
lockmode, NULL);
/* /*
* Add to the queue only those children not already seen. This avoids * Add to the queue only those children not already seen. This avoids

View File

@ -3492,7 +3492,7 @@ renameatt_internal(Oid myrelid,
* expected_parents will only be 0 if we are not already recursing. * expected_parents will only be 0 if we are not already recursing.
*/ */
if (expected_parents == 0 && if (expected_parents == 0 &&
find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) find_inheritance_children(myrelid, NoLock) != NIL)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION), (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited column \"%s\" must be renamed in child tables too", errmsg("inherited column \"%s\" must be renamed in child tables too",
@ -3691,7 +3691,7 @@ rename_constraint_internal(Oid myrelid,
else else
{ {
if (expected_parents == 0 && if (expected_parents == 0 &&
find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) find_inheritance_children(myrelid, NoLock) != NIL)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION), (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited constraint \"%s\" must be renamed in child tables too", errmsg("inherited constraint \"%s\" must be renamed in child tables too",
@ -6565,7 +6565,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
*/ */
if (colDef->identity && if (colDef->identity &&
recurse && recurse &&
find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) find_inheritance_children(myrelid, NoLock) != NIL)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION), (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("cannot recursively add identity column to table that has child tables"))); errmsg("cannot recursively add identity column to table that has child tables")));
@ -6811,7 +6811,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
* use find_all_inheritors to do it in one pass. * use find_all_inheritors to do it in one pass.
*/ */
children = children =
find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); find_inheritance_children(RelationGetRelid(rel), lockmode);
/* /*
* If we are told not to recurse, there had better not be any child * If we are told not to recurse, there had better not be any child
@ -7674,7 +7674,7 @@ ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recurs
* resulting state can be properly dumped and restored. * resulting state can be properly dumped and restored.
*/ */
if (!recurse && if (!recurse &&
find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL)) find_inheritance_children(RelationGetRelid(rel), lockmode))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too"))); errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));
@ -8282,7 +8282,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
* use find_all_inheritors to do it in one pass. * use find_all_inheritors to do it in one pass.
*/ */
children = children =
find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); find_inheritance_children(RelationGetRelid(rel), lockmode);
if (children) if (children)
{ {
@ -8770,7 +8770,7 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
* use find_all_inheritors to do it in one pass. * use find_all_inheritors to do it in one pass.
*/ */
children = children =
find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); find_inheritance_children(RelationGetRelid(rel), lockmode);
/* /*
* Check if ONLY was specified with ALTER TABLE. If so, allow the * Check if ONLY was specified with ALTER TABLE. If so, allow the
@ -11303,8 +11303,7 @@ ATExecDropConstraint(Relation rel, const char *constrName,
* use find_all_inheritors to do it in one pass. * use find_all_inheritors to do it in one pass.
*/ */
if (!is_no_inherit_constraint) if (!is_no_inherit_constraint)
children = find_inheritance_children(RelationGetRelid(rel), true, children = find_inheritance_children(RelationGetRelid(rel), lockmode);
lockmode, NULL);
else else
children = NIL; children = NIL;
@ -11688,8 +11687,7 @@ ATPrepAlterColumnType(List **wqueue,
} }
} }
else if (!recursing && else if (!recursing &&
find_inheritance_children(RelationGetRelid(rel), true, find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL)
NoLock, NULL) != NIL)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION), (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("type of inherited column \"%s\" must be changed in child tables too", errmsg("type of inherited column \"%s\" must be changed in child tables too",
@ -14601,7 +14599,8 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
* MarkInheritDetached * MarkInheritDetached
* *
* Set inhdetachpending for a partition, for ATExecDetachPartition * Set inhdetachpending for a partition, for ATExecDetachPartition
* in concurrent mode. * in concurrent mode. While at it, verify that no other partition is
* already pending detach.
*/ */
static void static void
MarkInheritDetached(Relation child_rel, Relation parent_rel) MarkInheritDetached(Relation child_rel, Relation parent_rel)
@ -14617,24 +14616,34 @@ MarkInheritDetached(Relation child_rel, Relation parent_rel)
Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
/* /*
* Find pg_inherits entries by inhrelid. * Find pg_inherits entries by inhparent. (We need to scan them all in
* order to verify that no other partition is pending detach.)
*/ */
catalogRelation = table_open(InheritsRelationId, RowExclusiveLock); catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
ScanKeyInit(&key, ScanKeyInit(&key,
Anum_pg_inherits_inhrelid, Anum_pg_inherits_inhparent,
BTEqualStrategyNumber, F_OIDEQ, BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(RelationGetRelid(child_rel))); ObjectIdGetDatum(RelationGetRelid(parent_rel)));
scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, scan = systable_beginscan(catalogRelation, InheritsParentIndexId,
true, NULL, 1, &key); true, NULL, 1, &key);
while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan))) while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
{ {
HeapTuple newtup; Form_pg_inherits inhForm;
if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent != inhForm = (Form_pg_inherits) GETSTRUCT(inheritsTuple);
RelationGetRelid(parent_rel)) if (inhForm->inhdetachpending)
elog(ERROR, "bad parent tuple found for partition %u", ereport(ERROR,
RelationGetRelid(child_rel)); errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("partition \"%s\" already pending detach in partitioned table \"%s.%s\"",
get_rel_name(inhForm->inhrelid),
get_namespace_name(parent_rel->rd_rel->relnamespace),
RelationGetRelationName(parent_rel)),
errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the detach operation."));
if (inhForm->inhrelid == RelationGetRelid(child_rel))
{
HeapTuple newtup;
newtup = heap_copytuple(inheritsTuple); newtup = heap_copytuple(inheritsTuple);
((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true; ((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true;
@ -14643,6 +14652,9 @@ MarkInheritDetached(Relation child_rel, Relation parent_rel)
&inheritsTuple->t_self, &inheritsTuple->t_self,
newtup); newtup);
found = true; found = true;
heap_freetuple(newtup);
/* keep looking, to ensure we catch others pending detach */
}
} }
/* Done */ /* Done */

View File

@ -1141,8 +1141,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
ListCell *l; ListCell *l;
List *idxs = NIL; List *idxs = NIL;
idxs = find_inheritance_children(indexOid, true, idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock);
ShareRowExclusiveLock, NULL);
foreach(l, idxs) foreach(l, idxs)
childTbls = lappend_oid(childTbls, childTbls = lappend_oid(childTbls,
IndexGetRelation(lfirst_oid(l), IndexGetRelation(lfirst_oid(l),

View File

@ -54,6 +54,12 @@ static PartitionDesc RelationBuildPartitionDesc(Relation rel,
/* /*
* RelationGetPartitionDesc -- get partition descriptor, if relation is partitioned * RelationGetPartitionDesc -- get partition descriptor, if relation is partitioned
* *
* We keep two partdescs in relcache: rd_partdesc includes all partitions
* (even those being concurrently marked detached), while rd_partdesc_nodetach
* omits (some of) those. We store the pg_inherits.xmin value for the latter,
* to determine whether it can be validly reused in each case, since that
* depends on the active snapshot.
*
* Note: we arrange for partition descriptors to not get freed until the * Note: we arrange for partition descriptors to not get freed until the
* relcache entry's refcount goes to zero (see hacks in RelationClose, * relcache entry's refcount goes to zero (see hacks in RelationClose,
* RelationClearRelation, and RelationBuildPartitionDesc). Therefore, even * RelationClearRelation, and RelationBuildPartitionDesc). Therefore, even
@ -61,13 +67,6 @@ static PartitionDesc RelationBuildPartitionDesc(Relation rel,
* for callers to continue to use that pointer as long as (a) they hold the * for callers to continue to use that pointer as long as (a) they hold the
* relation open, and (b) they hold a relation lock strong enough to ensure * relation open, and (b) they hold a relation lock strong enough to ensure
* that the data doesn't become stale. * that the data doesn't become stale.
*
* The above applies to partition descriptors that are complete regarding
* partitions concurrently being detached. When a descriptor that omits
* partitions being detached is requested (and such partitions are present),
* said descriptor is not part of relcache and so it isn't freed by
* invalidations either. Caller must not use such a descriptor beyond the
* current Portal.
*/ */
PartitionDesc PartitionDesc
RelationGetPartitionDesc(Relation rel, bool omit_detached) RelationGetPartitionDesc(Relation rel, bool omit_detached)
@ -78,11 +77,36 @@ RelationGetPartitionDesc(Relation rel, bool omit_detached)
* If relcache has a partition descriptor, use that. However, we can only * If relcache has a partition descriptor, use that. However, we can only
* do so when we are asked to include all partitions including detached; * do so when we are asked to include all partitions including detached;
* and also when we know that there are no detached partitions. * and also when we know that there are no detached partitions.
*
* If there is no active snapshot, detached partitions aren't omitted
* either, so we can use the cached descriptor too in that case.
*/ */
if (likely(rel->rd_partdesc && if (likely(rel->rd_partdesc &&
(!rel->rd_partdesc->detached_exist || !omit_detached))) (!rel->rd_partdesc->detached_exist || !omit_detached ||
!ActiveSnapshotSet())))
return rel->rd_partdesc; return rel->rd_partdesc;
/*
* If we're asked to omit detached partitions, we may be able to use a
* cached descriptor too. We determine that based on the pg_inherits.xmin
* that was saved alongside that descriptor: if the xmin that was not in
* progress for that active snapshot is also not in progress for the
* current active snapshot, then we can use use it. Otherwise build one
* from scratch.
*/
if (omit_detached &&
rel->rd_partdesc_nodetached &&
TransactionIdIsValid(rel->rd_partdesc_nodetached_xmin) &&
ActiveSnapshotSet())
{
Snapshot activesnap;
activesnap = GetActiveSnapshot();
if (!XidInMVCCSnapshot(rel->rd_partdesc_nodetached_xmin, activesnap))
return rel->rd_partdesc_nodetached;
}
return RelationBuildPartitionDesc(rel, omit_detached); return RelationBuildPartitionDesc(rel, omit_detached);
} }
@ -117,6 +141,8 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
Oid *oids = NULL; Oid *oids = NULL;
bool *is_leaf = NULL; bool *is_leaf = NULL;
bool detached_exist; bool detached_exist;
bool is_omit;
TransactionId detached_xmin;
ListCell *cell; ListCell *cell;
int i, int i,
nparts; nparts;
@ -132,8 +158,11 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
* some well-defined point in time. * some well-defined point in time.
*/ */
detached_exist = false; detached_exist = false;
inhoids = find_inheritance_children(RelationGetRelid(rel), omit_detached, detached_xmin = InvalidTransactionId;
NoLock, &detached_exist); inhoids = find_inheritance_children_extended(RelationGetRelid(rel),
omit_detached, NoLock,
&detached_exist,
&detached_xmin);
nparts = list_length(inhoids); nparts = list_length(inhoids);
/* Allocate working arrays for OIDs, leaf flags, and boundspecs. */ /* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
@ -281,37 +310,49 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
} }
/* /*
* We have a fully valid partdesc. Reparent it so that it has the right * Are we working with the partition that omits detached partitions, or
* lifespan, and if appropriate put it into the relation's relcache entry. * the one that includes them?
*/
if (omit_detached && detached_exist)
{
/*
* A transient partition descriptor is only good for the current
* statement, so make it a child of the current portal's context.
*/
MemoryContextSetParent(new_pdcxt, PortalContext);
}
else
{
/*
* This partdesc goes into relcache.
*/ */
is_omit = omit_detached && detached_exist && ActiveSnapshotSet();
/*
* We have a fully valid partdesc. Reparent it so that it has the right
* lifespan.
*/
MemoryContextSetParent(new_pdcxt, CacheMemoryContext); MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
/* /*
* But first, a kluge: if there's an old rd_pdcxt, it contains an old * Store it into relcache.
* partition descriptor that may still be referenced somewhere. *
* Preserve it, while not leaking it, by reattaching it as a child * But first, a kluge: if there's an old context for this type of
* context of the new rd_pdcxt. Eventually it will get dropped by * descriptor, it contains an old partition descriptor that may still be
* either RelationClose or RelationClearRelation. * referenced somewhere. Preserve it, while not leaking it, by
* reattaching it as a child context of the new one. Eventually it will
* get dropped by either RelationClose or RelationClearRelation.
* (We keep the regular partdesc in rd_pdcxt, and the partdesc-excluding-
* detached-partitions in rd_pddcxt.)
*/ */
if (is_omit)
{
if (rel->rd_pddcxt != NULL)
MemoryContextSetParent(rel->rd_pddcxt, new_pdcxt);
rel->rd_pddcxt = new_pdcxt;
rel->rd_partdesc_nodetached = partdesc;
/*
* For partdescs built excluding detached partitions, which we save
* separately, we also record the pg_inherits.xmin of the detached
* partition that was omitted; this informs a future potential user of
* such a cached partdescs. (This might be InvalidXid; see comments
* in struct RelationData).
*/
rel->rd_partdesc_nodetached_xmin = detached_xmin;
}
else
{
if (rel->rd_pdcxt != NULL) if (rel->rd_pdcxt != NULL)
MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt); MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
rel->rd_pdcxt = new_pdcxt; rel->rd_pdcxt = new_pdcxt;
/* Store it into relcache */
rel->rd_partdesc = partdesc; rel->rd_partdesc = partdesc;
} }

View File

@ -1157,7 +1157,10 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
relation->rd_partkey = NULL; relation->rd_partkey = NULL;
relation->rd_partkeycxt = NULL; relation->rd_partkeycxt = NULL;
relation->rd_partdesc = NULL; relation->rd_partdesc = NULL;
relation->rd_partdesc_nodetached = NULL;
relation->rd_partdesc_nodetached_xmin = InvalidTransactionId;
relation->rd_pdcxt = NULL; relation->rd_pdcxt = NULL;
relation->rd_pddcxt = NULL;
relation->rd_partcheck = NIL; relation->rd_partcheck = NIL;
relation->rd_partcheckvalid = false; relation->rd_partcheckvalid = false;
relation->rd_partcheckcxt = NULL; relation->rd_partcheckcxt = NULL;
@ -2103,11 +2106,17 @@ RelationClose(Relation relation)
* stale partition descriptors it has. This is unlikely, so check to see * stale partition descriptors it has. This is unlikely, so check to see
* if there are child contexts before expending a call to mcxt.c. * if there are child contexts before expending a call to mcxt.c.
*/ */
if (RelationHasReferenceCountZero(relation) && if (RelationHasReferenceCountZero(relation))
relation->rd_pdcxt != NULL && {
if (relation->rd_pdcxt != NULL &&
relation->rd_pdcxt->firstchild != NULL) relation->rd_pdcxt->firstchild != NULL)
MemoryContextDeleteChildren(relation->rd_pdcxt); MemoryContextDeleteChildren(relation->rd_pdcxt);
if (relation->rd_pddcxt != NULL &&
relation->rd_pddcxt->firstchild != NULL)
MemoryContextDeleteChildren(relation->rd_pddcxt);
}
#ifdef RELCACHE_FORCE_RELEASE #ifdef RELCACHE_FORCE_RELEASE
if (RelationHasReferenceCountZero(relation) && if (RelationHasReferenceCountZero(relation) &&
relation->rd_createSubid == InvalidSubTransactionId && relation->rd_createSubid == InvalidSubTransactionId &&
@ -2390,6 +2399,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
MemoryContextDelete(relation->rd_partkeycxt); MemoryContextDelete(relation->rd_partkeycxt);
if (relation->rd_pdcxt) if (relation->rd_pdcxt)
MemoryContextDelete(relation->rd_pdcxt); MemoryContextDelete(relation->rd_pdcxt);
if (relation->rd_pddcxt)
MemoryContextDelete(relation->rd_pddcxt);
if (relation->rd_partcheckcxt) if (relation->rd_partcheckcxt)
MemoryContextDelete(relation->rd_partcheckcxt); MemoryContextDelete(relation->rd_partcheckcxt);
pfree(relation); pfree(relation);
@ -2644,7 +2655,7 @@ RelationClearRelation(Relation relation, bool rebuild)
SWAPFIELD(PartitionKey, rd_partkey); SWAPFIELD(PartitionKey, rd_partkey);
SWAPFIELD(MemoryContext, rd_partkeycxt); SWAPFIELD(MemoryContext, rd_partkeycxt);
} }
if (newrel->rd_pdcxt != NULL) if (newrel->rd_pdcxt != NULL || newrel->rd_pddcxt != NULL)
{ {
/* /*
* We are rebuilding a partitioned relation with a non-zero * We are rebuilding a partitioned relation with a non-zero
@ -2672,13 +2683,22 @@ RelationClearRelation(Relation relation, bool rebuild)
* newrel. * newrel.
*/ */
relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */ relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */
relation->rd_partdesc_nodetached = NULL;
relation->rd_partdesc_nodetached_xmin = InvalidTransactionId;
if (relation->rd_pdcxt != NULL) /* probably never happens */ if (relation->rd_pdcxt != NULL) /* probably never happens */
MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt); MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
else else
relation->rd_pdcxt = newrel->rd_pdcxt; relation->rd_pdcxt = newrel->rd_pdcxt;
if (relation->rd_pddcxt != NULL)
MemoryContextSetParent(newrel->rd_pddcxt, relation->rd_pddcxt);
else
relation->rd_pddcxt = newrel->rd_pddcxt;
/* drop newrel's pointers so we don't destroy it below */ /* drop newrel's pointers so we don't destroy it below */
newrel->rd_partdesc = NULL; newrel->rd_partdesc = NULL;
newrel->rd_partdesc_nodetached = NULL;
newrel->rd_partdesc_nodetached_xmin = InvalidTransactionId;
newrel->rd_pdcxt = NULL; newrel->rd_pdcxt = NULL;
newrel->rd_pddcxt = NULL;
} }
#undef SWAPFIELD #undef SWAPFIELD
@ -6017,7 +6037,10 @@ load_relcache_init_file(bool shared)
rel->rd_partkey = NULL; rel->rd_partkey = NULL;
rel->rd_partkeycxt = NULL; rel->rd_partkeycxt = NULL;
rel->rd_partdesc = NULL; rel->rd_partdesc = NULL;
rel->rd_partdesc_nodetached = NULL;
rel->rd_partdesc_nodetached_xmin = InvalidTransactionId;
rel->rd_pdcxt = NULL; rel->rd_pdcxt = NULL;
rel->rd_pddcxt = NULL;
rel->rd_partcheck = NIL; rel->rd_partcheck = NIL;
rel->rd_partcheckvalid = false; rel->rd_partcheckvalid = false;
rel->rd_partcheckcxt = NULL; rel->rd_partcheckcxt = NULL;

View File

@ -50,8 +50,10 @@ DECLARE_INDEX(pg_inherits_parent_index, 2187, on pg_inherits using btree(inhpare
#define InheritsParentIndexId 2187 #define InheritsParentIndexId 2187
extern List *find_inheritance_children(Oid parentrelId, bool omit_detached, extern List *find_inheritance_children(Oid parentrelId, LOCKMODE lockmode);
LOCKMODE lockmode, bool *detached_exist); extern List *find_inheritance_children_extended(Oid parentrelId, bool omit_detached,
LOCKMODE lockmode, bool *detached_exist, TransactionId *detached_xmin);
extern List *find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, extern List *find_all_inheritors(Oid parentrelId, LOCKMODE lockmode,
List **parents); List **parents);
extern bool has_subclass(Oid relationId); extern bool has_subclass(Oid relationId);

View File

@ -129,6 +129,21 @@ typedef struct RelationData
PartitionDesc rd_partdesc; /* partition descriptor, or NULL */ PartitionDesc rd_partdesc; /* partition descriptor, or NULL */
MemoryContext rd_pdcxt; /* private context for rd_partdesc, if any */ MemoryContext rd_pdcxt; /* private context for rd_partdesc, if any */
/* Same as above, for partdescs that omit detached partitions */
PartitionDesc rd_partdesc_nodetached; /* partdesc w/o detached parts */
MemoryContext rd_pddcxt; /* for rd_partdesc_nodetached, if any */
/*
* pg_inherits.xmin of the partition that was excluded in
* rd_partdesc_nodetached. This informs a future user of that partdesc:
* if this value is not in progress for the active snapshot, then the
* partdesc can be used, otherwise they have to build a new one. (This
* matches what find_inheritance_children_extended would do). In the rare
* case where the pg_inherits tuple has been frozen, this will be
* InvalidXid; behave as if the partdesc is unusable in that case.
*/
TransactionId rd_partdesc_nodetached_xmin;
/* data managed by RelationGetPartitionQual: */ /* data managed by RelationGetPartitionQual: */
List *rd_partcheck; /* partition CHECK quals */ List *rd_partcheck; /* partition CHECK quals */
bool rd_partcheckvalid; /* true if list has been computed */ bool rd_partcheckvalid; /* true if list has been computed */

View File

@ -20,6 +20,7 @@ step s1describe: SELECT 'd3_listp' AS root, * FROM pg_partition_tree('d3_listp')
root relid parentrelid isleaf level root relid parentrelid isleaf level
d3_listp d3_listp f 0 d3_listp d3_listp f 0
d3_listp d3_listp2 d3_listp t 1
d3_listp1 d3_listp1 t 0 d3_listp1 d3_listp1 t 0
step s1alter: ALTER TABLE d3_listp1 ALTER a DROP NOT NULL; step s1alter: ALTER TABLE d3_listp1 ALTER a DROP NOT NULL;
ERROR: cannot alter partition "d3_listp1" with an incomplete detach ERROR: cannot alter partition "d3_listp1" with an incomplete detach
@ -81,6 +82,59 @@ error in steps s1cancel s2detach: ERROR: canceling statement due to user reques
step s1c: COMMIT; step s1c: COMMIT;
step s1insertpart: INSERT INTO d3_listp1 VALUES (1); step s1insertpart: INSERT INTO d3_listp1 VALUES (1);
starting permutation: s2snitch s1b s1s s2detach2 s1cancel s1c s1brr s1insert s1s s1insert s1c
step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid();
step s1b: BEGIN;
step s1s: SELECT * FROM d3_listp;
a
1
step s2detach2: ALTER TABLE d3_listp DETACH PARTITION d3_listp2 CONCURRENTLY; <waiting ...>
step s1cancel: SELECT pg_cancel_backend(pid) FROM d3_pid;
pg_cancel_backend
t
step s2detach2: <... completed>
error in steps s1cancel s2detach2: ERROR: canceling statement due to user request
step s1c: COMMIT;
step s1brr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s1insert: INSERT INTO d3_listp VALUES (1);
step s1s: SELECT * FROM d3_listp;
a
1
1
step s1insert: INSERT INTO d3_listp VALUES (1);
step s1c: COMMIT;
starting permutation: s2snitch s1b s1s s2detach2 s1cancel s1c s1brr s1s s1insert s1s s1c
step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid();
step s1b: BEGIN;
step s1s: SELECT * FROM d3_listp;
a
1
step s2detach2: ALTER TABLE d3_listp DETACH PARTITION d3_listp2 CONCURRENTLY; <waiting ...>
step s1cancel: SELECT pg_cancel_backend(pid) FROM d3_pid;
pg_cancel_backend
t
step s2detach2: <... completed>
error in steps s1cancel s2detach2: ERROR: canceling statement due to user request
step s1c: COMMIT;
step s1brr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s1s: SELECT * FROM d3_listp;
a
1
step s1insert: INSERT INTO d3_listp VALUES (1);
step s1s: SELECT * FROM d3_listp;
a
1
1
step s1c: COMMIT;
starting permutation: s2snitch s1b s1s s2detach s1cancel s1c s1drop s1list starting permutation: s2snitch s1b s1s s2detach s1cancel s1c s1drop s1list
step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid(); step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid();
step s1b: BEGIN; step s1b: BEGIN;
@ -123,6 +177,61 @@ a
1 1
starting permutation: s2snitch s1b s1s s2detach s1cancel s2detach2 s1c
step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid();
step s1b: BEGIN;
step s1s: SELECT * FROM d3_listp;
a
1
step s2detach: ALTER TABLE d3_listp DETACH PARTITION d3_listp1 CONCURRENTLY; <waiting ...>
step s1cancel: SELECT pg_cancel_backend(pid) FROM d3_pid;
pg_cancel_backend
t
step s2detach: <... completed>
error in steps s1cancel s2detach: ERROR: canceling statement due to user request
step s2detach2: ALTER TABLE d3_listp DETACH PARTITION d3_listp2 CONCURRENTLY;
ERROR: partition "d3_listp1" already pending detach in partitioned table "public.d3_listp"
step s1c: COMMIT;
starting permutation: s2snitch s1b s1s s2detach s1cancel s2detachfinal s1c s2detach2
step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid();
step s1b: BEGIN;
step s1s: SELECT * FROM d3_listp;
a
1
step s2detach: ALTER TABLE d3_listp DETACH PARTITION d3_listp1 CONCURRENTLY; <waiting ...>
step s1cancel: SELECT pg_cancel_backend(pid) FROM d3_pid;
pg_cancel_backend
t
step s2detach: <... completed>
error in steps s1cancel s2detach: ERROR: canceling statement due to user request
step s2detachfinal: ALTER TABLE d3_listp DETACH PARTITION d3_listp1 FINALIZE; <waiting ...>
step s1c: COMMIT;
step s2detachfinal: <... completed>
step s2detach2: ALTER TABLE d3_listp DETACH PARTITION d3_listp2 CONCURRENTLY;
starting permutation: s2snitch s1b s1s s2detach s1cancel s1c s1droppart s2detach2
step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid();
step s1b: BEGIN;
step s1s: SELECT * FROM d3_listp;
a
1
step s2detach: ALTER TABLE d3_listp DETACH PARTITION d3_listp1 CONCURRENTLY; <waiting ...>
step s1cancel: SELECT pg_cancel_backend(pid) FROM d3_pid;
pg_cancel_backend
t
step s2detach: <... completed>
error in steps s1cancel s2detach: ERROR: canceling statement due to user request
step s1c: COMMIT;
step s1droppart: DROP TABLE d3_listp1;
step s2detach2: ALTER TABLE d3_listp DETACH PARTITION d3_listp2 CONCURRENTLY;
starting permutation: s2snitch s1b s1s s2detach s1cancel s1c s2begin s2drop s1s s2commit starting permutation: s2snitch s1b s1s s2detach s1cancel s1c s2begin s2drop s1s s2commit
step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid(); step s2snitch: INSERT INTO d3_pid SELECT pg_backend_pid();
step s1b: BEGIN; step s1b: BEGIN;
@ -279,4 +388,3 @@ step s2detachfinal: ALTER TABLE d3_listp DETACH PARTITION d3_listp1 FINALIZE;
step s1insertpart: INSERT INTO d3_listp1 VALUES (1); <waiting ...> step s1insertpart: INSERT INTO d3_listp1 VALUES (1); <waiting ...>
step s2commit: COMMIT; step s2commit: COMMIT;
step s1insertpart: <... completed> step s1insertpart: <... completed>
unused step name: s1droppart

View File

@ -4,12 +4,13 @@ setup
{ {
CREATE TABLE d3_listp (a int) PARTITION BY LIST(a); CREATE TABLE d3_listp (a int) PARTITION BY LIST(a);
CREATE TABLE d3_listp1 PARTITION OF d3_listp FOR VALUES IN (1); CREATE TABLE d3_listp1 PARTITION OF d3_listp FOR VALUES IN (1);
CREATE TABLE d3_listp2 PARTITION OF d3_listp FOR VALUES IN (2);
CREATE TABLE d3_pid (pid int); CREATE TABLE d3_pid (pid int);
INSERT INTO d3_listp VALUES (1); INSERT INTO d3_listp VALUES (1);
} }
teardown { teardown {
DROP TABLE IF EXISTS d3_listp, d3_listp1, d3_pid; DROP TABLE IF EXISTS d3_listp, d3_listp1, d3_listp2, d3_pid;
} }
session "s1" session "s1"
@ -34,6 +35,7 @@ session "s2"
step "s2begin" { BEGIN; } step "s2begin" { BEGIN; }
step "s2snitch" { INSERT INTO d3_pid SELECT pg_backend_pid(); } step "s2snitch" { INSERT INTO d3_pid SELECT pg_backend_pid(); }
step "s2detach" { ALTER TABLE d3_listp DETACH PARTITION d3_listp1 CONCURRENTLY; } step "s2detach" { ALTER TABLE d3_listp DETACH PARTITION d3_listp1 CONCURRENTLY; }
step "s2detach2" { ALTER TABLE d3_listp DETACH PARTITION d3_listp2 CONCURRENTLY; }
step "s2detachfinal" { ALTER TABLE d3_listp DETACH PARTITION d3_listp1 FINALIZE; } step "s2detachfinal" { ALTER TABLE d3_listp DETACH PARTITION d3_listp1 FINALIZE; }
step "s2drop" { DROP TABLE d3_listp1; } step "s2drop" { DROP TABLE d3_listp1; }
step "s2commit" { COMMIT; } step "s2commit" { COMMIT; }
@ -44,11 +46,21 @@ permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s1describe" "s1a
permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1insert" "s1c" permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1insert" "s1c"
permutation "s2snitch" "s1brr" "s1s" "s2detach" "s1cancel" "s1insert" "s1c" "s1spart" permutation "s2snitch" "s1brr" "s1s" "s2detach" "s1cancel" "s1insert" "s1c" "s1spart"
permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s1insertpart" permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s1insertpart"
# Test partition descriptor caching
permutation "s2snitch" "s1b" "s1s" "s2detach2" "s1cancel" "s1c" "s1brr" "s1insert" "s1s" "s1insert" "s1c"
permutation "s2snitch" "s1b" "s1s" "s2detach2" "s1cancel" "s1c" "s1brr" "s1s" "s1insert" "s1s" "s1c"
# "drop" here does both tables # "drop" here does both tables
permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s1drop" "s1list" permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s1drop" "s1list"
# "truncate" only does parent, not partition # "truncate" only does parent, not partition
permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s1trunc" "s1spart" permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s1trunc" "s1spart"
# If a partition pending detach exists, we cannot drop another one
permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s2detach2" "s1c"
permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s2detachfinal" "s1c" "s2detach2"
permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s1droppart" "s2detach2"
# When a partition with incomplete detach is dropped, we grab lock on parent too. # When a partition with incomplete detach is dropped, we grab lock on parent too.
permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s2begin" "s2drop" "s1s" "s2commit" permutation "s2snitch" "s1b" "s1s" "s2detach" "s1cancel" "s1c" "s2begin" "s2drop" "s1s" "s2commit"