Fix deadlock for multiple replicating truncates of the same table.

While applying the truncate change, the logical apply worker acquired
RowExclusiveLock on the relation being truncated. This allowed two apply
workers to truncate the same relation at the same time, which led to a
deadlock. The reason was that one of the workers, after updating the
pg_class tuple, tried to acquire SHARE lock on the relation and started to
wait for the second worker, which had acquired RowExclusiveLock on the
relation. When the second worker then tried to update the pg_class tuple,
it started to wait for the first worker, which led to a deadlock. Fix it by
acquiring AccessExclusiveLock on the relation before applying the truncate
change, as we do for the normal truncate operation.

Author: Peter Smith, test case by Haiying Tang
Reviewed-by: Dilip Kumar, Amit Kapila
Backpatch-through: 11
Discussion: https://postgr.es/m/CAHut+PsNm43p0jM+idTvWwiGZPcP0hGrHMPK9TOAkc+a4UpUqw@mail.gmail.com
This commit is contained in:
Amit Kapila 2021-05-21 08:03:38 +05:30
parent c64183f234
commit c83c0257e4
2 changed files with 57 additions and 5 deletions

View File

@ -1248,6 +1248,7 @@ apply_handle_truncate(StringInfo s)
List *relids = NIL;
List *relids_logged = NIL;
ListCell *lc;
LOCKMODE lockmode = AccessExclusiveLock;
ensure_transaction();
@ -1258,14 +1259,14 @@ apply_handle_truncate(StringInfo s)
LogicalRepRelId relid = lfirst_oid(lc);
LogicalRepRelMapEntry *rel;
rel = logicalrep_rel_open(relid, RowExclusiveLock);
rel = logicalrep_rel_open(relid, lockmode);
if (!should_apply_changes_for_rel(rel))
{
/*
* The relation can't become interesting in the middle of the
* transaction so it's safe to unlock it.
*/
logicalrep_rel_close(rel, RowExclusiveLock);
logicalrep_rel_close(rel, lockmode);
continue;
}
@ -1283,7 +1284,7 @@ apply_handle_truncate(StringInfo s)
{
ListCell *child;
List *children = find_all_inheritors(rel->localreloid,
RowExclusiveLock,
lockmode,
NULL);
foreach(child, children)
@ -1303,7 +1304,7 @@ apply_handle_truncate(StringInfo s)
*/
if (RELATION_IS_OTHER_TEMP(childrel))
{
table_close(childrel, RowExclusiveLock);
table_close(childrel, lockmode);
continue;
}

View File

@ -3,7 +3,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 9;
use Test::More tests => 12;
# setup
@ -13,6 +13,8 @@ $node_publisher->start;
my $node_subscriber = get_new_node('subscriber');
$node_subscriber->init(allows_streaming => 'logical');
$node_subscriber->append_conf('postgresql.conf',
qq(max_logical_replication_workers = 6));
$node_subscriber->start;
my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@ -158,3 +160,52 @@ is($result, qq(0||), 'truncate of multiple tables some not published');
$result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab2");
is($result, qq(3|1|3), 'truncate of multiple tables some not published');
# test that truncate works for logical replication when there are multiple
# subscriptions for a single table
$node_publisher->safe_psql('postgres',
"CREATE TABLE tab5 (a int)");
$node_subscriber->safe_psql('postgres',
"CREATE TABLE tab5 (a int)");
$node_publisher->safe_psql('postgres',
"CREATE PUBLICATION pub5 FOR TABLE tab5");
$node_subscriber->safe_psql('postgres',
"CREATE SUBSCRIPTION sub5_1 CONNECTION '$publisher_connstr' PUBLICATION pub5"
);
$node_subscriber->safe_psql('postgres',
"CREATE SUBSCRIPTION sub5_2 CONNECTION '$publisher_connstr' PUBLICATION pub5"
);
# wait for initial data sync
$node_subscriber->poll_query_until('postgres', $synced_query)
or die "Timed out while waiting for subscriber to synchronize data";
# insert data to truncate
$node_publisher->safe_psql('postgres',
"INSERT INTO tab5 VALUES (1), (2), (3)");
$node_publisher->wait_for_catchup('sub5_1');
$node_publisher->wait_for_catchup('sub5_2');
$result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab5");
is($result, qq(6|1|3), 'insert replicated for multiple subscriptions');
$node_publisher->safe_psql('postgres', "TRUNCATE tab5");
$node_publisher->wait_for_catchup('sub5_1');
$node_publisher->wait_for_catchup('sub5_2');
$result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab5");
is($result, qq(0||),
'truncate replicated for multiple subscriptions');
# check deadlocks
$result = $node_subscriber->safe_psql('postgres',
"SELECT deadlocks FROM pg_stat_database WHERE datname='postgres'");
is($result, qq(0), 'no deadlocks detected');