Skip to content

Commit 6d0eb38

Browse files
author
Amit Kapila
committed
Fix deadlock for multiple replicating truncates of the same table.
While applying the truncate change, the logical apply worker acquires RowExclusiveLock on the relation being truncated. This allowed truncate on the relation at a time by two apply workers which lead to a deadlock. The reason was that one of the workers after updating the pg_class tuple tries to acquire SHARE lock on the relation and started to wait for the second worker which has acquired RowExclusiveLock on the relation. And when the second worker tries to update the pg_class tuple, it starts to wait for the first worker which leads to a deadlock. Fix it by acquiring AccessExclusiveLock on the relation before applying the truncate change as we do for normal truncate operation. Author: Peter Smith, test case by Haiying Tang Reviewed-by: Dilip Kumar, Amit Kapila Backpatch-through: 11 Discussion: https://postgr.es/m/CAHut+PsNm43p0jM+idTvWwiGZPcP0hGrHMPK9TOAkc+a4UpUqw@mail.gmail.com
1 parent f21fada commit 6d0eb38

File tree

2 files changed

+61
-5
lines changed

2 files changed

+61
-5
lines changed

src/backend/replication/logical/worker.c

+5-4
Original file line numberDiff line numberDiff line change
@@ -1818,6 +1818,7 @@ apply_handle_truncate(StringInfo s)
18181818
List *relids = NIL;
18191819
List *relids_logged = NIL;
18201820
ListCell *lc;
1821+
LOCKMODE lockmode = AccessExclusiveLock;
18211822

18221823
if (handle_streamed_transaction(LOGICAL_REP_MSG_TRUNCATE, s))
18231824
return;
@@ -1831,14 +1832,14 @@ apply_handle_truncate(StringInfo s)
18311832
LogicalRepRelId relid = lfirst_oid(lc);
18321833
LogicalRepRelMapEntry *rel;
18331834

1834-
rel = logicalrep_rel_open(relid, RowExclusiveLock);
1835+
rel = logicalrep_rel_open(relid, lockmode);
18351836
if (!should_apply_changes_for_rel(rel))
18361837
{
18371838
/*
18381839
* The relation can't become interesting in the middle of the
18391840
* transaction so it's safe to unlock it.
18401841
*/
1841-
logicalrep_rel_close(rel, RowExclusiveLock);
1842+
logicalrep_rel_close(rel, lockmode);
18421843
continue;
18431844
}
18441845

@@ -1856,7 +1857,7 @@ apply_handle_truncate(StringInfo s)
18561857
{
18571858
ListCell *child;
18581859
List *children = find_all_inheritors(rel->localreloid,
1859-
RowExclusiveLock,
1860+
lockmode,
18601861
NULL);
18611862

18621863
foreach(child, children)
@@ -1876,7 +1877,7 @@ apply_handle_truncate(StringInfo s)
18761877
*/
18771878
if (RELATION_IS_OTHER_TEMP(childrel))
18781879
{
1879-
table_close(childrel, RowExclusiveLock);
1880+
table_close(childrel, lockmode);
18801881
continue;
18811882
}
18821883

src/test/subscription/t/010_truncate.pl

+56-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
use warnings;
77
use PostgresNode;
88
use TestLib;
9-
use Test::More tests => 11;
9+
use Test::More tests => 14;
1010

1111
# setup
1212

@@ -16,6 +16,8 @@
1616

1717
my $node_subscriber = get_new_node('subscriber');
1818
$node_subscriber->init(allows_streaming => 'logical');
19+
$node_subscriber->append_conf('postgresql.conf',
20+
qq(max_logical_replication_workers = 6));
1921
$node_subscriber->start;
2022

2123
my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -187,3 +189,56 @@
187189
"SELECT count(*), min(a), max(a) FROM tab1");
188190
is($result, qq(0||),
189191
'truncate replicated in synchronous logical replication');
192+
193+
$node_publisher->safe_psql('postgres',
194+
"ALTER SYSTEM RESET synchronous_standby_names");
195+
$node_publisher->safe_psql('postgres', "SELECT pg_reload_conf()");
196+
197+
# test that truncate works for logical replication when there are multiple
198+
# subscriptions for a single table
199+
200+
$node_publisher->safe_psql('postgres',
201+
"CREATE TABLE tab5 (a int)");
202+
203+
$node_subscriber->safe_psql('postgres',
204+
"CREATE TABLE tab5 (a int)");
205+
206+
$node_publisher->safe_psql('postgres',
207+
"CREATE PUBLICATION pub5 FOR TABLE tab5");
208+
$node_subscriber->safe_psql('postgres',
209+
"CREATE SUBSCRIPTION sub5_1 CONNECTION '$publisher_connstr' PUBLICATION pub5"
210+
);
211+
$node_subscriber->safe_psql('postgres',
212+
"CREATE SUBSCRIPTION sub5_2 CONNECTION '$publisher_connstr' PUBLICATION pub5"
213+
);
214+
215+
# wait for initial data sync
216+
$node_subscriber->poll_query_until('postgres', $synced_query)
217+
or die "Timed out while waiting for subscriber to synchronize data";
218+
219+
# insert data to truncate
220+
221+
$node_publisher->safe_psql('postgres',
222+
"INSERT INTO tab5 VALUES (1), (2), (3)");
223+
224+
$node_publisher->wait_for_catchup('sub5_1');
225+
$node_publisher->wait_for_catchup('sub5_2');
226+
227+
$result = $node_subscriber->safe_psql('postgres',
228+
"SELECT count(*), min(a), max(a) FROM tab5");
229+
is($result, qq(6|1|3), 'insert replicated for multiple subscriptions');
230+
231+
$node_publisher->safe_psql('postgres', "TRUNCATE tab5");
232+
233+
$node_publisher->wait_for_catchup('sub5_1');
234+
$node_publisher->wait_for_catchup('sub5_2');
235+
236+
$result = $node_subscriber->safe_psql('postgres',
237+
"SELECT count(*), min(a), max(a) FROM tab5");
238+
is($result, qq(0||),
239+
'truncate replicated for multiple subscriptions');
240+
241+
# check deadlocks
242+
$result = $node_subscriber->safe_psql('postgres',
243+
"SELECT deadlocks FROM pg_stat_database WHERE datname='postgres'");
244+
is($result, qq(0), 'no deadlocks detected');

0 commit comments

Comments
 (0)