Skip to content

Commit 2241e5c

Browse files
committed
Fix risk of deadlock failure while dropping a partitioned index.
DROP INDEX needs to lock the index's table before the index itself, else it will deadlock against ordinary queries that acquire the relation locks in that order. This is correctly mechanized for plain indexes by RangeVarCallbackForDropRelation; but in the case of a partitioned index, we neglected to lock the child tables in advance of locking the child indexes. We can fix that by traversing the inheritance tree and acquiring the needed locks in RemoveRelations, after we have acquired our locks on the parent partitioned table and index. While at it, do some refactoring to eliminate confusion between the actual and expected relkind in RangeVarCallbackForDropRelation. We can save a couple of syscache lookups too, by having that function pass back info that RemoveRelations will need. Back-patch to v11 where partitioned indexes were added. Jimmy Yih, Gaurab Dey, Tom Lane Discussion: https://postgr.es/m/BYAPR05MB645402330042E17D91A70C12BD5F9@BYAPR05MB6454.namprd05.prod.outlook.com
1 parent 36c3acb commit 2241e5c

File tree

4 files changed

+191
-18
lines changed

4 files changed

+191
-18
lines changed

src/backend/commands/tablecmds.c

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -280,12 +280,18 @@ static const struct dropmsgstrings dropmsgstringarray[] = {
280280
{'\0', 0, NULL, NULL, NULL, NULL}
281281
};
282282

283+
/* communication between RemoveRelations and RangeVarCallbackForDropRelation */
283284
struct DropRelationCallbackState
284285
{
285-
char relkind;
286+
/* These fields are set by RemoveRelations: */
287+
char expected_relkind;
288+
LOCKMODE heap_lockmode;
289+
/* These fields are state to track which subsidiary locks are held: */
286290
Oid heapOid;
287291
Oid partParentOid;
288-
bool concurrent;
292+
/* These fields are passed back by RangeVarCallbackForDropRelation: */
293+
char actual_relkind;
294+
char actual_relpersistence;
289295
};
290296

291297
/* Alter table target-type flags for ATSimplePermissions */
@@ -1349,10 +1355,13 @@ RemoveRelations(DropStmt *drop)
13491355
AcceptInvalidationMessages();
13501356

13511357
/* Look up the appropriate relation using namespace search. */
1352-
state.relkind = relkind;
1358+
state.expected_relkind = relkind;
1359+
state.heap_lockmode = drop->concurrent ?
1360+
ShareUpdateExclusiveLock : AccessExclusiveLock;
1361+
/* We must initialize these fields to show that no locks are held: */
13531362
state.heapOid = InvalidOid;
13541363
state.partParentOid = InvalidOid;
1355-
state.concurrent = drop->concurrent;
1364+
13561365
relOid = RangeVarGetRelidExtended(rel, lockmode, RVR_MISSING_OK,
13571366
RangeVarCallbackForDropRelation,
13581367
(void *) &state);
@@ -1366,10 +1375,10 @@ RemoveRelations(DropStmt *drop)
13661375

13671376
/*
13681377
* Decide if concurrent mode needs to be used here or not. The
1369-
* relation persistence cannot be known without its OID.
1378+
* callback retrieved the rel's persistence for us.
13701379
*/
13711380
if (drop->concurrent &&
1372-
get_rel_persistence(relOid) != RELPERSISTENCE_TEMP)
1381+
state.actual_relpersistence != RELPERSISTENCE_TEMP)
13731382
{
13741383
Assert(list_length(drop->objects) == 1 &&
13751384
drop->removeType == OBJECT_INDEX);
@@ -1381,12 +1390,24 @@ RemoveRelations(DropStmt *drop)
13811390
* either.
13821391
*/
13831392
if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 &&
1384-
get_rel_relkind(relOid) == RELKIND_PARTITIONED_INDEX)
1393+
state.actual_relkind == RELKIND_PARTITIONED_INDEX)
13851394
ereport(ERROR,
13861395
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
13871396
errmsg("cannot drop partitioned index \"%s\" concurrently",
13881397
rel->relname)));
13891398

1399+
/*
1400+
* If we're told to drop a partitioned index, we must acquire lock on
1401+
* all the children of its parent partitioned table before proceeding.
1402+
* Otherwise we'd try to lock the child index partitions before their
1403+
* tables, leading to potential deadlock against other sessions that
1404+
* will lock those objects in the other order.
1405+
*/
1406+
if (state.actual_relkind == RELKIND_PARTITIONED_INDEX)
1407+
(void) find_all_inheritors(state.heapOid,
1408+
state.heap_lockmode,
1409+
NULL);
1410+
13901411
/* OK, we're ready to delete this one */
13911412
obj.classId = RelationRelationId;
13921413
obj.objectId = relOid;
@@ -1412,17 +1433,14 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
14121433
{
14131434
HeapTuple tuple;
14141435
struct DropRelationCallbackState *state;
1415-
char relkind;
14161436
char expected_relkind;
14171437
bool is_partition;
14181438
Form_pg_class classform;
14191439
LOCKMODE heap_lockmode;
14201440
bool invalid_system_index = false;
14211441

14221442
state = (struct DropRelationCallbackState *) arg;
1423-
relkind = state->relkind;
1424-
heap_lockmode = state->concurrent ?
1425-
ShareUpdateExclusiveLock : AccessExclusiveLock;
1443+
heap_lockmode = state->heap_lockmode;
14261444

14271445
/*
14281446
* If we previously locked some other index's heap, and the name we're
@@ -1456,6 +1474,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
14561474
classform = (Form_pg_class) GETSTRUCT(tuple);
14571475
is_partition = classform->relispartition;
14581476

1477+
/* Pass back some data to save lookups in RemoveRelations */
1478+
state->actual_relkind = classform->relkind;
1479+
state->actual_relpersistence = classform->relpersistence;
1480+
14591481
/*
14601482
* Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
14611483
* but RemoveRelations() can only pass one relkind for a given relation.
@@ -1471,13 +1493,15 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
14711493
else
14721494
expected_relkind = classform->relkind;
14731495

1474-
if (relkind != expected_relkind)
1475-
DropErrorMsgWrongType(rel->relname, classform->relkind, relkind);
1496+
if (state->expected_relkind != expected_relkind)
1497+
DropErrorMsgWrongType(rel->relname, classform->relkind,
1498+
state->expected_relkind);
14761499

14771500
/* Allow DROP to either table owner or schema owner */
14781501
if (!pg_class_ownercheck(relOid, GetUserId()) &&
14791502
!pg_namespace_ownercheck(classform->relnamespace, GetUserId()))
1480-
aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)),
1503+
aclcheck_error(ACLCHECK_NOT_OWNER,
1504+
get_relkind_objtype(classform->relkind),
14811505
rel->relname);
14821506

14831507
/*
@@ -1486,7 +1510,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
14861510
* only concerns indexes of toast relations that became invalid during a
14871511
* REINDEX CONCURRENTLY process.
14881512
*/
1489-
if (IsSystemClass(relOid, classform) && relkind == RELKIND_INDEX)
1513+
if (IsSystemClass(relOid, classform) && classform->relkind == RELKIND_INDEX)
14901514
{
14911515
HeapTuple locTuple;
14921516
Form_pg_index indexform;
@@ -1522,9 +1546,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
15221546
* locking the index. index_drop() will need this anyway, and since
15231547
* regular queries lock tables before their indexes, we risk deadlock if
15241548
* we do it the other way around. No error if we don't find a pg_index
1525-
* entry, though --- the relation may have been dropped.
1549+
* entry, though --- the relation may have been dropped. Note that this
1550+
* code will execute for either plain or partitioned indexes.
15261551
*/
1527-
if ((relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX) &&
1552+
if (expected_relkind == RELKIND_INDEX &&
15281553
relOid != oldRelOid)
15291554
{
15301555
state->heapOid = IndexGetRelation(relOid, true);
@@ -1535,7 +1560,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
15351560
/*
15361561
* Similarly, if the relation is a partition, we must acquire lock on its
15371562
* parent before locking the partition. That's because queries lock the
1538-
* parent before its partitions, so we risk deadlock it we do it the other
1563+
* parent before its partitions, so we risk deadlock if we do it the other
15391564
* way around.
15401565
*/
15411566
if (is_partition && relOid != oldRelOid)
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
Parsed test spec with 3 sessions
2+
3+
starting permutation: s1begin s1lock s2begin s2drop s1select s3getlocks s1commit s3getlocks s2commit
4+
step s1begin: BEGIN;
5+
step s1lock: LOCK TABLE part_drop_index_locking_subpart_child IN ACCESS SHARE MODE;
6+
step s2begin: BEGIN;
7+
step s2drop: DROP INDEX part_drop_index_locking_idx; <waiting ...>
8+
step s1select: SELECT * FROM part_drop_index_locking_subpart_child;
9+
id
10+
--
11+
(0 rows)
12+
13+
step s3getlocks:
14+
SELECT s.query, c.relname, l.mode, l.granted
15+
FROM pg_locks l
16+
JOIN pg_class c ON l.relation = c.oid
17+
JOIN pg_stat_activity s ON l.pid = s.pid
18+
WHERE c.relname LIKE 'part_drop_index_locking%'
19+
ORDER BY s.query, c.relname, l.mode, l.granted;
20+
21+
query |relname |mode |granted
22+
----------------------------------------------------+---------------------------------------------+-------------------+-------
23+
DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking |AccessExclusiveLock|t
24+
DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_idx |AccessExclusiveLock|t
25+
DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_subpart |AccessExclusiveLock|t
26+
DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_subpart_child |AccessExclusiveLock|f
27+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child |AccessShareLock |t
28+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx |AccessShareLock |t
29+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx1|AccessShareLock |t
30+
(7 rows)
31+
32+
step s1commit: COMMIT;
33+
step s2drop: <... completed>
34+
step s3getlocks:
35+
SELECT s.query, c.relname, l.mode, l.granted
36+
FROM pg_locks l
37+
JOIN pg_class c ON l.relation = c.oid
38+
JOIN pg_stat_activity s ON l.pid = s.pid
39+
WHERE c.relname LIKE 'part_drop_index_locking%'
40+
ORDER BY s.query, c.relname, l.mode, l.granted;
41+
42+
query |relname |mode |granted
43+
---------------------------------------+--------------------------------------------+-------------------+-------
44+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking |AccessExclusiveLock|t
45+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_idx |AccessExclusiveLock|t
46+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart |AccessExclusiveLock|t
47+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_child |AccessExclusiveLock|t
48+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_child_id_idx|AccessExclusiveLock|t
49+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_id_idx |AccessExclusiveLock|t
50+
(6 rows)
51+
52+
step s2commit: COMMIT;
53+
54+
starting permutation: s1begin s1lock s2begin s2dropsub s1select s3getlocks s1commit s3getlocks s2commit
55+
step s1begin: BEGIN;
56+
step s1lock: LOCK TABLE part_drop_index_locking_subpart_child IN ACCESS SHARE MODE;
57+
step s2begin: BEGIN;
58+
step s2dropsub: DROP INDEX part_drop_index_locking_subpart_idx; <waiting ...>
59+
step s1select: SELECT * FROM part_drop_index_locking_subpart_child;
60+
id
61+
--
62+
(0 rows)
63+
64+
step s3getlocks:
65+
SELECT s.query, c.relname, l.mode, l.granted
66+
FROM pg_locks l
67+
JOIN pg_class c ON l.relation = c.oid
68+
JOIN pg_stat_activity s ON l.pid = s.pid
69+
WHERE c.relname LIKE 'part_drop_index_locking%'
70+
ORDER BY s.query, c.relname, l.mode, l.granted;
71+
72+
query |relname |mode |granted
73+
----------------------------------------------------+---------------------------------------------+-------------------+-------
74+
DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart |AccessExclusiveLock|t
75+
DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart_child |AccessExclusiveLock|f
76+
DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart_idx |AccessExclusiveLock|t
77+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child |AccessShareLock |t
78+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx |AccessShareLock |t
79+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx1|AccessShareLock |t
80+
(6 rows)
81+
82+
step s1commit: COMMIT;
83+
step s2dropsub: <... completed>
84+
step s3getlocks:
85+
SELECT s.query, c.relname, l.mode, l.granted
86+
FROM pg_locks l
87+
JOIN pg_class c ON l.relation = c.oid
88+
JOIN pg_stat_activity s ON l.pid = s.pid
89+
WHERE c.relname LIKE 'part_drop_index_locking%'
90+
ORDER BY s.query, c.relname, l.mode, l.granted;
91+
92+
query |relname |mode |granted
93+
-----------------------------------------------+---------------------------------------------+-------------------+-------
94+
DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart |AccessExclusiveLock|t
95+
DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_child |AccessExclusiveLock|t
96+
DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_child_id_idx1|AccessExclusiveLock|t
97+
DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_idx |AccessExclusiveLock|t
98+
(4 rows)
99+
100+
step s2commit: COMMIT;

src/test/isolation/isolation_schedule

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ test: predicate-hash
8484
test: predicate-gist
8585
test: predicate-gin
8686
test: partition-concurrent-attach
87+
test: partition-drop-index-locking
8788
test: partition-key-update-1
8889
test: partition-key-update-2
8990
test: partition-key-update-3
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Verify that DROP INDEX properly locks all downward sub-partitions
2+
# and partitions before locking the indexes.
3+
4+
setup
5+
{
6+
CREATE TABLE part_drop_index_locking (id int) PARTITION BY RANGE(id);
7+
CREATE TABLE part_drop_index_locking_subpart PARTITION OF part_drop_index_locking FOR VALUES FROM (1) TO (100) PARTITION BY RANGE(id);
8+
CREATE TABLE part_drop_index_locking_subpart_child PARTITION OF part_drop_index_locking_subpart FOR VALUES FROM (1) TO (100);
9+
CREATE INDEX part_drop_index_locking_idx ON part_drop_index_locking(id);
10+
CREATE INDEX part_drop_index_locking_subpart_idx ON part_drop_index_locking_subpart(id);
11+
}
12+
13+
teardown
14+
{
15+
DROP TABLE part_drop_index_locking;
16+
}
17+
18+
# SELECT will take AccessShare lock first on the table and then on its index.
19+
# We can simulate the case where DROP INDEX starts between those steps
20+
# by manually taking the table lock beforehand.
21+
session s1
22+
step s1begin { BEGIN; }
23+
step s1lock { LOCK TABLE part_drop_index_locking_subpart_child IN ACCESS SHARE MODE; }
24+
step s1select { SELECT * FROM part_drop_index_locking_subpart_child; }
25+
step s1commit { COMMIT; }
26+
27+
session s2
28+
step s2begin { BEGIN; }
29+
step s2drop { DROP INDEX part_drop_index_locking_idx; }
30+
step s2dropsub { DROP INDEX part_drop_index_locking_subpart_idx; }
31+
step s2commit { COMMIT; }
32+
33+
session s3
34+
step s3getlocks {
35+
SELECT s.query, c.relname, l.mode, l.granted
36+
FROM pg_locks l
37+
JOIN pg_class c ON l.relation = c.oid
38+
JOIN pg_stat_activity s ON l.pid = s.pid
39+
WHERE c.relname LIKE 'part_drop_index_locking%'
40+
ORDER BY s.query, c.relname, l.mode, l.granted;
41+
}
42+
43+
# Run DROP INDEX on top partitioned table
44+
permutation s1begin s1lock s2begin s2drop(s1commit) s1select s3getlocks s1commit s3getlocks s2commit
45+
46+
# Run DROP INDEX on top sub-partition table
47+
permutation s1begin s1lock s2begin s2dropsub(s1commit) s1select s3getlocks s1commit s3getlocks s2commit

0 commit comments

Comments
 (0)