@@ -180,42 +180,6 @@ static struct super_block *cpuset_sb = NULL;
  */
 
 static DECLARE_MUTEX(cpuset_sem);
-static struct task_struct *cpuset_sem_owner;
-static int cpuset_sem_depth;
-
-/*
- * The global cpuset semaphore cpuset_sem can be needed by the
- * memory allocator to update a tasks mems_allowed (see the calls
- * to cpuset_update_current_mems_allowed()) or to walk up the
- * cpuset hierarchy to find a mem_exclusive cpuset see the calls
- * to cpuset_excl_nodes_overlap()).
- *
- * But if the memory allocation is being done by cpuset.c code, it
- * usually already holds cpuset_sem.  Double tripping on a kernel
- * semaphore deadlocks the current task, and any other task that
- * subsequently tries to obtain the lock.
- *
- * Run all up's and down's on cpuset_sem through the following
- * wrappers, which will detect this nested locking, and avoid
- * deadlocking.
- */
-
-static inline void cpuset_down(struct semaphore *psem)
-{
-	if (cpuset_sem_owner != current) {
-		down(psem);
-		cpuset_sem_owner = current;
-	}
-	cpuset_sem_depth++;
-}
-
-static inline void cpuset_up(struct semaphore *psem)
-{
-	if (--cpuset_sem_depth == 0) {
-		cpuset_sem_owner = NULL;
-		up(psem);
-	}
-}
 
 /*
  * A couple of forward declarations required, due to cyclic reference loop:
@@ -558,10 +522,19 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * Refresh current tasks mems_allowed and mems_generation from
  * current tasks cpuset.  Call with cpuset_sem held.
  *
- * This routine is needed to update the per-task mems_allowed
- * data, within the tasks context, when it is trying to allocate
- * memory (in various mm/mempolicy.c routines) and notices
- * that some other task has been modifying its cpuset.
+ * Be sure to call refresh_mems() on any cpuset operation which
+ * (1) holds cpuset_sem, and (2) might possibly alloc memory.
+ * Call after obtaining cpuset_sem lock, before any possible
+ * allocation.  Otherwise one risks trying to allocate memory
+ * while the task cpuset_mems_generation is not the same as
+ * the mems_generation in its cpuset, which would deadlock on
+ * cpuset_sem in cpuset_update_current_mems_allowed().
+ *
+ * Since we hold cpuset_sem, once refresh_mems() is called, the
+ * test (current->cpuset_mems_generation != cs->mems_generation)
+ * in cpuset_update_current_mems_allowed() will remain false,
+ * until we drop cpuset_sem.  Anyone else who would change our
+ * cpusets mems_generation needs to lock cpuset_sem first.
  */
 
 static void refresh_mems(void)
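
As a rough illustration (not part of this diff) of the calling convention the comment above establishes, a cpuset.c operation that takes cpuset_sem and might allocate memory follows this pattern; down(), up() and refresh_mems() are from this file, while do_allocating_work() is a hypothetical placeholder:

static void example_cpuset_op(void)
{
	down(&cpuset_sem);	/* take the global cpuset semaphore first */
	refresh_mems();		/* sync cpuset_mems_generation before allocating */

	/*
	 * Allocation is now safe while cpuset_sem is held:
	 * cpuset_update_current_mems_allowed() sees matching generations
	 * and will not try to take cpuset_sem a second time.
	 */
	do_allocating_work();	/* hypothetical placeholder */

	up(&cpuset_sem);
}
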
@@ -867,7 +840,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	}
 	buffer[nbytes] = 0;	/* nul-terminate */
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 
 	if (is_removed(cs)) {
 		retval = -ENODEV;
@@ -901,7 +874,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	if (retval == 0)
 		retval = nbytes;
 out2:
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 out1:
 	kfree(buffer);
@@ -941,9 +914,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
 	cpumask_t mask;
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	mask = cs->cpus_allowed;
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	return cpulist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -952,9 +925,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
 	nodemask_t mask;
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	mask = cs->mems_allowed;
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	return nodelist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -1351,7 +1324,8 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	if (!cs)
 		return -ENOMEM;
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
+	refresh_mems();
 	cs->flags = 0;
 	if (notify_on_release(parent))
 		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1376,14 +1350,14 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	 * will down() this new directory's i_sem and if we race with
 	 * another mkdir, we might deadlock.
 	 */
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	err = cpuset_populate_dir(cs->dentry);
 	/* If err < 0, we have a half-filled directory - oh well ;) */
 	return 0;
 err:
 	list_del(&cs->sibling);
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	kfree(cs);
 	return err;
 }
@@ -1405,13 +1379,14 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
 	/* the vfs holds both inode->i_sem already */
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
+	refresh_mems();
 	if (atomic_read(&cs->count) > 0) {
-		cpuset_up(&cpuset_sem);
+		up(&cpuset_sem);
 		return -EBUSY;
 	}
 	if (!list_empty(&cs->children)) {
-		cpuset_up(&cpuset_sem);
+		up(&cpuset_sem);
 		return -EBUSY;
 	}
 	parent = cs->parent;
@@ -1427,7 +1402,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	spin_unlock(&d->d_lock);
 	cpuset_d_remove_dir(d);
 	dput(d);
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 	return 0;
 }
@@ -1530,10 +1505,10 @@ void cpuset_exit(struct task_struct *tsk)
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
 
-		cpuset_down(&cpuset_sem);
+		down(&cpuset_sem);
 		if (atomic_dec_and_test(&cs->count))
 			check_for_release(cs, &pathbuf);
-		cpuset_up(&cpuset_sem);
+		up(&cpuset_sem);
 		cpuset_release_agent(pathbuf);
 	} else {
 		atomic_dec(&cs->count);
@@ -1554,11 +1529,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk)
 {
 	cpumask_t mask;
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	task_lock((struct task_struct *)tsk);
 	guarantee_online_cpus(tsk->cpuset, &mask);
 	task_unlock((struct task_struct *)tsk);
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	return mask;
 }
@@ -1583,9 +1558,9 @@ void cpuset_update_current_mems_allowed(void)
 	if (!cs)
 		return;		/* task is exiting */
 	if (current->cpuset_mems_generation != cs->mems_generation) {
-		cpuset_down(&cpuset_sem);
+		down(&cpuset_sem);
 		refresh_mems();
-		cpuset_up(&cpuset_sem);
+		up(&cpuset_sem);
 	}
 }
 
@@ -1684,14 +1659,14 @@ int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 		return 0;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	cs = current->cpuset;
 	if (!cs)
 		goto done;		/* current task exiting */
 	cs = nearest_exclusive_ancestor(cs);
 	allowed = node_isset(node, cs->mems_allowed);
 done:
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	return allowed;
 }
 
@@ -1712,7 +1687,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
 	int overlap = 0;		/* do cpusets overlap? */
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	cs1 = current->cpuset;
 	if (!cs1)
 		goto done;		/* current task exiting */
@@ -1723,7 +1698,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	cs2 = nearest_exclusive_ancestor(cs2);
 	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
 done:
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	return overlap;
 }
@@ -1746,7 +1721,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 		return -ENOMEM;
 
 	tsk = m->private;
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	task_lock(tsk);
 	cs = tsk->cpuset;
 	task_unlock(tsk);
@@ -1761,7 +1736,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 	seq_puts(m, buf);
 	seq_putc(m, '\n');
 out:
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	kfree(buf);
 	return retval;
 }