Skip to content

Commit 885842d

Browse files
committed
Merge branch 'Replacing-net_mutex-with-rw_semaphore'
Kirill Tkhai says: ==================== Replacing net_mutex with rw_semaphore: this is the third version of the patchset introducing net_sem instead of net_mutex. The patchset adds net_sem in addition to net_mutex and allows pernet_operations to be "async". This flag means that the pernet_operations methods are safe to be executed in parallel with any other pernet_operations (un)initializing another net. If there are only async pernet_operations in the system, net_mutex is not used either for setup_net() or for cleanup_net(). The pernet_operations converted in this patchset allow creating a minimal .config with working networking, and the changes improve performance, as you can see below: %for i in {1..10000}; do unshare -n bash -c exit; done *before* real 1m40,377s user 0m9,672s sys 0m19,928s *after* real 0m17,007s user 0m5,311s sys 0m11,779s (5.8 times faster) In the future, when all pernet_operations become async, we'll just remove this "async" field tree-wide. All the new logic is concentrated in patches [1-5/32]. The rest of the patches convert specific operations: review, rationale for why they can be converted, and setting of the async flag. Kirill v3: Improved patch descriptions. Added a comment to [5/32]. Added [32/32] converting netlink_tap_net_ops (new pernet operations introduced in 2018). v2: Single patch -> patchset with rationale for every conversion ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents cf19e5e + b86b47a commit 885842d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+116
-41
lines changed

drivers/net/loopback.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,4 +230,5 @@ static __net_init int loopback_net_init(struct net *net)
230230
/* Registered in net/core/dev.c */
231231
struct pernet_operations __net_initdata loopback_net_ops = {
232232
.init = loopback_net_init,
233+
.async = true,
233234
};

fs/proc/proc_net.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ static __net_exit void proc_net_ns_exit(struct net *net)
237237
static struct pernet_operations __net_initdata proc_net_ns_ops = {
238238
.init = proc_net_ns_init,
239239
.exit = proc_net_ns_exit,
240+
.async = true,
240241
};
241242

242243
int __init proc_net_init(void)

include/linux/rtnetlink.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ extern int rtnl_is_locked(void);
3636

3737
extern wait_queue_head_t netdev_unregistering_wq;
3838
extern struct mutex net_mutex;
39+
extern struct rw_semaphore net_sem;
3940

4041
#ifdef CONFIG_PROVE_LOCKING
4142
extern bool lockdep_rtnl_is_held(void);

include/net/net_namespace.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,12 @@ struct pernet_operations {
313313
void (*exit_batch)(struct list_head *net_exit_list);
314314
unsigned int *id;
315315
size_t size;
316+
/*
317+
* Indicates above methods are allowed to be executed in parallel
318+
* with methods of any other pernet_operations, i.e. they do not
319+
* need synchronization via net_mutex.
320+
*/
321+
bool async;
316322
};
317323

318324
/*

kernel/audit.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1526,6 +1526,7 @@ static struct pernet_operations audit_net_ops __net_initdata = {
15261526
.exit = audit_net_exit,
15271527
.id = &audit_net_id,
15281528
.size = sizeof(struct audit_net),
1529+
.async = true,
15291530
};
15301531

15311532
/* Initialize audit support at boot time. */

lib/kobject_uevent.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,7 @@ static void uevent_net_exit(struct net *net)
650650
static struct pernet_operations uevent_net_ops = {
651651
.init = uevent_net_init,
652652
.exit = uevent_net_exit,
653+
.async = true,
653654
};
654655

655656
static int __init kobject_uevent_init(void)

net/core/dev.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8833,6 +8833,7 @@ static void __net_exit netdev_exit(struct net *net)
88338833
static struct pernet_operations __net_initdata netdev_net_ops = {
88348834
.init = netdev_init,
88358835
.exit = netdev_exit,
8836+
.async = true,
88368837
};
88378838

88388839
static void __net_exit default_device_exit(struct net *net)
@@ -8933,6 +8934,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
89338934
static struct pernet_operations __net_initdata default_device_ops = {
89348935
.exit = default_device_exit,
89358936
.exit_batch = default_device_exit_batch,
8937+
.async = true,
89368938
};
89378939

89388940
/*

net/core/fib_notifier.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net)
171171
static struct pernet_operations fib_notifier_net_ops = {
172172
.init = fib_notifier_net_init,
173173
.exit = fib_notifier_net_exit,
174+
.async = true,
174175
};
175176

176177
static int __init fib_notifier_init(void)

net/core/fib_rules.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,7 @@ static void __net_exit fib_rules_net_exit(struct net *net)
10301030
static struct pernet_operations fib_rules_net_ops = {
10311031
.init = fib_rules_net_init,
10321032
.exit = fib_rules_net_exit,
1033+
.async = true,
10331034
};
10341035

10351036
static int __init fib_rules_init(void)

net/core/net-procfs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net)
349349
static struct pernet_operations __net_initdata dev_proc_ops = {
350350
.init = dev_proc_net_init,
351351
.exit = dev_proc_net_exit,
352+
.async = true,
352353
};
353354

354355
static int dev_mc_seq_show(struct seq_file *seq, void *v)
@@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net)
405406
static struct pernet_operations __net_initdata dev_mc_net_ops = {
406407
.init = dev_mc_net_init,
407408
.exit = dev_mc_net_exit,
409+
.async = true,
408410
};
409411

410412
int __init dev_proc_init(void)

net/core/net_namespace.c

Lines changed: 56 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
static LIST_HEAD(pernet_list);
3131
static struct list_head *first_device = &pernet_list;
32+
/* Used only if there are !async pernet_operations registered */
3233
DEFINE_MUTEX(net_mutex);
3334

3435
LIST_HEAD(net_namespace_list);
@@ -41,6 +42,12 @@ struct net init_net = {
4142
EXPORT_SYMBOL(init_net);
4243

4344
static bool init_net_initialized;
45+
static unsigned nr_sync_pernet_ops;
46+
/*
47+
* net_sem protects pernet_list, net_generic_ids, nr_sync_pernet_ops,
48+
* init_net_initialized and first_device pointer.
49+
*/
50+
DECLARE_RWSEM(net_sem);
4451

4552
#define MIN_PERNET_OPS_ID \
4653
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -65,11 +72,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
6572
{
6673
struct net_generic *ng, *old_ng;
6774

68-
BUG_ON(!mutex_is_locked(&net_mutex));
6975
BUG_ON(id < MIN_PERNET_OPS_ID);
7076

7177
old_ng = rcu_dereference_protected(net->gen,
72-
lockdep_is_held(&net_mutex));
78+
lockdep_is_held(&net_sem));
7379
if (old_ng->s.len > id) {
7480
old_ng->ptr[id] = data;
7581
return 0;
@@ -286,7 +292,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
286292
*/
287293
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
288294
{
289-
/* Must be called with net_mutex held */
295+
/* Must be called with net_sem held */
290296
const struct pernet_operations *ops, *saved_ops;
291297
int error = 0;
292298
LIST_HEAD(net_exit_list);
@@ -303,6 +309,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
303309
if (error < 0)
304310
goto out_undo;
305311
}
312+
rtnl_lock();
313+
list_add_tail_rcu(&net->list, &net_namespace_list);
314+
rtnl_unlock();
306315
out:
307316
return error;
308317

@@ -331,6 +340,7 @@ static int __net_init net_defaults_init_net(struct net *net)
331340

332341
static struct pernet_operations net_defaults_ops = {
333342
.init = net_defaults_init_net,
343+
.async = true,
334344
};
335345

336346
static __init int net_defaults_init(void)
@@ -408,32 +418,32 @@ struct net *copy_net_ns(unsigned long flags,
408418

409419
net = net_alloc();
410420
if (!net) {
411-
dec_net_namespaces(ucounts);
412-
return ERR_PTR(-ENOMEM);
421+
rv = -ENOMEM;
422+
goto dec_ucounts;
413423
}
414-
424+
refcount_set(&net->passive, 1);
425+
net->ucounts = ucounts;
415426
get_user_ns(user_ns);
416427

417-
rv = mutex_lock_killable(&net_mutex);
418-
if (rv < 0) {
419-
net_free(net);
420-
dec_net_namespaces(ucounts);
421-
put_user_ns(user_ns);
422-
return ERR_PTR(rv);
428+
rv = down_read_killable(&net_sem);
429+
if (rv < 0)
430+
goto put_userns;
431+
if (nr_sync_pernet_ops) {
432+
rv = mutex_lock_killable(&net_mutex);
433+
if (rv < 0)
434+
goto up_read;
423435
}
424-
425-
net->ucounts = ucounts;
426436
rv = setup_net(net, user_ns);
427-
if (rv == 0) {
428-
rtnl_lock();
429-
list_add_tail_rcu(&net->list, &net_namespace_list);
430-
rtnl_unlock();
431-
}
432-
mutex_unlock(&net_mutex);
437+
if (nr_sync_pernet_ops)
438+
mutex_unlock(&net_mutex);
439+
up_read:
440+
up_read(&net_sem);
433441
if (rv < 0) {
434-
dec_net_namespaces(ucounts);
442+
put_userns:
435443
put_user_ns(user_ns);
436444
net_drop_ns(net);
445+
dec_ucounts:
446+
dec_net_namespaces(ucounts);
437447
return ERR_PTR(rv);
438448
}
439449
return net;
@@ -481,7 +491,9 @@ static void cleanup_net(struct work_struct *work)
481491
list_replace_init(&cleanup_list, &net_kill_list);
482492
spin_unlock_irq(&cleanup_list_lock);
483493

484-
mutex_lock(&net_mutex);
494+
down_read(&net_sem);
495+
if (nr_sync_pernet_ops)
496+
mutex_lock(&net_mutex);
485497

486498
/* Don't let anyone else find us. */
487499
rtnl_lock();
@@ -516,11 +528,14 @@ static void cleanup_net(struct work_struct *work)
516528
list_for_each_entry_reverse(ops, &pernet_list, list)
517529
ops_exit_list(ops, &net_exit_list);
518530

531+
if (nr_sync_pernet_ops)
532+
mutex_unlock(&net_mutex);
533+
519534
/* Free the net generic variables */
520535
list_for_each_entry_reverse(ops, &pernet_list, list)
521536
ops_free_list(ops, &net_exit_list);
522537

523-
mutex_unlock(&net_mutex);
538+
up_read(&net_sem);
524539

525540
/* Ensure there are no outstanding rcu callbacks using this
526541
* network namespace.
@@ -547,8 +562,10 @@ static void cleanup_net(struct work_struct *work)
547562
*/
548563
void net_ns_barrier(void)
549564
{
565+
down_write(&net_sem);
550566
mutex_lock(&net_mutex);
551567
mutex_unlock(&net_mutex);
568+
up_write(&net_sem);
552569
}
553570
EXPORT_SYMBOL(net_ns_barrier);
554571

@@ -633,6 +650,7 @@ static __net_exit void net_ns_net_exit(struct net *net)
633650
static struct pernet_operations __net_initdata net_ns_ops = {
634651
.init = net_ns_net_init,
635652
.exit = net_ns_net_exit,
653+
.async = true,
636654
};
637655

638656
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
@@ -875,17 +893,12 @@ static int __init net_ns_init(void)
875893

876894
rcu_assign_pointer(init_net.gen, ng);
877895

878-
mutex_lock(&net_mutex);
896+
down_write(&net_sem);
879897
if (setup_net(&init_net, &init_user_ns))
880898
panic("Could not setup the initial network namespace");
881899

882900
init_net_initialized = true;
883-
884-
rtnl_lock();
885-
list_add_tail_rcu(&init_net.list, &net_namespace_list);
886-
rtnl_unlock();
887-
888-
mutex_unlock(&net_mutex);
901+
up_write(&net_sem);
889902

890903
register_pernet_subsys(&net_ns_ops);
891904

@@ -989,14 +1002,18 @@ static int register_pernet_operations(struct list_head *list,
9891002
rcu_barrier();
9901003
if (ops->id)
9911004
ida_remove(&net_generic_ids, *ops->id);
1005+
} else if (!ops->async) {
1006+
pr_info_once("Pernet operations %ps are sync.\n", ops);
1007+
nr_sync_pernet_ops++;
9921008
}
9931009

9941010
return error;
9951011
}
9961012

9971013
static void unregister_pernet_operations(struct pernet_operations *ops)
9981014
{
999-
1015+
if (!ops->async)
1016+
BUG_ON(nr_sync_pernet_ops-- == 0);
10001017
__unregister_pernet_operations(ops);
10011018
rcu_barrier();
10021019
if (ops->id)
@@ -1025,9 +1042,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
10251042
int register_pernet_subsys(struct pernet_operations *ops)
10261043
{
10271044
int error;
1028-
mutex_lock(&net_mutex);
1045+
down_write(&net_sem);
10291046
error = register_pernet_operations(first_device, ops);
1030-
mutex_unlock(&net_mutex);
1047+
up_write(&net_sem);
10311048
return error;
10321049
}
10331050
EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1043,9 +1060,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
10431060
*/
10441061
void unregister_pernet_subsys(struct pernet_operations *ops)
10451062
{
1046-
mutex_lock(&net_mutex);
1063+
down_write(&net_sem);
10471064
unregister_pernet_operations(ops);
1048-
mutex_unlock(&net_mutex);
1065+
up_write(&net_sem);
10491066
}
10501067
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
10511068

@@ -1071,11 +1088,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
10711088
int register_pernet_device(struct pernet_operations *ops)
10721089
{
10731090
int error;
1074-
mutex_lock(&net_mutex);
1091+
down_write(&net_sem);
10751092
error = register_pernet_operations(&pernet_list, ops);
10761093
if (!error && (first_device == &pernet_list))
10771094
first_device = &ops->list;
1078-
mutex_unlock(&net_mutex);
1095+
up_write(&net_sem);
10791096
return error;
10801097
}
10811098
EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1091,11 +1108,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
10911108
*/
10921109
void unregister_pernet_device(struct pernet_operations *ops)
10931110
{
1094-
mutex_lock(&net_mutex);
1111+
down_write(&net_sem);
10951112
if (&ops->list == first_device)
10961113
first_device = first_device->next;
10971114
unregister_pernet_operations(ops);
1098-
mutex_unlock(&net_mutex);
1115+
up_write(&net_sem);
10991116
}
11001117
EXPORT_SYMBOL_GPL(unregister_pernet_device);
11011118

net/core/rtnetlink.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -454,11 +454,11 @@ static void rtnl_lock_unregistering_all(void)
454454
void rtnl_link_unregister(struct rtnl_link_ops *ops)
455455
{
456456
/* Close the race with cleanup_net() */
457-
mutex_lock(&net_mutex);
457+
down_write(&net_sem);
458458
rtnl_lock_unregistering_all();
459459
__rtnl_link_unregister(ops);
460460
rtnl_unlock();
461-
mutex_unlock(&net_mutex);
461+
up_write(&net_sem);
462462
}
463463
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
464464

@@ -4724,6 +4724,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net)
47244724
static struct pernet_operations rtnetlink_net_ops = {
47254725
.init = rtnetlink_net_init,
47264726
.exit = rtnetlink_net_exit,
4727+
.async = true,
47274728
};
47284729

47294730
void __init rtnetlink_init(void)

net/core/sock.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3112,6 +3112,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net)
31123112
static struct pernet_operations net_inuse_ops = {
31133113
.init = sock_inuse_init_net,
31143114
.exit = sock_inuse_exit_net,
3115+
.async = true,
31153116
};
31163117

31173118
static __init int net_inuse_init(void)
@@ -3385,6 +3386,7 @@ static __net_exit void proto_exit_net(struct net *net)
33853386
static __net_initdata struct pernet_operations proto_net_ops = {
33863387
.init = proto_init_net,
33873388
.exit = proto_exit_net,
3389+
.async = true,
33883390
};
33893391

33903392
static int __init proto_init(void)

net/core/sock_diag.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ static void __net_exit diag_net_exit(struct net *net)
328328
static struct pernet_operations diag_net_ops = {
329329
.init = diag_net_init,
330330
.exit = diag_net_exit,
331+
.async = true,
331332
};
332333

333334
static int __init sock_diag_init(void)

0 commit comments

Comments
 (0)