Skip to content

Commit 6670e15

Browse files
shemminger authored and davem330 committed
tcp: Namespace-ify sysctl_tcp_default_congestion_control
Make the default TCP congestion control a per-namespace value. This changes the default congestion control to a pointer to congestion ops (rather than it being implicit as the first element of the available list). The congestion control setting of new namespaces is inherited from the current setting of the root namespace. Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 11bf284 commit 6670e15

File tree

7 files changed

+64
-54
lines changed

7 files changed

+64
-54
lines changed

include/net/netns/ipv4.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ struct netns_ipv4 {
160160
struct inet_timewait_death_row tcp_death_row;
161161
int sysctl_max_syn_backlog;
162162
int sysctl_tcp_fastopen;
163+
const struct tcp_congestion_ops __rcu *tcp_congestion_control;
163164
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
164165
spinlock_t tcp_fastopen_ctx_lock;
165166
unsigned int sysctl_tcp_fastopen_blackhole_timeout;

include/net/tcp.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,8 +1002,8 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
10021002
void tcp_assign_congestion_control(struct sock *sk);
10031003
void tcp_init_congestion_control(struct sock *sk);
10041004
void tcp_cleanup_congestion_control(struct sock *sk);
1005-
int tcp_set_default_congestion_control(const char *name);
1006-
void tcp_get_default_congestion_control(char *name);
1005+
int tcp_set_default_congestion_control(struct net *net, const char *name);
1006+
void tcp_get_default_congestion_control(struct net *net, char *name);
10071007
void tcp_get_available_congestion_control(char *buf, size_t len);
10081008
void tcp_get_allowed_congestion_control(char *buf, size_t len);
10091009
int tcp_set_allowed_congestion_control(char *allowed);
@@ -1017,7 +1017,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
10171017
extern struct tcp_congestion_ops tcp_reno;
10181018

10191019
struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
1020-
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
1020+
u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
10211021
#ifdef CONFIG_INET
10221022
char *tcp_ca_get_name_by_key(u32 key, char *buffer);
10231023
#else

net/ipv4/fib_semantics.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
710710
bool ecn_ca = false;
711711

712712
nla_strlcpy(tmp, nla, sizeof(tmp));
713-
val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
713+
val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
714714
} else {
715715
val = nla_get_u32(nla);
716716
}
@@ -1030,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
10301030
char tmp[TCP_CA_NAME_MAX];
10311031

10321032
nla_strlcpy(tmp, nla, sizeof(tmp));
1033-
val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1033+
val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
10341034
if (val == TCP_CA_UNSPEC)
10351035
return -EINVAL;
10361036
} else {

net/ipv4/sysctl_net_ipv4.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -201,18 +201,20 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
201201
static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
202202
void __user *buffer, size_t *lenp, loff_t *ppos)
203203
{
204+
struct net *net = container_of(ctl->data, struct net,
205+
ipv4.tcp_congestion_control);
204206
char val[TCP_CA_NAME_MAX];
205207
struct ctl_table tbl = {
206208
.data = val,
207209
.maxlen = TCP_CA_NAME_MAX,
208210
};
209211
int ret;
210212

211-
tcp_get_default_congestion_control(val);
213+
tcp_get_default_congestion_control(net, val);
212214

213215
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
214216
if (write && ret == 0)
215-
ret = tcp_set_default_congestion_control(val);
217+
ret = tcp_set_default_congestion_control(net, val);
216218
return ret;
217219
}
218220

@@ -447,12 +449,6 @@ static struct ctl_table ipv4_table[] = {
447449
.mode = 0644,
448450
.proc_handler = proc_dointvec
449451
},
450-
{
451-
.procname = "tcp_congestion_control",
452-
.mode = 0644,
453-
.maxlen = TCP_CA_NAME_MAX,
454-
.proc_handler = proc_tcp_congestion_control,
455-
},
456452
#ifdef CONFIG_NETLABEL
457453
{
458454
.procname = "cipso_cache_enable",
@@ -763,6 +759,13 @@ static struct ctl_table ipv4_net_table[] = {
763759
.extra1 = &one
764760
},
765761
#endif
762+
{
763+
.procname = "tcp_congestion_control",
764+
.data = &init_net.ipv4.tcp_congestion_control,
765+
.mode = 0644,
766+
.maxlen = TCP_CA_NAME_MAX,
767+
.proc_handler = proc_tcp_congestion_control,
768+
},
766769
{
767770
.procname = "tcp_keepalive_time",
768771
.data = &init_net.ipv4.sysctl_tcp_keepalive_time,

net/ipv4/tcp_cong.c

Lines changed: 36 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
3333
}
3434

3535
/* Must be called with rcu lock held */
36-
static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
36+
static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
37+
const char *name)
3738
{
38-
const struct tcp_congestion_ops *ca = tcp_ca_find(name);
39+
struct tcp_congestion_ops *ca = tcp_ca_find(name);
40+
3941
#ifdef CONFIG_MODULES
4042
if (!ca && capable(CAP_NET_ADMIN)) {
4143
rcu_read_unlock();
@@ -115,15 +117,15 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
115117
}
116118
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
117119

118-
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
120+
u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
119121
{
120122
const struct tcp_congestion_ops *ca;
121123
u32 key = TCP_CA_UNSPEC;
122124

123125
might_sleep();
124126

125127
rcu_read_lock();
126-
ca = __tcp_ca_find_autoload(name);
128+
ca = tcp_ca_find_autoload(net, name);
127129
if (ca) {
128130
key = ca->key;
129131
*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
153155
/* Assign choice of congestion control. */
154156
void tcp_assign_congestion_control(struct sock *sk)
155157
{
158+
struct net *net = sock_net(sk);
156159
struct inet_connection_sock *icsk = inet_csk(sk);
157-
struct tcp_congestion_ops *ca;
160+
const struct tcp_congestion_ops *ca;
158161

159162
rcu_read_lock();
160-
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
161-
if (likely(try_module_get(ca->owner))) {
162-
icsk->icsk_ca_ops = ca;
163-
goto out;
164-
}
165-
/* Fallback to next available. The last really
166-
* guaranteed fallback is Reno from this list.
167-
*/
168-
}
169-
out:
163+
ca = rcu_dereference(net->ipv4.tcp_congestion_control);
164+
if (unlikely(!try_module_get(ca->owner)))
165+
ca = &tcp_reno;
166+
icsk->icsk_ca_ops = ca;
170167
rcu_read_unlock();
171-
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
172168

169+
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
173170
if (ca->flags & TCP_CONG_NEEDS_ECN)
174171
INET_ECN_xmit(sk);
175172
else
@@ -214,37 +211,36 @@ void tcp_cleanup_congestion_control(struct sock *sk)
214211
}
215212

216213
/* Used by sysctl to change default congestion control */
217-
int tcp_set_default_congestion_control(const char *name)
214+
int tcp_set_default_congestion_control(struct net *net, const char *name)
218215
{
219216
struct tcp_congestion_ops *ca;
220-
int ret = -ENOENT;
221-
222-
spin_lock(&tcp_cong_list_lock);
223-
ca = tcp_ca_find(name);
224-
#ifdef CONFIG_MODULES
225-
if (!ca && capable(CAP_NET_ADMIN)) {
226-
spin_unlock(&tcp_cong_list_lock);
217+
const struct tcp_congestion_ops *prev;
218+
int ret;
227219

228-
request_module("tcp_%s", name);
229-
spin_lock(&tcp_cong_list_lock);
230-
ca = tcp_ca_find(name);
231-
}
232-
#endif
220+
rcu_read_lock();
221+
ca = tcp_ca_find_autoload(net, name);
222+
if (!ca) {
223+
ret = -ENOENT;
224+
} else if (!try_module_get(ca->owner)) {
225+
ret = -EBUSY;
226+
} else {
227+
prev = xchg(&net->ipv4.tcp_congestion_control, ca);
228+
if (prev)
229+
module_put(prev->owner);
233230

234-
if (ca) {
235-
ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
236-
list_move(&ca->list, &tcp_cong_list);
231+
ca->flags |= TCP_CONG_NON_RESTRICTED;
237232
ret = 0;
238233
}
239-
spin_unlock(&tcp_cong_list_lock);
234+
rcu_read_unlock();
240235

241236
return ret;
242237
}
243238

244239
/* Set default value from kernel configuration at bootup */
245240
static int __init tcp_congestion_default(void)
246241
{
247-
return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
242+
return tcp_set_default_congestion_control(&init_net,
243+
CONFIG_DEFAULT_TCP_CONG);
248244
}
249245
late_initcall(tcp_congestion_default);
250246

@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
264260
}
265261

266262
/* Get current default congestion control */
267-
void tcp_get_default_congestion_control(char *name)
263+
void tcp_get_default_congestion_control(struct net *net, char *name)
268264
{
269-
struct tcp_congestion_ops *ca;
270-
/* We will always have reno... */
271-
BUG_ON(list_empty(&tcp_cong_list));
265+
const struct tcp_congestion_ops *ca;
272266

273267
rcu_read_lock();
274-
ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
268+
ca = rcu_dereference(net->ipv4.tcp_congestion_control);
275269
strncpy(name, ca->name, TCP_CA_NAME_MAX);
276270
rcu_read_unlock();
277271
}
@@ -351,12 +345,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
351345
if (!load)
352346
ca = tcp_ca_find(name);
353347
else
354-
ca = __tcp_ca_find_autoload(name);
348+
ca = tcp_ca_find_autoload(sock_net(sk), name);
349+
355350
/* No change asking for existing value */
356351
if (ca == icsk->icsk_ca_ops) {
357352
icsk->icsk_ca_setsockopt = 1;
358353
goto out;
359354
}
355+
360356
if (!ca) {
361357
err = -ENOENT;
362358
} else if (!load) {

net/ipv4/tcp_ipv4.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2430,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
24302430
{
24312431
int cpu;
24322432

2433+
module_put(net->ipv4.tcp_congestion_control->owner);
2434+
24332435
for_each_possible_cpu(cpu)
24342436
inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
24352437
free_percpu(net->ipv4.tcp_sk);
@@ -2522,6 +2524,13 @@ static int __net_init tcp_sk_init(struct net *net)
25222524
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
25232525
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
25242526

2527+
/* Reno is always built in */
2528+
if (!net_eq(net, &init_net) &&
2529+
try_module_get(init_net.ipv4.tcp_congestion_control->owner))
2530+
net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
2531+
else
2532+
net->ipv4.tcp_congestion_control = &tcp_reno;
2533+
25252534
return 0;
25262535
fail:
25272536
tcp_sk_exit(net);

net/ipv6/route.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2378,6 +2378,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
23782378
static int ip6_convert_metrics(struct mx6_config *mxc,
23792379
const struct fib6_config *cfg)
23802380
{
2381+
struct net *net = cfg->fc_nlinfo.nl_net;
23812382
bool ecn_ca = false;
23822383
struct nlattr *nla;
23832384
int remaining;
@@ -2403,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
24032404
char tmp[TCP_CA_NAME_MAX];
24042405

24052406
nla_strlcpy(tmp, nla, sizeof(tmp));
2406-
val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
2407+
val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
24072408
if (val == TCP_CA_UNSPEC)
24082409
goto err;
24092410
} else {

0 commit comments

Comments
 (0)