Skip to content

Commit c0303ef

Browse files
netoptimizer authored and davem330 committed
net: reduce cycles spend on ICMP replies that gets rate limited
This patch splits the global and per-(inet)peer ICMP-reply limiter code, and moves the global limit check earlier in the packet processing path. This avoids spending cycles on ICMP replies that get limited/suppressed anyhow. The global ICMP rate limiter icmp_global_allow() is a good solution; it just happens too late in the process. The kernel goes through the full route lookup (return path) for the ICMP message before taking the rate-limit decision not to send the ICMP reply. Details: The kernel's global rate limiter for ICMP messages was added in commit 4cdf507 ("icmp: add a global rate limitation"). It is a token bucket limiter with a global lock. It brilliantly avoids lock congestion by only updating when 20ms (HZ/50) have elapsed. It can then avoid taking the lock when credit is exhausted (when under pressure) and the time constraint for refill is not yet met. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 8d9ba38 commit c0303ef

File tree

2 files changed

+82
-38
lines changed

2 files changed

+82
-38
lines changed

net/ipv4/icmp.c

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,33 @@ bool icmp_global_allow(void)
282282
}
283283
EXPORT_SYMBOL(icmp_global_allow);
284284

285+
/*
 * Decide whether an ICMPv4 message is exempt from rate limiting.
 *
 * Returns true (no limiting applies) when the type is above
 * NR_ICMP_TYPES, when the message is a "fragmentation needed"
 * destination-unreachable, or when the type's bit is not set in this
 * netns' sysctl_icmp_ratemask. Returns false when the message is
 * subject to rate limiting.
 */
static bool icmpv4_mask_allow(struct net *net, int type, int code)
286+
{
287+
/* Types above NR_ICMP_TYPES are never limited; this also keeps such
 * values away from the (1 << type) ratemask test below.
 */
if (type > NR_ICMP_TYPES)
288+
return true;
289+
290+
/* Don't limit PMTU discovery. */
291+
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
292+
return true;
293+
294+
/* Limit if icmp type is enabled in ratemask. */
295+
if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
296+
return true;
297+
298+
/* No exemption matched: the caller must apply rate limiting. */
return false;
299+
}
300+
301+
/*
 * Global (not per-peer) ICMPv4 rate-limit check.
 *
 * Returns true when the message may be sent: either it is exempt per
 * icmpv4_mask_allow(), or icmp_global_allow() grants it. Returns false
 * when the global limiter refuses the message.
 */
static bool icmpv4_global_allow(struct net *net, int type, int code)
302+
{
303+
/* Exempt messages are allowed without consulting icmp_global_allow(). */
if (icmpv4_mask_allow(net, type, code))
304+
return true;
305+
306+
if (icmp_global_allow())
307+
return true;
308+
309+
return false;
310+
}
311+
285312
/*
286313
* Send an ICMP frame.
287314
*/
@@ -290,34 +317,22 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
290317
struct flowi4 *fl4, int type, int code)
291318
{
292319
struct dst_entry *dst = &rt->dst;
320+
struct inet_peer *peer;
293321
bool rc = true;
322+
int vif;
294323

295-
if (type > NR_ICMP_TYPES)
296-
goto out;
297-
298-
/* Don't limit PMTU discovery. */
299-
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
324+
if (icmpv4_mask_allow(net, type, code))
300325
goto out;
301326

302327
/* No rate limit on loopback */
303328
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
304329
goto out;
305330

306-
/* Limit if icmp type is enabled in ratemask. */
307-
if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
308-
goto out;
309-
310-
rc = false;
311-
if (icmp_global_allow()) {
312-
int vif = l3mdev_master_ifindex(dst->dev);
313-
struct inet_peer *peer;
314-
315-
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
316-
rc = inet_peer_xrlim_allow(peer,
317-
net->ipv4.sysctl_icmp_ratelimit);
318-
if (peer)
319-
inet_putpeer(peer);
320-
}
331+
vif = l3mdev_master_ifindex(dst->dev);
332+
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
333+
rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
334+
if (peer)
335+
inet_putpeer(peer);
321336
out:
322337
return rc;
323338
}
@@ -396,6 +411,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
396411
struct inet_sock *inet;
397412
__be32 daddr, saddr;
398413
u32 mark = IP4_REPLY_MARK(net, skb->mark);
414+
int type = icmp_param->data.icmph.type;
415+
int code = icmp_param->data.icmph.code;
399416

400417
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
401418
return;
@@ -405,6 +422,10 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
405422
return;
406423
inet = inet_sk(sk);
407424

425+
/* global icmp_msgs_per_sec */
426+
if (!icmpv4_global_allow(net, type, code))
427+
goto out_unlock;
428+
408429
icmp_param->data.icmph.checksum = 0;
409430

410431
inet->tos = ip_hdr(skb)->tos;
@@ -433,8 +454,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
433454
rt = ip_route_output_key(net, &fl4);
434455
if (IS_ERR(rt))
435456
goto out_unlock;
436-
if (icmpv4_xrlim_allow(net, rt, &fl4, icmp_param->data.icmph.type,
437-
icmp_param->data.icmph.code))
457+
if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
438458
icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
439459
ip_rt_put(rt);
440460
out_unlock:
@@ -650,7 +670,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
650670

651671
sk = icmp_xmit_lock(net);
652672
if (!sk)
653-
return;
673+
goto out;
674+
675+
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
676+
if (!icmpv4_global_allow(net, type, code))
677+
goto out_unlock;
654678

655679
/*
656680
* Construct source address and options.
@@ -704,6 +728,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
704728
if (IS_ERR(rt))
705729
goto out_unlock;
706730

731+
/* peer icmp_ratelimit */
707732
if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
708733
goto ende;
709734

net/ipv6/icmp.c

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,30 @@ static bool is_ineligible(const struct sk_buff *skb)
168168
return false;
169169
}
170170

171+
/*
 * Decide whether an ICMPv6 message type is exempt from rate limiting.
 *
 * Returns true (no limiting applies) for types with any
 * ICMPV6_INFOMSG_MASK bit set and for ICMPV6_PKT_TOOBIG; returns false
 * for every other type.
 */
static bool icmpv6_mask_allow(int type)
172+
{
173+
/* Informational messages are not limited. */
174+
if (type & ICMPV6_INFOMSG_MASK)
175+
return true;
176+
177+
/* Do not limit pmtu discovery, it would break it. */
178+
if (type == ICMPV6_PKT_TOOBIG)
179+
return true;
180+
181+
/* No exemption matched: the caller must apply rate limiting. */
return false;
182+
}
183+
184+
/*
 * Global (not per-peer) ICMPv6 rate-limit check.
 *
 * Returns true when the message may be sent: either its type is exempt
 * per icmpv6_mask_allow(), or icmp_global_allow() grants it. Returns
 * false when the global limiter refuses the message.
 */
static bool icmpv6_global_allow(int type)
185+
{
186+
/* Exempt types are allowed without consulting icmp_global_allow(). */
if (icmpv6_mask_allow(type))
187+
return true;
188+
189+
if (icmp_global_allow())
190+
return true;
191+
192+
return false;
193+
}
194+
171195
/*
172196
* Check the ICMP output rate limit
173197
*/
@@ -178,12 +202,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
178202
struct dst_entry *dst;
179203
bool res = false;
180204

181-
/* Informational messages are not limited. */
182-
if (type & ICMPV6_INFOMSG_MASK)
183-
return true;
184-
185-
/* Do not limit pmtu discovery, it would break it. */
186-
if (type == ICMPV6_PKT_TOOBIG)
205+
if (icmpv6_mask_allow(type))
187206
return true;
188207

189208
/*
@@ -200,20 +219,16 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
200219
} else {
201220
struct rt6_info *rt = (struct rt6_info *)dst;
202221
int tmo = net->ipv6.sysctl.icmpv6_time;
222+
struct inet_peer *peer;
203223

204224
/* Give more bandwidth to wider prefixes. */
205225
if (rt->rt6i_dst.plen < 128)
206226
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
207227

208-
if (icmp_global_allow()) {
209-
struct inet_peer *peer;
210-
211-
peer = inet_getpeer_v6(net->ipv6.peers,
212-
&fl6->daddr, 1);
213-
res = inet_peer_xrlim_allow(peer, tmo);
214-
if (peer)
215-
inet_putpeer(peer);
216-
}
228+
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
229+
res = inet_peer_xrlim_allow(peer, tmo);
230+
if (peer)
231+
inet_putpeer(peer);
217232
}
218233
dst_release(dst);
219234
return res;
@@ -493,6 +508,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
493508
sk = icmpv6_xmit_lock(net);
494509
if (!sk)
495510
return;
511+
512+
if (!icmpv6_global_allow(type))
513+
goto out;
514+
496515
sk->sk_mark = mark;
497516
np = inet6_sk(sk);
498517

0 commit comments

Comments
 (0)