Skip to content

Commit 4cdf507

Browse files
edumazet authored and davem330 committed
icmp: add a global rate limitation
Current ICMP rate limiting uses inetpeer cache, which is an AVL tree protected by a lock, meaning that hosts can be stuck hard if all cpus want to check ICMP limits.

When, say, a DNS or NTP server process is restarted, the inetpeer tree grows quickly and the machine comes to its knees.

iptables can not help because the bottleneck happens before ICMP messages are even cooked and sent.

This patch adds a new global limitation, using a token bucket filter, controlled by two new sysctls:

icmp_msgs_per_sec - INTEGER
    Limit maximal number of ICMP packets sent per second from this host. Only messages whose type matches icmp_ratemask are controlled by this limit.
    Default: 1000

icmp_msgs_burst - INTEGER
    icmp_msgs_per_sec controls number of ICMP packets sent per second, while icmp_msgs_burst controls the burst size of these packets.
    Default: 50

Note that if we really want to send millions of ICMP messages per second, we might extend idea and infra added in commit 04ca697 ("ip: make IP identifiers less predictable") : add a token bucket in the ip_idents hash and no longer rely on inetpeer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent e8b56d5 commit 4cdf507

File tree

5 files changed

+105
-12
lines changed

5 files changed

+105
-12
lines changed

Documentation/networking/ip-sysctl.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,8 +769,21 @@ icmp_ratelimit - INTEGER
769769
icmp_ratemask (see below) to specific targets.
770770
0 to disable any limiting,
771771
otherwise the minimal space between responses in milliseconds.
772+
Note that another sysctl, icmp_msgs_per_sec, limits the number
773+
of ICMP packets sent to all targets.
772774
Default: 1000
773775

776+
icmp_msgs_per_sec - INTEGER
777+
Limit maximal number of ICMP packets sent per second from this host.
778+
Only messages whose type matches icmp_ratemask (see below) are
779+
controlled by this limit.
780+
Default: 1000
781+
782+
icmp_msgs_burst - INTEGER
783+
icmp_msgs_per_sec controls number of ICMP packets sent per second,
784+
while icmp_msgs_burst controls the burst size of these packets.
785+
Default: 50
786+
774787
icmp_ratemask - INTEGER
775788
Mask made of ICMP types for which rates are being limited.
776789
Significant bits: IHGFEDCBA9876543210

include/net/ip.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,10 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
548548
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
549549
u32 info);
550550

551+
bool icmp_global_allow(void);
552+
extern int sysctl_icmp_msgs_per_sec;
553+
extern int sysctl_icmp_msgs_burst;
554+
551555
#ifdef CONFIG_PROC_FS
552556
int ip_misc_proc_init(void);
553557
#endif

net/ipv4/icmp.c

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -231,12 +231,62 @@ static inline void icmp_xmit_unlock(struct sock *sk)
231231
spin_unlock_bh(&sk->sk_lock.slock);
232232
}
233233

234+
/* Refill rate of the global ICMP token bucket, in credits (messages) per
 * second; used by icmp_global_allow() to compute how many credits to add.
 */
int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
235+
/* Upper bound on the number of credits the global bucket may hold. */
int sysctl_icmp_msgs_burst __read_mostly = 50;
236+
237+
/* State of the single, host-wide ICMP token bucket. */
static struct {
238+
/* Taken by icmp_global_allow() around every update of credit/stamp. */
spinlock_t lock;
239+
/* Credits currently available; one is consumed per allowed message. */
u32 credit;
240+
/* Value of jiffies (truncated to u32) when credits were last added. */
u32 stamp;
241+
} icmp_global = {
242+
.lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
243+
};
244+
245+
/**
246+
* icmp_global_allow - Are we allowed to send one more ICMP message ?
247+
*
248+
* Uses a token bucket to limit our ICMP messages to sysctl_icmp_msgs_per_sec.
249+
* Returns false if we reached the limit and can not send another packet.
250+
* Note: called with BH disabled
*
* Credits are added in proportion to the jiffies elapsed since the last
* refill (at most HZ jiffies, i.e. one second's worth, is counted) and the
* bucket never holds more than sysctl_icmp_msgs_burst credits.
*
* Return: true if a credit was available (it is consumed by this call),
* false otherwise.
251+
*/
252+
bool icmp_global_allow(void)
253+
{
254+
u32 credit, delta, incr = 0, now = (u32)jiffies;
255+
bool rc = false;
256+
257+
/* Check if token bucket is empty and cannot be refilled
258+
* without taking the spinlock.
*
* This is an unlocked read of credit and stamp; both are
* re-evaluated below once the lock is held.
259+
*/
260+
if (!icmp_global.credit) {
261+
delta = min_t(u32, now - icmp_global.stamp, HZ);
262+
if (delta < HZ / 50)
263+
return false;
264+
}
265+
266+
spin_lock(&icmp_global.lock);
267+
delta = min_t(u32, now - icmp_global.stamp, HZ);
268+
/* Only attempt a refill once at least HZ / 50 jiffies (1/50 s) passed. */
if (delta >= HZ / 50) {
269+
incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
270+
/* Advance the stamp only when credits were actually added, so that
* with a low rate the elapsed time keeps accumulating until it is
* worth at least one credit.
*/
if (incr)
271+
icmp_global.stamp = now;
272+
}
273+
/* Add the new credits, capped at the configured burst size. */
credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
274+
/* Consume one credit for the message about to be sent, if any is left. */
if (credit) {
275+
credit--;
276+
rc = true;
277+
}
278+
icmp_global.credit = credit;
279+
spin_unlock(&icmp_global.lock);
280+
return rc;
281+
}
282+
EXPORT_SYMBOL(icmp_global_allow);
283+
234284
/*
235285
* Send an ICMP frame.
236286
*/
237287

238-
static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
239-
struct flowi4 *fl4, int type, int code)
288+
static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
289+
struct flowi4 *fl4, int type, int code)
240290
{
241291
struct dst_entry *dst = &rt->dst;
242292
bool rc = true;
@@ -253,8 +303,14 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
253303
goto out;
254304

255305
/* Limit if icmp type is enabled in ratemask. */
256-
if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
257-
struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
306+
if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
307+
goto out;
308+
309+
rc = false;
310+
if (icmp_global_allow()) {
311+
struct inet_peer *peer;
312+
313+
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
258314
rc = inet_peer_xrlim_allow(peer,
259315
net->ipv4.sysctl_icmp_ratelimit);
260316
if (peer)

net/ipv4/sysctl_net_ipv4.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,22 @@ static struct ctl_table ipv4_table[] = {
730730
.extra1 = &zero,
731731
.extra2 = &one,
732732
},
733+
{
734+
.procname = "icmp_msgs_per_sec",
735+
.data = &sysctl_icmp_msgs_per_sec,
736+
.maxlen = sizeof(int),
737+
.mode = 0644,
738+
.proc_handler = proc_dointvec_minmax,
739+
.extra1 = &zero,
740+
},
741+
{
742+
.procname = "icmp_msgs_burst",
743+
.data = &sysctl_icmp_msgs_burst,
744+
.maxlen = sizeof(int),
745+
.mode = 0644,
746+
.proc_handler = proc_dointvec_minmax,
747+
.extra1 = &zero,
748+
},
733749
{
734750
.procname = "udp_mem",
735751
.data = &sysctl_udp_mem,

net/ipv6/icmp.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,11 @@ static bool is_ineligible(const struct sk_buff *skb)
170170
/*
171171
* Check the ICMP output rate limit
172172
*/
173-
static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
174-
struct flowi6 *fl6)
173+
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
174+
struct flowi6 *fl6)
175175
{
176-
struct dst_entry *dst;
177176
struct net *net = sock_net(sk);
177+
struct dst_entry *dst;
178178
bool res = false;
179179

180180
/* Informational messages are not limited. */
@@ -199,16 +199,20 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
199199
} else {
200200
struct rt6_info *rt = (struct rt6_info *)dst;
201201
int tmo = net->ipv6.sysctl.icmpv6_time;
202-
struct inet_peer *peer;
203202

204203
/* Give more bandwidth to wider prefixes. */
205204
if (rt->rt6i_dst.plen < 128)
206205
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
207206

208-
peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
209-
res = inet_peer_xrlim_allow(peer, tmo);
210-
if (peer)
211-
inet_putpeer(peer);
207+
if (icmp_global_allow()) {
208+
struct inet_peer *peer;
209+
210+
peer = inet_getpeer_v6(net->ipv6.peers,
211+
&rt->rt6i_dst.addr, 1);
212+
res = inet_peer_xrlim_allow(peer, tmo);
213+
if (peer)
214+
inet_putpeer(peer);
215+
}
212216
}
213217
dst_release(dst);
214218
return res;

0 commit comments

Comments
 (0)