Skip to content

Commit bc044e8

Browse files
Paolo Abeni authored and davem330 committed
udp: perform source validation for mcast early demux
The UDP early demux can leverage the rx dst cache even for multicast unconnected sockets. In such a scenario the ipv4 source address is validated only on the first packet in the given flow. After that, when we fetch the dst entry from the socket rx cache, we stop enforcing the rp_filter and we even start accepting any kind of martian addresses.

Disabling the dst cache for unconnected multicast sockets will cause a large performance regression, nearly reducing by half the max ingress tput.

Instead we factor out a route helper to completely validate an skb source address for multicast packets and we call it from the UDP early demux for mcast packets landing on unconnected sockets, after successfully fetching the related cached dst entry.

This still gives a measurable, but limited performance regression:

                    rp_filter = 0    rp_filter = 1
edmux disabled:     1182 Kpps        1127 Kpps
edmux before:       2238 Kpps        2238 Kpps
edmux after:        2037 Kpps        2019 Kpps

The above figures are on top of current net tree. Applying the net-next commit 6e617de ("net: avoid a full fib lookup when rp_filter is disabled.") the delta with rp_filter == 0 will decrease even more.

Fixes: 421b388 ("udp: ipv4: Add udp early demux")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 7487449 commit bc044e8

File tree

3 files changed

+41
-22
lines changed

3 files changed

+41
-22
lines changed

include/net/route.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
175175
fl4->fl4_gre_key = gre_key;
176176
return ip_route_output_key(net, fl4);
177177
}
178-
178+
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
179+
u8 tos, struct net_device *dev,
180+
struct in_device *in_dev, u32 *itag);
179181
int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
180182
u8 tos, struct net_device *devin);
181183
int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,

net/ipv4/route.c

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
15201520
EXPORT_SYMBOL(rt_dst_alloc);
15211521

15221522
/* called in rcu_read_lock() section */
1523-
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1524-
u8 tos, struct net_device *dev, int our)
1523+
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1524+
u8 tos, struct net_device *dev,
1525+
struct in_device *in_dev, u32 *itag)
15251526
{
1526-
struct rtable *rth;
1527-
struct in_device *in_dev = __in_dev_get_rcu(dev);
1528-
unsigned int flags = RTCF_MULTICAST;
1529-
u32 itag = 0;
15301527
int err;
15311528

15321529
/* Primary sanity checks. */
1533-
15341530
if (!in_dev)
15351531
return -EINVAL;
15361532

15371533
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
15381534
skb->protocol != htons(ETH_P_IP))
1539-
goto e_inval;
1535+
return -EINVAL;
15401536

15411537
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
1542-
goto e_inval;
1538+
return -EINVAL;
15431539

15441540
if (ipv4_is_zeronet(saddr)) {
15451541
if (!ipv4_is_local_multicast(daddr))
1546-
goto e_inval;
1542+
return -EINVAL;
15471543
} else {
15481544
err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1549-
in_dev, &itag);
1545+
in_dev, itag);
15501546
if (err < 0)
1551-
goto e_err;
1547+
return err;
15521548
}
1549+
return 0;
1550+
}
1551+
1552+
/* called in rcu_read_lock() section */
1553+
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1554+
u8 tos, struct net_device *dev, int our)
1555+
{
1556+
struct in_device *in_dev = __in_dev_get_rcu(dev);
1557+
unsigned int flags = RTCF_MULTICAST;
1558+
struct rtable *rth;
1559+
u32 itag = 0;
1560+
int err;
1561+
1562+
err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1563+
if (err)
1564+
return err;
1565+
15531566
if (our)
15541567
flags |= RTCF_LOCAL;
15551568

15561569
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
15571570
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
15581571
if (!rth)
1559-
goto e_nobufs;
1572+
return -ENOBUFS;
15601573

15611574
#ifdef CONFIG_IP_ROUTE_CLASSID
15621575
rth->dst.tclassid = itag;
@@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
15721585

15731586
skb_dst_set(skb, &rth->dst);
15741587
return 0;
1575-
1576-
e_nobufs:
1577-
return -ENOBUFS;
1578-
e_inval:
1579-
return -EINVAL;
1580-
e_err:
1581-
return err;
15821588
}
15831589

15841590

net/ipv4/udp.c

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2224,6 +2224,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
22242224
int udp_v4_early_demux(struct sk_buff *skb)
22252225
{
22262226
struct net *net = dev_net(skb->dev);
2227+
struct in_device *in_dev = NULL;
22272228
const struct iphdr *iph;
22282229
const struct udphdr *uh;
22292230
struct sock *sk = NULL;
@@ -2241,7 +2242,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
22412242

22422243
if (skb->pkt_type == PACKET_BROADCAST ||
22432244
skb->pkt_type == PACKET_MULTICAST) {
2244-
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
2245+
in_dev = __in_dev_get_rcu(skb->dev);
22452246

22462247
if (!in_dev)
22472248
return 0;
@@ -2272,11 +2273,21 @@ int udp_v4_early_demux(struct sk_buff *skb)
22722273
if (dst)
22732274
dst = dst_check(dst, 0);
22742275
if (dst) {
2276+
u32 itag = 0;
2277+
22752278
/* set noref for now.
22762279
* any place which wants to hold dst has to call
22772280
* dst_hold_safe()
22782281
*/
22792282
skb_dst_set_noref(skb, dst);
2283+
2284+
/* for unconnected multicast sockets we need to validate
2285+
* the source on each packet
2286+
*/
2287+
if (!inet_sk(sk)->inet_daddr && in_dev)
2288+
return ip_mc_validate_source(skb, iph->daddr,
2289+
iph->saddr, iph->tos,
2290+
skb->dev, in_dev, &itag);
22802291
}
22812292
return 0;
22822293
}

0 commit comments

Comments
 (0)