Skip to content

Commit 230583c

Browse files
committed
Merge branch 'udp-fix-early-demux-for-mcast-packets'
Paolo Abeni says: ==================== udp: fix early demux for mcast packets Currently the early demux callbacks do not perform source address validation. This is not an issue for TCP or UDP unicast, where early demux is only allowed for connected sockets and the source address is validated for the first packet and never changes. The UDP protocol currently also allows early demux for unconnected multicast sockets, and we do not perform any validation for them after the first packet lands on the socket: beyond ignoring the rp_filter - if enabled - any kind of martian source is also allowed. This series addresses the issue by allowing the early demux callback to return an error code, and by performing the proper checks for unconnected UDP multicast sockets before leveraging the rx dst cache. Alternatively we could disable the early demux for unconnected mcast sockets, but that would cause a significant performance regression - around 50% - while with this series, with full rp_filter in place, we keep the regression to a more moderate level. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents d41bb33 + bc044e8 commit 230583c

File tree

8 files changed

+71
-45
lines changed

8 files changed

+71
-45
lines changed

include/net/protocol.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@
3939

4040
/* This is used to register protocols. */
4141
struct net_protocol {
42-
void (*early_demux)(struct sk_buff *skb);
43-
void (*early_demux_handler)(struct sk_buff *skb);
42+
int (*early_demux)(struct sk_buff *skb);
43+
int (*early_demux_handler)(struct sk_buff *skb);
4444
int (*handler)(struct sk_buff *skb);
4545
void (*err_handler)(struct sk_buff *skb, u32 info);
4646
unsigned int no_policy:1,

include/net/route.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
175175
fl4->fl4_gre_key = gre_key;
176176
return ip_route_output_key(net, fl4);
177177
}
178-
178+
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
179+
u8 tos, struct net_device *dev,
180+
struct in_device *in_dev, u32 *itag);
179181
int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
180182
u8 tos, struct net_device *devin);
181183
int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,

include/net/tcp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ void tcp_v4_err(struct sk_buff *skb, u32);
345345

346346
void tcp_shutdown(struct sock *sk, int how);
347347

348-
void tcp_v4_early_demux(struct sk_buff *skb);
348+
int tcp_v4_early_demux(struct sk_buff *skb);
349349
int tcp_v4_rcv(struct sk_buff *skb);
350350

351351
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);

include/net/udp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
259259
return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
260260
}
261261

262-
void udp_v4_early_demux(struct sk_buff *skb);
262+
int udp_v4_early_demux(struct sk_buff *skb);
263263
bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
264264
int udp_get_port(struct sock *sk, unsigned short snum,
265265
int (*saddr_cmp)(const struct sock *,

net/ipv4/ip_input.c

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -311,9 +311,10 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
311311
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
312312
{
313313
const struct iphdr *iph = ip_hdr(skb);
314-
struct rtable *rt;
314+
int (*edemux)(struct sk_buff *skb);
315315
struct net_device *dev = skb->dev;
316-
void (*edemux)(struct sk_buff *skb);
316+
struct rtable *rt;
317+
int err;
317318

318319
/* if ingress device is enslaved to an L3 master device pass the
319320
* skb to its handler for processing
@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
331332

332333
ipprot = rcu_dereference(inet_protos[protocol]);
333334
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
334-
edemux(skb);
335+
err = edemux(skb);
336+
if (unlikely(err))
337+
goto drop_error;
335338
/* must reload iph, skb->head might have changed */
336339
iph = ip_hdr(skb);
337340
}
@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
342345
* how the packet travels inside Linux networking.
343346
*/
344347
if (!skb_valid_dst(skb)) {
345-
int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
346-
iph->tos, dev);
347-
if (unlikely(err)) {
348-
if (err == -EXDEV)
349-
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
350-
goto drop;
351-
}
348+
err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
349+
iph->tos, dev);
350+
if (unlikely(err))
351+
goto drop_error;
352352
}
353353

354354
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
399399
drop:
400400
kfree_skb(skb);
401401
return NET_RX_DROP;
402+
403+
drop_error:
404+
if (err == -EXDEV)
405+
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
406+
goto drop;
402407
}
403408

404409
/*

net/ipv4/route.c

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
15201520
EXPORT_SYMBOL(rt_dst_alloc);
15211521

15221522
/* called in rcu_read_lock() section */
1523-
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1524-
u8 tos, struct net_device *dev, int our)
1523+
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1524+
u8 tos, struct net_device *dev,
1525+
struct in_device *in_dev, u32 *itag)
15251526
{
1526-
struct rtable *rth;
1527-
struct in_device *in_dev = __in_dev_get_rcu(dev);
1528-
unsigned int flags = RTCF_MULTICAST;
1529-
u32 itag = 0;
15301527
int err;
15311528

15321529
/* Primary sanity checks. */
1533-
15341530
if (!in_dev)
15351531
return -EINVAL;
15361532

15371533
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
15381534
skb->protocol != htons(ETH_P_IP))
1539-
goto e_inval;
1535+
return -EINVAL;
15401536

15411537
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
1542-
goto e_inval;
1538+
return -EINVAL;
15431539

15441540
if (ipv4_is_zeronet(saddr)) {
15451541
if (!ipv4_is_local_multicast(daddr))
1546-
goto e_inval;
1542+
return -EINVAL;
15471543
} else {
15481544
err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1549-
in_dev, &itag);
1545+
in_dev, itag);
15501546
if (err < 0)
1551-
goto e_err;
1547+
return err;
15521548
}
1549+
return 0;
1550+
}
1551+
1552+
/* called in rcu_read_lock() section */
1553+
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1554+
u8 tos, struct net_device *dev, int our)
1555+
{
1556+
struct in_device *in_dev = __in_dev_get_rcu(dev);
1557+
unsigned int flags = RTCF_MULTICAST;
1558+
struct rtable *rth;
1559+
u32 itag = 0;
1560+
int err;
1561+
1562+
err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1563+
if (err)
1564+
return err;
1565+
15531566
if (our)
15541567
flags |= RTCF_LOCAL;
15551568

15561569
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
15571570
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
15581571
if (!rth)
1559-
goto e_nobufs;
1572+
return -ENOBUFS;
15601573

15611574
#ifdef CONFIG_IP_ROUTE_CLASSID
15621575
rth->dst.tclassid = itag;
@@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
15721585

15731586
skb_dst_set(skb, &rth->dst);
15741587
return 0;
1575-
1576-
e_nobufs:
1577-
return -ENOBUFS;
1578-
e_inval:
1579-
return -EINVAL;
1580-
e_err:
1581-
return err;
15821588
}
15831589

15841590

net/ipv4/tcp_ipv4.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1503,23 +1503,23 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
15031503
}
15041504
EXPORT_SYMBOL(tcp_v4_do_rcv);
15051505

1506-
void tcp_v4_early_demux(struct sk_buff *skb)
1506+
int tcp_v4_early_demux(struct sk_buff *skb)
15071507
{
15081508
const struct iphdr *iph;
15091509
const struct tcphdr *th;
15101510
struct sock *sk;
15111511

15121512
if (skb->pkt_type != PACKET_HOST)
1513-
return;
1513+
return 0;
15141514

15151515
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1516-
return;
1516+
return 0;
15171517

15181518
iph = ip_hdr(skb);
15191519
th = tcp_hdr(skb);
15201520

15211521
if (th->doff < sizeof(struct tcphdr) / 4)
1522-
return;
1522+
return 0;
15231523

15241524
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
15251525
iph->saddr, th->source,
@@ -1538,6 +1538,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
15381538
skb_dst_set_noref(skb, dst);
15391539
}
15401540
}
1541+
return 0;
15411542
}
15421543

15431544
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)

net/ipv4/udp.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2221,9 +2221,10 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
22212221
return NULL;
22222222
}
22232223

2224-
void udp_v4_early_demux(struct sk_buff *skb)
2224+
int udp_v4_early_demux(struct sk_buff *skb)
22252225
{
22262226
struct net *net = dev_net(skb->dev);
2227+
struct in_device *in_dev = NULL;
22272228
const struct iphdr *iph;
22282229
const struct udphdr *uh;
22292230
struct sock *sk = NULL;
@@ -2234,24 +2235,24 @@ void udp_v4_early_demux(struct sk_buff *skb)
22342235

22352236
/* validate the packet */
22362237
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
2237-
return;
2238+
return 0;
22382239

22392240
iph = ip_hdr(skb);
22402241
uh = udp_hdr(skb);
22412242

22422243
if (skb->pkt_type == PACKET_BROADCAST ||
22432244
skb->pkt_type == PACKET_MULTICAST) {
2244-
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
2245+
in_dev = __in_dev_get_rcu(skb->dev);
22452246

22462247
if (!in_dev)
2247-
return;
2248+
return 0;
22482249

22492250
/* we are supposed to accept bcast packets */
22502251
if (skb->pkt_type == PACKET_MULTICAST) {
22512252
ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
22522253
iph->protocol);
22532254
if (!ours)
2254-
return;
2255+
return 0;
22552256
}
22562257

22572258
sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
@@ -2263,7 +2264,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
22632264
}
22642265

22652266
if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
2266-
return;
2267+
return 0;
22672268

22682269
skb->sk = sk;
22692270
skb->destructor = sock_efree;
@@ -2272,12 +2273,23 @@ void udp_v4_early_demux(struct sk_buff *skb)
22722273
if (dst)
22732274
dst = dst_check(dst, 0);
22742275
if (dst) {
2276+
u32 itag = 0;
2277+
22752278
/* set noref for now.
22762279
* any place which wants to hold dst has to call
22772280
* dst_hold_safe()
22782281
*/
22792282
skb_dst_set_noref(skb, dst);
2283+
2284+
/* for unconnected multicast sockets we need to validate
2285+
* the source on each packet
2286+
*/
2287+
if (!inet_sk(sk)->inet_daddr && in_dev)
2288+
return ip_mc_validate_source(skb, iph->daddr,
2289+
iph->saddr, iph->tos,
2290+
skb->dev, in_dev, &itag);
22802291
}
2292+
return 0;
22812293
}
22822294

22832295
int udp_rcv(struct sk_buff *skb)

0 commit comments

Comments
 (0)