Skip to content

Commit 20da4ef

Browse files
committed
Merge branch 'ICMP-error-handling-for-UDP-tunnels'
Stefano Brivio says:

====================
ICMP error handling for UDP tunnels

This series introduces ICMP error handling for UDP tunnels and
encapsulations and related selftests. We need to handle ICMP errors to
support PMTU discovery and route redirection -- this support is entirely
missing right now:

- patch 1/11 adds a socket lookup for UDP tunnels that use, by design,
  the same destination port on both endpoints -- i.e. VXLAN and GENEVE
- patches 2/11 to 7/11 are specific to VxLAN and GENEVE
- patches 8/11 and 9/11 add infrastructure for lookup of encapsulations
  where sent packets cannot be matched via receiving socket lookup, i.e.
  FoU and GUE
- patches 10/11 and 11/11 are specific to FoU and GUE

v2: changes are listed in the single patches
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 141b95d + 56fd865 commit 20da4ef

40 files changed

+1083
-166
lines changed

drivers/net/geneve.c

Lines changed: 99 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ struct geneve_dev {
7070
bool collect_md;
7171
bool use_udp6_rx_checksums;
7272
bool ttl_inherit;
73+
enum ifla_geneve_df df;
7374
};
7475

7576
struct geneve_sock {
@@ -387,6 +388,57 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
387388
return 0;
388389
}
389390

391+
/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
392+
static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
393+
{
394+
struct genevehdr *geneveh;
395+
struct geneve_sock *gs;
396+
u8 zero_vni[3] = { 0 };
397+
u8 *vni = zero_vni;
398+
399+
if (skb->len < GENEVE_BASE_HLEN)
400+
return -EINVAL;
401+
402+
geneveh = geneve_hdr(skb);
403+
if (geneveh->ver != GENEVE_VER)
404+
return -EINVAL;
405+
406+
if (geneveh->proto_type != htons(ETH_P_TEB))
407+
return -EINVAL;
408+
409+
gs = rcu_dereference_sk_user_data(sk);
410+
if (!gs)
411+
return -ENOENT;
412+
413+
if (geneve_get_sk_family(gs) == AF_INET) {
414+
struct iphdr *iph = ip_hdr(skb);
415+
__be32 addr4 = 0;
416+
417+
if (!gs->collect_md) {
418+
vni = geneve_hdr(skb)->vni;
419+
addr4 = iph->daddr;
420+
}
421+
422+
return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
423+
}
424+
425+
#if IS_ENABLED(CONFIG_IPV6)
426+
if (geneve_get_sk_family(gs) == AF_INET6) {
427+
struct ipv6hdr *ip6h = ipv6_hdr(skb);
428+
struct in6_addr addr6 = { 0 };
429+
430+
if (!gs->collect_md) {
431+
vni = geneve_hdr(skb)->vni;
432+
addr6 = ip6h->daddr;
433+
}
434+
435+
return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
436+
}
437+
#endif
438+
439+
return -EPFNOSUPPORT;
440+
}
441+
390442
static struct socket *geneve_create_sock(struct net *net, bool ipv6,
391443
__be16 port, bool ipv6_rx_csum)
392444
{
@@ -544,6 +596,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
544596
tunnel_cfg.gro_receive = geneve_gro_receive;
545597
tunnel_cfg.gro_complete = geneve_gro_complete;
546598
tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
599+
tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
547600
tunnel_cfg.encap_destroy = NULL;
548601
setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
549602
list_add(&gs->list, &gn->sock_list);
@@ -823,8 +876,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
823876
struct rtable *rt;
824877
struct flowi4 fl4;
825878
__u8 tos, ttl;
879+
__be16 df = 0;
826880
__be16 sport;
827-
__be16 df;
828881
int err;
829882

830883
rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
@@ -838,15 +891,31 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
838891
if (geneve->collect_md) {
839892
tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
840893
ttl = key->ttl;
894+
895+
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
841896
} else {
842897
tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
843898
if (geneve->ttl_inherit)
844899
ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
845900
else
846901
ttl = key->ttl;
847902
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
903+
904+
if (geneve->df == GENEVE_DF_SET) {
905+
df = htons(IP_DF);
906+
} else if (geneve->df == GENEVE_DF_INHERIT) {
907+
struct ethhdr *eth = eth_hdr(skb);
908+
909+
if (ntohs(eth->h_proto) == ETH_P_IPV6) {
910+
df = htons(IP_DF);
911+
} else if (ntohs(eth->h_proto) == ETH_P_IP) {
912+
struct iphdr *iph = ip_hdr(skb);
913+
914+
if (iph->frag_off & htons(IP_DF))
915+
df = htons(IP_DF);
916+
}
917+
}
848918
}
849-
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
850919

851920
err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
852921
if (unlikely(err))
@@ -1093,6 +1162,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
10931162
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
10941163
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
10951164
[IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
1165+
[IFLA_GENEVE_DF] = { .type = NLA_U8 },
10961166
};
10971167

10981168
static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1128,6 +1198,16 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
11281198
}
11291199
}
11301200

1201+
if (data[IFLA_GENEVE_DF]) {
1202+
enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
1203+
1204+
if (df < 0 || df > GENEVE_DF_MAX) {
1205+
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_GENEVE_DF],
1206+
"Invalid DF attribute");
1207+
return -EINVAL;
1208+
}
1209+
}
1210+
11311211
return 0;
11321212
}
11331213

@@ -1173,7 +1253,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
11731253
struct netlink_ext_ack *extack,
11741254
const struct ip_tunnel_info *info,
11751255
bool metadata, bool ipv6_rx_csum,
1176-
bool ttl_inherit)
1256+
bool ttl_inherit, enum ifla_geneve_df df)
11771257
{
11781258
struct geneve_net *gn = net_generic(net, geneve_net_id);
11791259
struct geneve_dev *t, *geneve = netdev_priv(dev);
@@ -1223,6 +1303,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
12231303
geneve->collect_md = metadata;
12241304
geneve->use_udp6_rx_checksums = ipv6_rx_csum;
12251305
geneve->ttl_inherit = ttl_inherit;
1306+
geneve->df = df;
12261307

12271308
err = register_netdevice(dev);
12281309
if (err)
@@ -1242,7 +1323,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
12421323
struct netlink_ext_ack *extack,
12431324
struct ip_tunnel_info *info, bool *metadata,
12441325
bool *use_udp6_rx_checksums, bool *ttl_inherit,
1245-
bool changelink)
1326+
enum ifla_geneve_df *df, bool changelink)
12461327
{
12471328
int attrtype;
12481329

@@ -1330,6 +1411,9 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
13301411
if (data[IFLA_GENEVE_TOS])
13311412
info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
13321413

1414+
if (data[IFLA_GENEVE_DF])
1415+
*df = nla_get_u8(data[IFLA_GENEVE_DF]);
1416+
13331417
if (data[IFLA_GENEVE_LABEL]) {
13341418
info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
13351419
IPV6_FLOWLABEL_MASK;
@@ -1448,6 +1532,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
14481532
struct nlattr *tb[], struct nlattr *data[],
14491533
struct netlink_ext_ack *extack)
14501534
{
1535+
enum ifla_geneve_df df = GENEVE_DF_UNSET;
14511536
bool use_udp6_rx_checksums = false;
14521537
struct ip_tunnel_info info;
14531538
bool ttl_inherit = false;
@@ -1456,12 +1541,12 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
14561541

14571542
init_tnl_info(&info, GENEVE_UDP_PORT);
14581543
err = geneve_nl2info(tb, data, extack, &info, &metadata,
1459-
&use_udp6_rx_checksums, &ttl_inherit, false);
1544+
&use_udp6_rx_checksums, &ttl_inherit, &df, false);
14601545
if (err)
14611546
return err;
14621547

14631548
err = geneve_configure(net, dev, extack, &info, metadata,
1464-
use_udp6_rx_checksums, ttl_inherit);
1549+
use_udp6_rx_checksums, ttl_inherit, df);
14651550
if (err)
14661551
return err;
14671552

@@ -1524,6 +1609,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
15241609
struct ip_tunnel_info info;
15251610
bool metadata;
15261611
bool use_udp6_rx_checksums;
1612+
enum ifla_geneve_df df;
15271613
bool ttl_inherit;
15281614
int err;
15291615

@@ -1539,7 +1625,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
15391625
use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
15401626
ttl_inherit = geneve->ttl_inherit;
15411627
err = geneve_nl2info(tb, data, extack, &info, &metadata,
1542-
&use_udp6_rx_checksums, &ttl_inherit, true);
1628+
&use_udp6_rx_checksums, &ttl_inherit, &df, true);
15431629
if (err)
15441630
return err;
15451631

@@ -1572,6 +1658,7 @@ static size_t geneve_get_size(const struct net_device *dev)
15721658
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
15731659
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
15741660
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
1661+
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
15751662
nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
15761663
nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
15771664
nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
@@ -1620,6 +1707,9 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
16201707
nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
16211708
goto nla_put_failure;
16221709

1710+
if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->df))
1711+
goto nla_put_failure;
1712+
16231713
if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
16241714
goto nla_put_failure;
16251715

@@ -1671,7 +1761,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
16711761
return dev;
16721762

16731763
init_tnl_info(&info, dst_port);
1674-
err = geneve_configure(net, dev, NULL, &info, true, true, false);
1764+
err = geneve_configure(net, dev, NULL, &info,
1765+
true, true, false, GENEVE_DF_UNSET);
16751766
if (err) {
16761767
free_netdev(dev);
16771768
return ERR_PTR(err);

drivers/net/vxlan.c

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1552,6 +1552,34 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
15521552
return 0;
15531553
}
15541554

1555+
/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
1556+
static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
1557+
{
1558+
struct vxlan_dev *vxlan;
1559+
struct vxlan_sock *vs;
1560+
struct vxlanhdr *hdr;
1561+
__be32 vni;
1562+
1563+
if (skb->len < VXLAN_HLEN)
1564+
return -EINVAL;
1565+
1566+
hdr = vxlan_hdr(skb);
1567+
1568+
if (!(hdr->vx_flags & VXLAN_HF_VNI))
1569+
return -EINVAL;
1570+
1571+
vs = rcu_dereference_sk_user_data(sk);
1572+
if (!vs)
1573+
return -ENOENT;
1574+
1575+
vni = vxlan_vni(hdr->vx_vni);
1576+
vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
1577+
if (!vxlan)
1578+
return -ENOENT;
1579+
1580+
return 0;
1581+
}
1582+
15551583
static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
15561584
{
15571585
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -2250,13 +2278,24 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
22502278
goto tx_error;
22512279
}
22522280

2253-
/* Bypass encapsulation if the destination is local */
22542281
if (!info) {
2282+
/* Bypass encapsulation if the destination is local */
22552283
err = encap_bypass_if_local(skb, dev, vxlan, dst,
22562284
dst_port, ifindex, vni,
22572285
&rt->dst, rt->rt_flags);
22582286
if (err)
22592287
goto out_unlock;
2288+
2289+
if (vxlan->cfg.df == VXLAN_DF_SET) {
2290+
df = htons(IP_DF);
2291+
} else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
2292+
struct ethhdr *eth = eth_hdr(skb);
2293+
2294+
if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
2295+
(ntohs(eth->h_proto) == ETH_P_IP &&
2296+
old_iph->frag_off & htons(IP_DF)))
2297+
df = htons(IP_DF);
2298+
}
22602299
} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
22612300
df = htons(IP_DF);
22622301
}
@@ -2809,6 +2848,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
28092848
[IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
28102849
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
28112850
[IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
2851+
[IFLA_VXLAN_DF] = { .type = NLA_U8 },
28122852
};
28132853

28142854
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -2865,6 +2905,16 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
28652905
}
28662906
}
28672907

2908+
if (data[IFLA_VXLAN_DF]) {
2909+
enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
2910+
2911+
if (df < 0 || df > VXLAN_DF_MAX) {
2912+
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_DF],
2913+
"Invalid DF attribute");
2914+
return -EINVAL;
2915+
}
2916+
}
2917+
28682918
return 0;
28692919
}
28702920

@@ -2948,6 +2998,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
29482998
tunnel_cfg.sk_user_data = vs;
29492999
tunnel_cfg.encap_type = 1;
29503000
tunnel_cfg.encap_rcv = vxlan_rcv;
3001+
tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
29513002
tunnel_cfg.encap_destroy = NULL;
29523003
tunnel_cfg.gro_receive = vxlan_gro_receive;
29533004
tunnel_cfg.gro_complete = vxlan_gro_complete;
@@ -3509,6 +3560,9 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
35093560
conf->mtu = nla_get_u32(tb[IFLA_MTU]);
35103561
}
35113562

3563+
if (data[IFLA_VXLAN_DF])
3564+
conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
3565+
35123566
return 0;
35133567
}
35143568

@@ -3601,6 +3655,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
36013655
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
36023656
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */
36033657
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
3658+
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
36043659
nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
36053660
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
36063661
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
@@ -3667,6 +3722,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
36673722
nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
36683723
!!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
36693724
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
3725+
nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
36703726
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
36713727
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
36723728
!!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||

include/linux/udp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ struct udp_sock {
7777
* For encapsulation sockets.
7878
*/
7979
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
80+
int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
8081
void (*encap_destroy)(struct sock *sk);
8182

8283
/* GRO functions for UDP socket */

include/net/icmp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ struct net;
4141

4242
void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
4343
int icmp_rcv(struct sk_buff *skb);
44-
void icmp_err(struct sk_buff *skb, u32 info);
44+
int icmp_err(struct sk_buff *skb, u32 info);
4545
int icmp_init(void);
4646
void icmp_out_count(struct net *net, unsigned char type);
4747

include/net/ip6_tunnel.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ struct ip6_tnl_encap_ops {
6969
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
7070
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
7171
u8 *protocol, struct flowi6 *fl6);
72+
int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
73+
u8 type, u8 code, int offset, __be32 info);
7274
};
7375

7476
#ifdef CONFIG_INET

include/net/ip_tunnels.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ struct ip_tunnel_encap_ops {
311311
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
312312
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
313313
u8 *protocol, struct flowi4 *fl4);
314+
int (*err_handler)(struct sk_buff *skb, u32 info);
314315
};
315316

316317
#define MAX_IPTUN_ENCAP_OPS 8

0 commit comments

Comments
 (0)