Skip to content

Commit b1be00a

Browse files
Jiri Benc authored and davem330 committed
vxlan: support both IPv4 and IPv6 sockets in a single vxlan device
For metadata based vxlan interface, open both IPv4 and IPv6 socket. This is much more user friendly: it's not necessary to create two vxlan interfaces and pay attention to using the right one in routing rules.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 205f356 commit b1be00a

File tree

3 files changed: 103 additions and 42 deletions

drivers/net/vxlan.c

Lines changed: 90 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -993,19 +993,30 @@ static bool vxlan_snoop(struct net_device *dev,
993993
static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
994994
{
995995
struct vxlan_dev *vxlan;
996+
unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
996997

997998
/* The vxlan_sock is only used by dev, leaving group has
998999
* no effect on other vxlan devices.
9991000
*/
1000-
if (atomic_read(&dev->vn_sock->refcnt) == 1)
1001+
if (family == AF_INET && dev->vn4_sock &&
1002+
atomic_read(&dev->vn4_sock->refcnt) == 1)
10011003
return false;
1004+
#if IS_ENABLED(CONFIG_IPV6)
1005+
if (family == AF_INET6 && dev->vn6_sock &&
1006+
atomic_read(&dev->vn6_sock->refcnt) == 1)
1007+
return false;
1008+
#endif
10021009

10031010
list_for_each_entry(vxlan, &vn->vxlan_list, next) {
10041011
if (!netif_running(vxlan->dev) || vxlan == dev)
10051012
continue;
10061013

1007-
if (vxlan->vn_sock != dev->vn_sock)
1014+
if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
10081015
continue;
1016+
#if IS_ENABLED(CONFIG_IPV6)
1017+
if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
1018+
continue;
1019+
#endif
10091020

10101021
if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
10111022
&dev->default_dst.remote_ip))
@@ -1021,16 +1032,16 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
10211032
return false;
10221033
}
10231034

1024-
static void vxlan_sock_release(struct vxlan_dev *vxlan)
1035+
static void __vxlan_sock_release(struct vxlan_sock *vs)
10251036
{
1026-
struct vxlan_sock *vs = vxlan->vn_sock;
1027-
struct sock *sk = vs->sock->sk;
1028-
struct net *net = sock_net(sk);
1029-
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1037+
struct vxlan_net *vn;
10301038

1039+
if (!vs)
1040+
return;
10311041
if (!atomic_dec_and_test(&vs->refcnt))
10321042
return;
10331043

1044+
vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
10341045
spin_lock(&vn->sock_lock);
10351046
hlist_del_rcu(&vs->hlist);
10361047
vxlan_notify_del_rx_port(vs);
@@ -1039,60 +1050,74 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
10391050
queue_work(vxlan_wq, &vs->del_work);
10401051
}
10411052

1053+
static void vxlan_sock_release(struct vxlan_dev *vxlan)
1054+
{
1055+
__vxlan_sock_release(vxlan->vn4_sock);
1056+
#if IS_ENABLED(CONFIG_IPV6)
1057+
__vxlan_sock_release(vxlan->vn6_sock);
1058+
#endif
1059+
}
1060+
10421061
/* Update multicast group membership when first VNI on
10431062
* multicast address is brought up
10441063
*/
10451064
static int vxlan_igmp_join(struct vxlan_dev *vxlan)
10461065
{
1047-
struct vxlan_sock *vs = vxlan->vn_sock;
1048-
struct sock *sk = vs->sock->sk;
1066+
struct sock *sk;
10491067
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
10501068
int ifindex = vxlan->default_dst.remote_ifindex;
10511069
int ret = -EINVAL;
10521070

1053-
lock_sock(sk);
10541071
if (ip->sa.sa_family == AF_INET) {
10551072
struct ip_mreqn mreq = {
10561073
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
10571074
.imr_ifindex = ifindex,
10581075
};
10591076

1077+
sk = vxlan->vn4_sock->sock->sk;
1078+
lock_sock(sk);
10601079
ret = ip_mc_join_group(sk, &mreq);
1080+
release_sock(sk);
10611081
#if IS_ENABLED(CONFIG_IPV6)
10621082
} else {
1083+
sk = vxlan->vn6_sock->sock->sk;
1084+
lock_sock(sk);
10631085
ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
10641086
&ip->sin6.sin6_addr);
1087+
release_sock(sk);
10651088
#endif
10661089
}
1067-
release_sock(sk);
10681090

10691091
return ret;
10701092
}
10711093

10721094
/* Inverse of vxlan_igmp_join when last VNI is brought down */
10731095
static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
10741096
{
1075-
struct vxlan_sock *vs = vxlan->vn_sock;
1076-
struct sock *sk = vs->sock->sk;
1097+
struct sock *sk;
10771098
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
10781099
int ifindex = vxlan->default_dst.remote_ifindex;
10791100
int ret = -EINVAL;
10801101

1081-
lock_sock(sk);
10821102
if (ip->sa.sa_family == AF_INET) {
10831103
struct ip_mreqn mreq = {
10841104
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
10851105
.imr_ifindex = ifindex,
10861106
};
10871107

1108+
sk = vxlan->vn4_sock->sock->sk;
1109+
lock_sock(sk);
10881110
ret = ip_mc_leave_group(sk, &mreq);
1111+
release_sock(sk);
10891112
#if IS_ENABLED(CONFIG_IPV6)
10901113
} else {
1114+
sk = vxlan->vn6_sock->sock->sk;
1115+
lock_sock(sk);
10911116
ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
10921117
&ip->sin6.sin6_addr);
1118+
release_sock(sk);
10931119
#endif
10941120
}
1095-
release_sock(sk);
10961121

10971122
return ret;
10981123
}
@@ -1873,8 +1898,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
18731898
{
18741899
struct ip_tunnel_info *info;
18751900
struct vxlan_dev *vxlan = netdev_priv(dev);
1876-
struct sock *sk = vxlan->vn_sock->sock->sk;
1877-
unsigned short family = vxlan_get_sk_family(vxlan->vn_sock);
1901+
struct sock *sk;
18781902
struct rtable *rt = NULL;
18791903
const struct iphdr *old_iph;
18801904
struct flowi4 fl4;
@@ -1901,13 +1925,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19011925
dev->name);
19021926
goto drop;
19031927
}
1904-
if (family != ip_tunnel_info_af(info))
1905-
goto drop;
1906-
19071928
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
19081929
vni = be64_to_cpu(info->key.tun_id);
1909-
remote_ip.sa.sa_family = family;
1910-
if (family == AF_INET)
1930+
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
1931+
if (remote_ip.sa.sa_family == AF_INET)
19111932
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
19121933
else
19131934
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
@@ -1952,6 +1973,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19521973
}
19531974

19541975
if (dst->sa.sa_family == AF_INET) {
1976+
if (!vxlan->vn4_sock)
1977+
goto drop;
1978+
sk = vxlan->vn4_sock->sock->sk;
1979+
19551980
if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
19561981
df = htons(IP_DF);
19571982

@@ -2013,6 +2038,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
20132038
struct flowi6 fl6;
20142039
u32 rt6i_flags;
20152040

2041+
if (!vxlan->vn6_sock)
2042+
goto drop;
2043+
sk = vxlan->vn6_sock->sock->sk;
2044+
20162045
memset(&fl6, 0, sizeof(fl6));
20172046
fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0;
20182047
fl6.daddr = dst->sin6.sin6_addr;
@@ -2204,7 +2233,6 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
22042233
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
22052234
__u32 vni = vxlan->default_dst.remote_vni;
22062235

2207-
vxlan->vn_sock = vs;
22082236
spin_lock(&vn->sock_lock);
22092237
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
22102238
spin_unlock(&vn->sock_lock);
@@ -2535,14 +2563,13 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
25352563
}
25362564

25372565
/* Create new listen socket if needed */
2538-
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
2539-
u32 flags)
2566+
static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
2567+
__be16 port, u32 flags)
25402568
{
25412569
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
25422570
struct vxlan_sock *vs;
25432571
struct socket *sock;
25442572
unsigned int h;
2545-
bool ipv6 = !!(flags & VXLAN_F_IPV6);
25462573
struct udp_tunnel_sock_cfg tunnel_cfg;
25472574

25482575
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
@@ -2587,11 +2614,10 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
25872614
return vs;
25882615
}
25892616

2590-
static int vxlan_sock_add(struct vxlan_dev *vxlan)
2617+
static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
25912618
{
25922619
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
25932620
struct vxlan_sock *vs = NULL;
2594-
bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
25952621

25962622
if (!vxlan->cfg.no_share) {
25972623
spin_lock(&vn->sock_lock);
@@ -2604,20 +2630,46 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)
26042630
spin_unlock(&vn->sock_lock);
26052631
}
26062632
if (!vs)
2607-
vs = vxlan_socket_create(vxlan->net, vxlan->cfg.dst_port,
2608-
vxlan->flags);
2633+
vs = vxlan_socket_create(vxlan->net, ipv6,
2634+
vxlan->cfg.dst_port, vxlan->flags);
26092635
if (IS_ERR(vs))
26102636
return PTR_ERR(vs);
2637+
#if IS_ENABLED(CONFIG_IPV6)
2638+
if (ipv6)
2639+
vxlan->vn6_sock = vs;
2640+
else
2641+
#endif
2642+
vxlan->vn4_sock = vs;
26112643
vxlan_vs_add_dev(vs, vxlan);
26122644
return 0;
26132645
}
26142646

2647+
static int vxlan_sock_add(struct vxlan_dev *vxlan)
2648+
{
2649+
bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
2650+
bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
2651+
int ret = 0;
2652+
2653+
vxlan->vn4_sock = NULL;
2654+
#if IS_ENABLED(CONFIG_IPV6)
2655+
vxlan->vn6_sock = NULL;
2656+
if (ipv6 || metadata)
2657+
ret = __vxlan_sock_add(vxlan, true);
2658+
#endif
2659+
if (!ret && (!ipv6 || metadata))
2660+
ret = __vxlan_sock_add(vxlan, false);
2661+
if (ret < 0)
2662+
vxlan_sock_release(vxlan);
2663+
return ret;
2664+
}
2665+
26152666
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
26162667
struct vxlan_config *conf)
26172668
{
26182669
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
26192670
struct vxlan_dev *vxlan = netdev_priv(dev);
26202671
struct vxlan_rdst *dst = &vxlan->default_dst;
2672+
unsigned short needed_headroom = ETH_HLEN;
26212673
int err;
26222674
bool use_ipv6 = false;
26232675
__be16 default_port = vxlan->cfg.dst_port;
@@ -2637,6 +2689,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
26372689
if (!IS_ENABLED(CONFIG_IPV6))
26382690
return -EPFNOSUPPORT;
26392691
use_ipv6 = true;
2692+
vxlan->flags |= VXLAN_F_IPV6;
26402693
}
26412694

26422695
if (conf->remote_ifindex) {
@@ -2657,22 +2710,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
26572710
pr_info("IPv6 is disabled via sysctl\n");
26582711
return -EPERM;
26592712
}
2660-
vxlan->flags |= VXLAN_F_IPV6;
26612713
}
26622714
#endif
26632715

26642716
if (!conf->mtu)
26652717
dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
26662718

2667-
dev->needed_headroom = lowerdev->hard_header_len +
2668-
(use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
2669-
} else if (use_ipv6) {
2670-
vxlan->flags |= VXLAN_F_IPV6;
2671-
dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM;
2672-
} else {
2673-
dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM;
2719+
needed_headroom = lowerdev->hard_header_len;
26742720
}
26752721

2722+
if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
2723+
needed_headroom += VXLAN6_HEADROOM;
2724+
else
2725+
needed_headroom += VXLAN_HEADROOM;
2726+
dev->needed_headroom = needed_headroom;
2727+
26762728
memcpy(&vxlan->cfg, conf, sizeof(*conf));
26772729
if (!vxlan->cfg.dst_port)
26782730
vxlan->cfg.dst_port = default_port;

include/net/vxlan.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,10 @@ struct vxlan_config {
152152
struct vxlan_dev {
153153
struct hlist_node hlist; /* vni hash table */
154154
struct list_head next; /* vxlan's per namespace list */
155-
struct vxlan_sock *vn_sock; /* listening socket */
155+
struct vxlan_sock *vn4_sock; /* listening socket for IPv4 */
156+
#if IS_ENABLED(CONFIG_IPV6)
157+
struct vxlan_sock *vn6_sock; /* listening socket for IPv6 */
158+
#endif
156159
struct net_device *dev;
157160
struct net *net; /* netns for packet i/o */
158161
struct vxlan_rdst default_dst; /* default destination */
@@ -195,9 +198,14 @@ struct vxlan_dev {
195198
struct net_device *vxlan_dev_create(struct net *net, const char *name,
196199
u8 name_assign_type, struct vxlan_config *conf);
197200

198-
static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan)
201+
static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan,
202+
unsigned short family)
199203
{
200-
return inet_sk(vxlan->vn_sock->sock->sk)->inet_sport;
204+
#if IS_ENABLED(CONFIG_IPV6)
205+
if (family == AF_INET6)
206+
return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport;
207+
#endif
208+
return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport;
201209
}
202210

203211
static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,

net/openvswitch/vport-vxlan.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
151151
{
152152
struct vxlan_dev *vxlan = netdev_priv(vport->dev);
153153
struct net *net = ovs_dp_get_net(vport->dp);
154-
__be16 dst_port = vxlan_dev_dst_port(vxlan);
154+
unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
155+
__be16 dst_port = vxlan_dev_dst_port(vxlan, family);
155156
__be16 src_port;
156157
int port_min;
157158
int port_max;

0 commit comments

Comments
 (0)