Skip to content

Commit 3093fbe

Browse files
tgraf authored and davem330 committed
route: Per route IP tunnel metadata via lightweight tunnel
This introduces a new IP tunnel lightweight tunnel type that allows IP tunnel instructions to be specified per route. Only IPv4 is supported at this point. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 1b7179d commit 3093fbe

File tree

8 files changed

+157
-5
lines changed

8 files changed

+157
-5
lines changed

drivers/net/vxlan.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,7 +1935,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
19351935
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19361936
struct vxlan_rdst *rdst, bool did_rsc)
19371937
{
1938-
struct ip_tunnel_info *info = skb_tunnel_info(skb);
1938+
struct ip_tunnel_info *info;
19391939
struct vxlan_dev *vxlan = netdev_priv(dev);
19401940
struct sock *sk = vxlan->vn_sock->sock->sk;
19411941
struct rtable *rt = NULL;
@@ -1952,6 +1952,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19521952
int err;
19531953
u32 flags = vxlan->flags;
19541954

1955+
/* FIXME: Support IPv6 */
1956+
info = skb_tunnel_info(skb, AF_INET);
1957+
19551958
if (rdst) {
19561959
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port;
19571960
vni = rdst->remote_vni;
@@ -2141,12 +2144,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
21412144
static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
21422145
{
21432146
struct vxlan_dev *vxlan = netdev_priv(dev);
2144-
const struct ip_tunnel_info *info = skb_tunnel_info(skb);
2147+
const struct ip_tunnel_info *info;
21452148
struct ethhdr *eth;
21462149
bool did_rsc = false;
21472150
struct vxlan_rdst *rdst, *fdst = NULL;
21482151
struct vxlan_fdb *f;
21492152

2153+
/* FIXME: Support IPv6 */
2154+
info = skb_tunnel_info(skb, AF_INET);
2155+
21502156
skb_reset_mac_header(skb);
21512157
eth = eth_hdr(skb);
21522158

include/net/dst_metadata.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,23 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
2323
return NULL;
2424
}
2525

26-
static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
26+
static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb,
27+
int family)
2728
{
2829
struct metadata_dst *md_dst = skb_metadata_dst(skb);
30+
struct rtable *rt;
2931

3032
if (md_dst)
3133
return &md_dst->u.tun_info;
3234

35+
switch (family) {
36+
case AF_INET:
37+
rt = (struct rtable *)skb_dst(skb);
38+
if (rt && rt->rt_lwtstate)
39+
return lwt_tun_info(rt->rt_lwtstate);
40+
break;
41+
}
42+
3343
return NULL;
3444
}
3545

include/net/ip_tunnels.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
#include <net/dsfield.h>
1010
#include <net/gro_cells.h>
1111
#include <net/inet_ecn.h>
12-
#include <net/ip.h>
1312
#include <net/netns/generic.h>
1413
#include <net/rtnetlink.h>
14+
#include <net/lwtunnel.h>
1515

1616
#if IS_ENABLED(CONFIG_IPV6)
1717
#include <net/ipv6.h>
@@ -298,6 +298,11 @@ static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info, size_t n)
298298
return info + 1;
299299
}
300300

301+
static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
302+
{
303+
return (struct ip_tunnel_info *)lwtstate->data;
304+
}
305+
301306
#endif /* CONFIG_INET */
302307

303308
#endif /* __NET_IP_TUNNELS_H */

include/uapi/linux/lwtunnel.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
enum lwtunnel_encap_types {
77
LWTUNNEL_ENCAP_NONE,
88
LWTUNNEL_ENCAP_MPLS,
9+
LWTUNNEL_ENCAP_IP,
910
__LWTUNNEL_ENCAP_MAX,
1011
};
1112

include/uapi/linux/rtnetlink.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,21 @@ enum rt_class_t {
286286

287287
/* Routing message attributes */
288288

/*
 * Netlink attributes describing a per-route IP tunnel encapsulation.
 * The payload widths below are the ones enforced by the kernel-side
 * attribute policy; the field names are the tunnel key members they fill.
 */
enum ip_tunnel_t {
	IP_TUN_UNSPEC = 0,
	IP_TUN_ID,		/* u64: tunnel id (key.tun_id) */
	IP_TUN_DST,		/* u32: IPv4 destination (key.ipv4_dst) */
	IP_TUN_SRC,		/* u32: IPv4 source (key.ipv4_src) */
	IP_TUN_TTL,		/* u8:  TTL (key.ipv4_ttl) */
	IP_TUN_TOS,		/* u8:  TOS (key.ipv4_tos) */
	IP_TUN_SPORT,		/* u16: source port (key.tp_src) */
	IP_TUN_DPORT,		/* u16: destination port (key.tp_dst) */
	IP_TUN_FLAGS,		/* u16: tunnel flags (key.tun_flags) */
	__IP_TUN_MAX,
};

/* Highest valid IP_TUN_* attribute type. */
#define IP_TUN_MAX (__IP_TUN_MAX - 1)
303+
289304
enum rtattr_type_t {
290305
RTA_UNSPEC,
291306
RTA_DST,

net/ipv4/ip_tunnel_core.c

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,117 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
190190
return tot;
191191
}
192192
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
193+
194+
static const struct nla_policy ip_tun_policy[IP_TUN_MAX + 1] = {
195+
[IP_TUN_ID] = { .type = NLA_U64 },
196+
[IP_TUN_DST] = { .type = NLA_U32 },
197+
[IP_TUN_SRC] = { .type = NLA_U32 },
198+
[IP_TUN_TTL] = { .type = NLA_U8 },
199+
[IP_TUN_TOS] = { .type = NLA_U8 },
200+
[IP_TUN_SPORT] = { .type = NLA_U16 },
201+
[IP_TUN_DPORT] = { .type = NLA_U16 },
202+
[IP_TUN_FLAGS] = { .type = NLA_U16 },
203+
};
204+
205+
static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
206+
struct lwtunnel_state **ts)
207+
{
208+
struct ip_tunnel_info *tun_info;
209+
struct lwtunnel_state *new_state;
210+
struct nlattr *tb[IP_TUN_MAX + 1];
211+
int err;
212+
213+
err = nla_parse_nested(tb, IP_TUN_MAX, attr, ip_tun_policy);
214+
if (err < 0)
215+
return err;
216+
217+
new_state = lwtunnel_state_alloc(sizeof(*tun_info));
218+
if (!new_state)
219+
return -ENOMEM;
220+
221+
new_state->type = LWTUNNEL_ENCAP_IP;
222+
223+
tun_info = lwt_tun_info(new_state);
224+
225+
if (tb[IP_TUN_ID])
226+
tun_info->key.tun_id = nla_get_u64(tb[IP_TUN_ID]);
227+
228+
if (tb[IP_TUN_DST])
229+
tun_info->key.ipv4_dst = nla_get_be32(tb[IP_TUN_DST]);
230+
231+
if (tb[IP_TUN_SRC])
232+
tun_info->key.ipv4_src = nla_get_be32(tb[IP_TUN_SRC]);
233+
234+
if (tb[IP_TUN_TTL])
235+
tun_info->key.ipv4_ttl = nla_get_u8(tb[IP_TUN_TTL]);
236+
237+
if (tb[IP_TUN_TOS])
238+
tun_info->key.ipv4_tos = nla_get_u8(tb[IP_TUN_TOS]);
239+
240+
if (tb[IP_TUN_SPORT])
241+
tun_info->key.tp_src = nla_get_be16(tb[IP_TUN_SPORT]);
242+
243+
if (tb[IP_TUN_DPORT])
244+
tun_info->key.tp_dst = nla_get_be16(tb[IP_TUN_DPORT]);
245+
246+
if (tb[IP_TUN_FLAGS])
247+
tun_info->key.tun_flags = nla_get_u16(tb[IP_TUN_FLAGS]);
248+
249+
tun_info->mode = IP_TUNNEL_INFO_TX;
250+
tun_info->options = NULL;
251+
tun_info->options_len = 0;
252+
253+
*ts = new_state;
254+
255+
return 0;
256+
}
257+
258+
static int ip_tun_fill_encap_info(struct sk_buff *skb,
259+
struct lwtunnel_state *lwtstate)
260+
{
261+
struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
262+
263+
if (nla_put_u64(skb, IP_TUN_ID, tun_info->key.tun_id) ||
264+
nla_put_be32(skb, IP_TUN_DST, tun_info->key.ipv4_dst) ||
265+
nla_put_be32(skb, IP_TUN_SRC, tun_info->key.ipv4_src) ||
266+
nla_put_u8(skb, IP_TUN_TOS, tun_info->key.ipv4_tos) ||
267+
nla_put_u8(skb, IP_TUN_TTL, tun_info->key.ipv4_ttl) ||
268+
nla_put_u16(skb, IP_TUN_SPORT, tun_info->key.tp_src) ||
269+
nla_put_u16(skb, IP_TUN_DPORT, tun_info->key.tp_dst) ||
270+
nla_put_u16(skb, IP_TUN_FLAGS, tun_info->key.tun_flags))
271+
return -ENOMEM;
272+
273+
return 0;
274+
}
275+
/*
 * Netlink space needed to dump an IP tunnel encap: one attribute for each
 * IP_TUN_* value.  The size is fixed, so @lwtstate is not consulted.
 */
static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	int size = 0;

	size += nla_total_size(8);	/* IP_TUN_ID */
	size += nla_total_size(4);	/* IP_TUN_DST */
	size += nla_total_size(4);	/* IP_TUN_SRC */
	size += nla_total_size(1);	/* IP_TUN_TOS */
	size += nla_total_size(1);	/* IP_TUN_TTL */
	size += nla_total_size(2);	/* IP_TUN_SPORT */
	size += nla_total_size(2);	/* IP_TUN_DPORT */
	size += nla_total_size(2);	/* IP_TUN_FLAGS */

	return size;
}
287+
288+
static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
289+
.build_state = ip_tun_build_state,
290+
.fill_encap = ip_tun_fill_encap_info,
291+
.get_encap_size = ip_tun_encap_nlsize,
292+
};
293+
294+
static int __init ip_tunnel_core_init(void)
295+
{
296+
lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
297+
298+
return 0;
299+
}
300+
module_init(ip_tunnel_core_init);
301+
302+
static void __exit ip_tunnel_core_exit(void)
303+
{
304+
lwtunnel_encap_del_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
305+
}
306+
module_exit(ip_tunnel_core_exit);

net/ipv4/route.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1693,7 +1693,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
16931693
by fib_lookup.
16941694
*/
16951695

1696-
tun_info = skb_tunnel_info(skb);
1696+
tun_info = skb_tunnel_info(skb, AF_INET);
16971697
if (tun_info && tun_info->mode == IP_TUNNEL_INFO_RX)
16981698
fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
16991699
else

net/openvswitch/vport.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <linux/skbuff.h>
2828
#include <linux/spinlock.h>
2929
#include <linux/u64_stats_sync.h>
30+
#include <net/route.h>
3031

3132
#include "datapath.h"
3233

0 commit comments

Comments
 (0)