Skip to content

Commit 9c98b13

Browse files
Petar Penkov authored and Alexei Starovoitov committed
flow_dissector: implements eBPF parser
This eBPF program extracts basic/control/ip address/ports keys from incoming packets. It supports recursive parsing for IP encapsulation, and VLAN, along with IPv4/IPv6 and extension headers. This program is meant to show how flow dissection and key extraction can be done in eBPF. Link: http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf Signed-off-by: Petar Penkov <ppenkov@google.com> Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
1 parent c22fbae commit 9c98b13

File tree

2 files changed

+374
-1
lines changed

2 files changed

+374
-1
lines changed

tools/testing/selftests/bpf/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
3535
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
3636
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
3737
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
38-
test_skb_cgroup_id_kern.o
38+
test_skb_cgroup_id_kern.o bpf_flow.o
3939

4040
# Order correspond to 'make run_tests' order
4141
TEST_PROGS := test_kmod.sh \
Lines changed: 373 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#include <limits.h>
3+
#include <stddef.h>
4+
#include <stdbool.h>
5+
#include <string.h>
6+
#include <linux/pkt_cls.h>
7+
#include <linux/bpf.h>
8+
#include <linux/in.h>
9+
#include <linux/if_ether.h>
10+
#include <linux/icmp.h>
11+
#include <linux/ip.h>
12+
#include <linux/ipv6.h>
13+
#include <linux/tcp.h>
14+
#include <linux/udp.h>
15+
#include <linux/if_packet.h>
16+
#include <sys/socket.h>
17+
#include <linux/if_tunnel.h>
18+
#include <linux/mpls.h>
19+
#include "bpf_helpers.h"
20+
#include "bpf_endian.h"
21+
22+
/* Integer constant 1 emitted into the "version" section of the object file
 * (SEC() comes from bpf_helpers.h; presumably consumed by the BPF loader --
 * TODO confirm).
 */
int _version SEC("version") = 1;
23+
/* Declares the head of a tail-call target: the function is named bpf_func_<F>,
 * returns int, and is tagged with SEC("<F>") (the stringified argument).
 * Usage: PROG(IP)(struct __sk_buff *skb) { ... }
 */
#define PROG(F) SEC(#F) int bpf_func_##F
24+
25+
/* Indices of the BPF programs reachable through bpf_tail_call() on jmp_table.
 *
 * Each identifier is also used (via PROG()) as a section name and as the
 * suffix of a function name. Names are limited to 16 characters; after the
 * terminating character and the "bpf_func_" prefix only 6 characters remain,
 * and anything beyond that is cropped.
 */
enum {
	IP     = 0,
	IPV6   = 1,
	IPV6OP = 2,	/* Destination/Hop-by-Hop Options IPv6 Extension header */
	IPV6FR = 3,	/* Fragmentation IPv6 Extension Header */
	MPLS   = 4,
	VLAN   = 5,
};
37+
38+
/* Masks for the 16-bit frag_off fields, expressed in host byte order; users
 * convert them with bpf_htons() before testing the on-wire value.
 */

/* IPv4 header: "more fragments" flag and fragment-offset bits */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF

/* IPv6 fragment extension header: "more fragments" flag and offset bits */
#define IP6_MF		0x0001
#define IP6_OFFSET	0xFFF8
42+
43+
struct vlan_hdr {
44+
__be16 h_vlan_TCI;
45+
__be16 h_vlan_encapsulated_proto;
46+
};
47+
48+
struct gre_hdr {
49+
__be16 flags;
50+
__be16 proto;
51+
};
52+
53+
struct frag_hdr {
54+
__u8 nexthdr;
55+
__u8 reserved;
56+
__be16 frag_off;
57+
__be32 identification;
58+
};
59+
60+
/* Program array used as the target map of every bpf_tail_call() in this file.
 * Keys and values are 32-bit; there is room for 8 entries.
 */
struct bpf_map_def SEC("maps") jmp_table = {
	.type		= BPF_MAP_TYPE_PROG_ARRAY,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u32),
	.max_entries	= 8,
};
66+
67+
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
68+
__u16 hdr_size,
69+
void *buffer)
70+
{
71+
void *data_end = (void *)(long)skb->data_end;
72+
void *data = (void *)(long)skb->data;
73+
__u16 nhoff = skb->flow_keys->nhoff;
74+
__u8 *hdr;
75+
76+
/* Verifies this variable offset does not overflow */
77+
if (nhoff > (USHRT_MAX - hdr_size))
78+
return NULL;
79+
80+
hdr = data + nhoff;
81+
if (hdr + hdr_size <= data_end)
82+
return hdr;
83+
84+
if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
85+
return NULL;
86+
87+
return buffer;
88+
}
89+
90+
/* Dispatches on ETHERTYPE */
91+
static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
92+
{
93+
struct bpf_flow_keys *keys = skb->flow_keys;
94+
95+
keys->n_proto = proto;
96+
switch (proto) {
97+
case bpf_htons(ETH_P_IP):
98+
bpf_tail_call(skb, &jmp_table, IP);
99+
break;
100+
case bpf_htons(ETH_P_IPV6):
101+
bpf_tail_call(skb, &jmp_table, IPV6);
102+
break;
103+
case bpf_htons(ETH_P_MPLS_MC):
104+
case bpf_htons(ETH_P_MPLS_UC):
105+
bpf_tail_call(skb, &jmp_table, MPLS);
106+
break;
107+
case bpf_htons(ETH_P_8021Q):
108+
case bpf_htons(ETH_P_8021AD):
109+
bpf_tail_call(skb, &jmp_table, VLAN);
110+
break;
111+
default:
112+
/* Protocol not supported */
113+
return BPF_DROP;
114+
}
115+
116+
return BPF_DROP;
117+
}
118+
119+
/* Entry program. Starts dissection from skb->vlan_proto when
 * skb->vlan_present is set, and from skb->protocol otherwise.
 */
SEC("dissect")
int dissect(struct __sk_buff *skb)
{
	__be16 ethertype;

	ethertype = skb->vlan_present ? skb->vlan_proto : skb->protocol;

	return parse_eth_proto(skb, ethertype);
}
127+
128+
/* Parses on IPPROTO_* */
129+
static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
130+
{
131+
struct bpf_flow_keys *keys = skb->flow_keys;
132+
void *data_end = (void *)(long)skb->data_end;
133+
struct icmphdr *icmp, _icmp;
134+
struct gre_hdr *gre, _gre;
135+
struct ethhdr *eth, _eth;
136+
struct tcphdr *tcp, _tcp;
137+
struct udphdr *udp, _udp;
138+
139+
keys->ip_proto = proto;
140+
switch (proto) {
141+
case IPPROTO_ICMP:
142+
icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
143+
if (!icmp)
144+
return BPF_DROP;
145+
return BPF_OK;
146+
case IPPROTO_IPIP:
147+
keys->is_encap = true;
148+
return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
149+
case IPPROTO_IPV6:
150+
keys->is_encap = true;
151+
return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
152+
case IPPROTO_GRE:
153+
gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
154+
if (!gre)
155+
return BPF_DROP;
156+
157+
if (bpf_htons(gre->flags & GRE_VERSION))
158+
/* Only inspect standard GRE packets with version 0 */
159+
return BPF_OK;
160+
161+
keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
162+
if (GRE_IS_CSUM(gre->flags))
163+
keys->nhoff += 4; /* Step over chksum and Padding */
164+
if (GRE_IS_KEY(gre->flags))
165+
keys->nhoff += 4; /* Step over key */
166+
if (GRE_IS_SEQ(gre->flags))
167+
keys->nhoff += 4; /* Step over sequence number */
168+
169+
keys->is_encap = true;
170+
171+
if (gre->proto == bpf_htons(ETH_P_TEB)) {
172+
eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
173+
&_eth);
174+
if (!eth)
175+
return BPF_DROP;
176+
177+
keys->nhoff += sizeof(*eth);
178+
179+
return parse_eth_proto(skb, eth->h_proto);
180+
} else {
181+
return parse_eth_proto(skb, gre->proto);
182+
}
183+
case IPPROTO_TCP:
184+
tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
185+
if (!tcp)
186+
return BPF_DROP;
187+
188+
if (tcp->doff < 5)
189+
return BPF_DROP;
190+
191+
if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
192+
return BPF_DROP;
193+
194+
keys->thoff = keys->nhoff;
195+
keys->sport = tcp->source;
196+
keys->dport = tcp->dest;
197+
return BPF_OK;
198+
case IPPROTO_UDP:
199+
case IPPROTO_UDPLITE:
200+
udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
201+
if (!udp)
202+
return BPF_DROP;
203+
204+
keys->thoff = keys->nhoff;
205+
keys->sport = udp->source;
206+
keys->dport = udp->dest;
207+
return BPF_OK;
208+
default:
209+
return BPF_DROP;
210+
}
211+
212+
return BPF_DROP;
213+
}
214+
215+
/* Continue dissection after an IPv6 (extension) header whose next-header
 * value is @nexthdr; the value is stored in flow_keys->ip_proto.
 *
 * Hop-by-hop/destination options and fragment headers are handed to their
 * own programs with bpf_tail_call(); everything else goes to
 * parse_ip_proto(). Returns BPF_DROP if a tail call returns to the caller.
 */
static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
{
	skb->flow_keys->ip_proto = nexthdr;

	if (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS)
		bpf_tail_call(skb, &jmp_table, IPV6OP);
	else if (nexthdr == IPPROTO_FRAGMENT)
		bpf_tail_call(skb, &jmp_table, IPV6FR);
	else
		return parse_ip_proto(skb, nexthdr);

	return BPF_DROP;
}
234+
235+
PROG(IP)(struct __sk_buff *skb)
236+
{
237+
void *data_end = (void *)(long)skb->data_end;
238+
struct bpf_flow_keys *keys = skb->flow_keys;
239+
void *data = (void *)(long)skb->data;
240+
struct iphdr *iph, _iph;
241+
bool done = false;
242+
243+
iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
244+
if (!iph)
245+
return BPF_DROP;
246+
247+
/* IP header cannot be smaller than 20 bytes */
248+
if (iph->ihl < 5)
249+
return BPF_DROP;
250+
251+
keys->addr_proto = ETH_P_IP;
252+
keys->ipv4_src = iph->saddr;
253+
keys->ipv4_dst = iph->daddr;
254+
255+
keys->nhoff += iph->ihl << 2;
256+
if (data + keys->nhoff > data_end)
257+
return BPF_DROP;
258+
259+
if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
260+
keys->is_frag = true;
261+
if (iph->frag_off & bpf_htons(IP_OFFSET))
262+
/* From second fragment on, packets do not have headers
263+
* we can parse.
264+
*/
265+
done = true;
266+
else
267+
keys->is_first_frag = true;
268+
}
269+
270+
if (done)
271+
return BPF_OK;
272+
273+
return parse_ip_proto(skb, iph->protocol);
274+
}
275+
276+
PROG(IPV6)(struct __sk_buff *skb)
277+
{
278+
struct bpf_flow_keys *keys = skb->flow_keys;
279+
struct ipv6hdr *ip6h, _ip6h;
280+
281+
ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
282+
if (!ip6h)
283+
return BPF_DROP;
284+
285+
keys->addr_proto = ETH_P_IPV6;
286+
memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
287+
288+
keys->nhoff += sizeof(struct ipv6hdr);
289+
290+
return parse_ipv6_proto(skb, ip6h->nexthdr);
291+
}
292+
293+
PROG(IPV6OP)(struct __sk_buff *skb)
294+
{
295+
struct ipv6_opt_hdr *ip6h, _ip6h;
296+
297+
ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
298+
if (!ip6h)
299+
return BPF_DROP;
300+
301+
/* hlen is in 8-octets and does not include the first 8 bytes
302+
* of the header
303+
*/
304+
skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
305+
306+
return parse_ipv6_proto(skb, ip6h->nexthdr);
307+
}
308+
309+
PROG(IPV6FR)(struct __sk_buff *skb)
310+
{
311+
struct bpf_flow_keys *keys = skb->flow_keys;
312+
struct frag_hdr *fragh, _fragh;
313+
314+
fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
315+
if (!fragh)
316+
return BPF_DROP;
317+
318+
keys->nhoff += sizeof(*fragh);
319+
keys->is_frag = true;
320+
if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
321+
keys->is_first_frag = true;
322+
323+
return parse_ipv6_proto(skb, fragh->nexthdr);
324+
}
325+
326+
PROG(MPLS)(struct __sk_buff *skb)
327+
{
328+
struct mpls_label *mpls, _mpls;
329+
330+
mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
331+
if (!mpls)
332+
return BPF_DROP;
333+
334+
return BPF_OK;
335+
}
336+
337+
PROG(VLAN)(struct __sk_buff *skb)
338+
{
339+
struct bpf_flow_keys *keys = skb->flow_keys;
340+
struct vlan_hdr *vlan, _vlan;
341+
__be16 proto;
342+
343+
/* Peek back to see if single or double-tagging */
344+
if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
345+
sizeof(proto)))
346+
return BPF_DROP;
347+
348+
/* Account for double-tagging */
349+
if (proto == bpf_htons(ETH_P_8021AD)) {
350+
vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
351+
if (!vlan)
352+
return BPF_DROP;
353+
354+
if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
355+
return BPF_DROP;
356+
357+
keys->nhoff += sizeof(*vlan);
358+
}
359+
360+
vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
361+
if (!vlan)
362+
return BPF_DROP;
363+
364+
keys->nhoff += sizeof(*vlan);
365+
/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
366+
if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
367+
vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
368+
return BPF_DROP;
369+
370+
return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
371+
}
372+
373+
/* License string "GPL" emitted into the "license" section of the object file
 * (presumably checked by the kernel when the program is loaded -- TODO
 * confirm).
 */
char __license[] SEC("license") = "GPL";

0 commit comments

Comments
 (0)