Skip to content

Commit 1b80f86

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2018-04-21 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Initial work on BPF Type Format (BTF) is added, which is a metadata format that describes the data types of BPF programs / maps. BTF has its roots in CTF (Compact C-Type Format) with a number of changes to it. The first use case is to provide a generic pretty-print capability for BPF map inspection; later work will also add BTF to bpftool. pahole support to convert dwarf to BTF will be upstreamed as well (https://github.com/iamkafai/pahole/tree/btf), from Martin. 2) Add a new bpf_xdp_adjust_tail() BPF helper for XDP that allows for changing the data_end pointer. Only shrinking is currently supported, which helps for crafting ICMP control messages. Minor changes in drivers have been added where needed so they recalc the packet's length also when data_end was adjusted, from Nikita. 3) Improve bpftool to make it easier to feed hex bytes via cmdline for map operations, from Quentin. 4) Add support for various missing BPF prog types and attach types that have been added to the kernel recently but neither to bpftool nor libbpf yet. Doc and bash completion updates have been added as well for bpftool, from Andrey. 5) Proper fix to avoid leaking info stored in frame data on page reuse for the two bpf_xdp_adjust_{head,meta} helpers by disallowing moving the pointers into the struct xdp_frame area, from Jesper. 6) Follow-up compile fix from BTF in order to include stdbool.h in libbpf, from Björn. 7) A few fixes in BPF sample code, that is, a typo on the netdevice in a comment and a fixup for the proper dump of the XDP action code in the tracepoint exception, from Wang and Jesper. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents cf1a1e0 + 878a4d3 commit 1b80f86

File tree

45 files changed

+5591
-89
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+5591
-89
lines changed

drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
113113
if (tx_avail != bp->tx_ring_size)
114114
*event &= ~BNXT_RX_EVENT;
115115

116+
*len = xdp.data_end - xdp.data;
116117
if (orig_data != xdp.data) {
117118
offset = xdp.data - xdp.data_hard_start;
118119
*data_ptr = xdp.data_hard_start + offset;
119-
*len = xdp.data_end - xdp.data;
120120
}
121121
switch (act) {
122122
case XDP_PASS:

drivers/net/ethernet/cavium/thunder/nicvf_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
538538
action = bpf_prog_run_xdp(prog, &xdp);
539539
rcu_read_unlock();
540540

541+
len = xdp.data_end - xdp.data;
541542
/* Check if XDP program has changed headers */
542543
if (orig_data != xdp.data) {
543-
len = xdp.data_end - xdp.data;
544544
offset = orig_data - xdp.data;
545545
dma_addr -= offset;
546546
}

drivers/net/ethernet/mellanox/mlx4/en_rx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
775775

776776
act = bpf_prog_run_xdp(xdp_prog, &xdp);
777777

778+
length = xdp.data_end - xdp.data;
778779
if (xdp.data != orig_data) {
779-
length = xdp.data_end - xdp.data;
780780
frags[0].page_offset = xdp.data -
781781
xdp.data_hard_start;
782782
va = xdp.data;

drivers/net/ethernet/netronome/nfp/nfp_net_common.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
17221722

17231723
act = bpf_prog_run_xdp(xdp_prog, &xdp);
17241724

1725-
pkt_len -= xdp.data - orig_data;
1725+
pkt_len = xdp.data_end - xdp.data;
17261726
pkt_off += xdp.data - orig_data;
17271727

17281728
switch (act) {

drivers/net/tun.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
16901690
return NULL;
16911691
case XDP_PASS:
16921692
delta = orig_data - xdp.data;
1693+
len = xdp.data_end - xdp.data;
16931694
break;
16941695
default:
16951696
bpf_warn_invalid_xdp_action(act);
@@ -1710,7 +1711,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
17101711
}
17111712

17121713
skb_reserve(skb, pad - delta);
1713-
skb_put(skb, len + delta);
1714+
skb_put(skb, len);
17141715
get_page(alloc_frag->page);
17151716
alloc_frag->offset += buflen;
17161717

drivers/net/virtio_net.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
606606
case XDP_PASS:
607607
/* Recalculate length in case bpf program changed it */
608608
delta = orig_data - xdp.data;
609+
len = xdp.data_end - xdp.data;
609610
break;
610611
case XDP_TX:
611612
xdpf = convert_to_xdp_frame(&xdp);
@@ -642,7 +643,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
642643
goto err;
643644
}
644645
skb_reserve(skb, headroom - delta);
645-
skb_put(skb, len + delta);
646+
skb_put(skb, len);
646647
if (!delta) {
647648
buf += header_offset;
648649
memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
@@ -757,6 +758,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
757758
offset = xdp.data -
758759
page_address(xdp_page) - vi->hdr_len;
759760

761+
/* recalculate len if xdp.data or xdp.data_end were
762+
* adjusted
763+
*/
764+
len = xdp.data_end - xdp.data;
760765
/* We can only create skb based on xdp_page. */
761766
if (unlikely(xdp_page != page)) {
762767
rcu_read_unlock();

include/linux/bpf.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ struct perf_event;
2222
struct bpf_prog;
2323
struct bpf_map;
2424
struct sock;
25+
struct seq_file;
26+
struct btf;
2527

2628
/* map is generic key/value storage optionally accessible by eBPF programs */
2729
struct bpf_map_ops {
@@ -43,10 +45,14 @@ struct bpf_map_ops {
4345
void (*map_fd_put_ptr)(void *ptr);
4446
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
4547
u32 (*map_fd_sys_lookup_elem)(void *ptr);
48+
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
49+
struct seq_file *m);
50+
int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
51+
u32 key_type_id, u32 value_type_id);
4652
};
4753

4854
struct bpf_map {
49-
/* 1st cacheline with read-mostly members of which some
55+
/* The first two cachelines with read-mostly members of which some
5056
* are also accessed in fast-path (e.g. ops, max_entries).
5157
*/
5258
const struct bpf_map_ops *ops ____cacheline_aligned;
@@ -62,10 +68,13 @@ struct bpf_map {
6268
u32 pages;
6369
u32 id;
6470
int numa_node;
71+
u32 btf_key_id;
72+
u32 btf_value_id;
73+
struct btf *btf;
6574
bool unpriv_array;
66-
/* 7 bytes hole */
75+
/* 55 bytes hole */
6776

68-
/* 2nd cacheline with misc members to avoid false sharing
77+
/* The 3rd and 4th cacheline with misc members to avoid false sharing
6978
* particularly with refcounting.
7079
*/
7180
struct user_struct *user ____cacheline_aligned;
@@ -100,6 +109,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
100109
return container_of(map, struct bpf_offloaded_map, map);
101110
}
102111

112+
static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
113+
{
114+
return map->ops->map_seq_show_elem && map->ops->map_check_btf;
115+
}
116+
103117
extern const struct bpf_map_ops bpf_map_offload_ops;
104118

105119
/* function argument constraints */

include/linux/btf.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/* Copyright (c) 2018 Facebook */
3+
4+
#ifndef _LINUX_BTF_H
5+
#define _LINUX_BTF_H 1
6+
7+
#include <linux/types.h>
8+
9+
struct btf;
10+
struct btf_type;
11+
union bpf_attr;
12+
13+
extern const struct file_operations btf_fops;
14+
15+
void btf_put(struct btf *btf);
16+
int btf_new_fd(const union bpf_attr *attr);
17+
struct btf *btf_get_by_fd(int fd);
18+
int btf_get_info_by_fd(const struct btf *btf,
19+
const union bpf_attr *attr,
20+
union bpf_attr __user *uattr);
21+
/* Figure out the size of a type_id. If type_id is a modifier
22+
* (e.g. const), it will be resolved to find out the type with size.
23+
*
24+
* For example:
25+
* In describing "const void *", type_id is "const" and "const"
26+
* refers to "void *". The return type will be "void *".
27+
*
28+
* If type_id is a simple "int", then return type will be "int".
29+
*
30+
* @btf: struct btf object
31+
* @type_id: Find out the size of type_id. The type_id of the return
32+
* type is set to *type_id.
33+
* @ret_size: It can be NULL. If not NULL, the size of the return
34+
* type is set to *ret_size.
35+
* Return: The btf_type (resolved to another type with size info if needed).
36+
* NULL is returned if type_id itself does not have size info
37+
* (e.g. void) or it cannot be resolved to another type that
38+
* has size info.
39+
* *type_id and *ret_size will not be changed in the
40+
* NULL return case.
41+
*/
42+
const struct btf_type *btf_type_id_size(const struct btf *btf,
43+
u32 *type_id,
44+
u32 *ret_size);
45+
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
46+
struct seq_file *m);
47+
48+
#endif

include/uapi/linux/bpf.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ enum bpf_cmd {
9595
BPF_OBJ_GET_INFO_BY_FD,
9696
BPF_PROG_QUERY,
9797
BPF_RAW_TRACEPOINT_OPEN,
98+
BPF_BTF_LOAD,
9899
};
99100

100101
enum bpf_map_type {
@@ -279,6 +280,9 @@ union bpf_attr {
279280
*/
280281
char map_name[BPF_OBJ_NAME_LEN];
281282
__u32 map_ifindex; /* ifindex of netdev to create on */
283+
__u32 btf_fd; /* fd pointing to a BTF type data */
284+
__u32 btf_key_id; /* BTF type_id of the key */
285+
__u32 btf_value_id; /* BTF type_id of the value */
282286
};
283287

284288
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -363,6 +367,14 @@ union bpf_attr {
363367
__u64 name;
364368
__u32 prog_fd;
365369
} raw_tracepoint;
370+
371+
struct { /* anonymous struct for BPF_BTF_LOAD */
372+
__aligned_u64 btf;
373+
__aligned_u64 btf_log_buf;
374+
__u32 btf_size;
375+
__u32 btf_log_size;
376+
__u32 btf_log_level;
377+
};
366378
} __attribute__((aligned(8)));
367379

368380
/* BPF helper function descriptions:
@@ -755,6 +767,13 @@ union bpf_attr {
755767
* @addr: pointer to struct sockaddr to bind socket to
756768
* @addr_len: length of sockaddr structure
757769
* Return: 0 on success or negative error code
770+
*
771+
* int bpf_xdp_adjust_tail(xdp_md, delta)
772+
* Adjust the xdp_md.data_end by delta. Only shrinking of packet's
773+
* size is supported.
774+
* @xdp_md: pointer to xdp_md
775+
* @delta: A negative integer to be added to xdp_md.data_end
776+
* Return: 0 on success or negative on error
758777
*/
759778
#define __BPF_FUNC_MAPPER(FN) \
760779
FN(unspec), \
@@ -821,7 +840,8 @@ union bpf_attr {
821840
FN(msg_apply_bytes), \
822841
FN(msg_cork_bytes), \
823842
FN(msg_pull_data), \
824-
FN(bind),
843+
FN(bind), \
844+
FN(xdp_adjust_tail),
825845

826846
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
827847
* function eBPF program intends to call

include/uapi/linux/btf.h

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2+
/* Copyright (c) 2018 Facebook */
3+
#ifndef _UAPI__LINUX_BTF_H__
4+
#define _UAPI__LINUX_BTF_H__
5+
6+
#include <linux/types.h>
7+
8+
#define BTF_MAGIC 0xeB9F
9+
#define BTF_MAGIC_SWAP 0x9FeB
10+
#define BTF_VERSION 1
11+
#define BTF_FLAGS_COMPR 0x01
12+
13+
struct btf_header {
14+
__u16 magic;
15+
__u8 version;
16+
__u8 flags;
17+
18+
__u32 parent_label;
19+
__u32 parent_name;
20+
21+
/* All offsets are in bytes relative to the end of this header */
22+
__u32 label_off; /* offset of label section */
23+
__u32 object_off; /* offset of data object section*/
24+
__u32 func_off; /* offset of function section */
25+
__u32 type_off; /* offset of type section */
26+
__u32 str_off; /* offset of string section */
27+
__u32 str_len; /* length of string section */
28+
};
29+
30+
/* Max # of type identifier */
31+
#define BTF_MAX_TYPE 0x7fffffff
32+
/* Max offset into the string section */
33+
#define BTF_MAX_NAME_OFFSET 0x7fffffff
34+
/* Max # of struct/union/enum members or func args */
35+
#define BTF_MAX_VLEN 0xffff
36+
37+
/* The type id is referring to a parent BTF */
38+
#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
39+
#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
40+
41+
/* String is in the ELF string section */
42+
#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
43+
#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
44+
45+
struct btf_type {
46+
__u32 name;
47+
/* "info" bits arrangement
48+
* bits 0-15: vlen (e.g. # of struct's members)
49+
* bits 16-23: unused
50+
* bits 24-28: kind (e.g. int, ptr, array...etc)
51+
* bits 29-30: unused
52+
* bits 31: root
53+
*/
54+
__u32 info;
55+
/* "size" is used by INT, ENUM, STRUCT and UNION.
56+
* "size" tells the size of the type it is describing.
57+
*
58+
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
59+
* "type" is a type_id referring to another type.
60+
*/
61+
union {
62+
__u32 size;
63+
__u32 type;
64+
};
65+
};
66+
67+
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
68+
#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
69+
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
70+
71+
#define BTF_KIND_UNKN 0 /* Unknown */
72+
#define BTF_KIND_INT 1 /* Integer */
73+
#define BTF_KIND_PTR 2 /* Pointer */
74+
#define BTF_KIND_ARRAY 3 /* Array */
75+
#define BTF_KIND_STRUCT 4 /* Struct */
76+
#define BTF_KIND_UNION 5 /* Union */
77+
#define BTF_KIND_ENUM 6 /* Enumeration */
78+
#define BTF_KIND_FWD 7 /* Forward */
79+
#define BTF_KIND_TYPEDEF 8 /* Typedef */
80+
#define BTF_KIND_VOLATILE 9 /* Volatile */
81+
#define BTF_KIND_CONST 10 /* Const */
82+
#define BTF_KIND_RESTRICT 11 /* Restrict */
83+
#define BTF_KIND_MAX 11
84+
#define NR_BTF_KINDS 12
85+
86+
/* For some specific BTF_KIND, "struct btf_type" is immediately
87+
* followed by extra data.
88+
*/
89+
90+
/* BTF_KIND_INT is followed by a u32 and the following
91+
* is the 32 bits arrangement:
92+
*/
93+
#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
94+
/* Extracts bits 16-23; VAL is parenthesized so a compound argument
 * (e.g. "a | b") is masked as a whole rather than only its last operand.
 */
#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16)
95+
#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
96+
97+
/* Attributes stored in the BTF_INT_ENCODING */
98+
#define BTF_INT_SIGNED 0x1
99+
#define BTF_INT_CHAR 0x2
100+
#define BTF_INT_BOOL 0x4
101+
#define BTF_INT_VARARGS 0x8
102+
103+
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
104+
* The exact number of btf_enum is stored in the vlen (of the
105+
* info in "struct btf_type").
106+
*/
107+
struct btf_enum {
108+
__u32 name;
109+
__s32 val;
110+
};
111+
112+
/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
113+
struct btf_array {
114+
__u32 type;
115+
__u32 index_type;
116+
__u32 nelems;
117+
};
118+
119+
/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
120+
* by multiple "struct btf_member". The exact number
121+
* of btf_member is stored in the vlen (of the info in
122+
* "struct btf_type").
123+
*/
124+
struct btf_member {
125+
__u32 name;
126+
__u32 type;
127+
__u32 offset; /* offset in bits */
128+
};
129+
130+
#endif /* _UAPI__LINUX_BTF_H__ */

kernel/bpf/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ obj-y := core.o
44
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
55
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
66
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
7+
obj-$(CONFIG_BPF_SYSCALL) += btf.o
78
ifeq ($(CONFIG_NET),y)
89
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
910
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o

0 commit comments

Comments
 (0)