
Commit 9c270af

netoptimizer authored and davem330 committed
bpf: XDP_REDIRECT enable use of cpumap
This patch connects cpumap to the xdp_do_redirect_map infrastructure.

Still no SKB allocations are done yet. The XDP frames are transferred to the other CPU, but they are simply refcnt decremented on the remote CPU. This served as a good benchmark for measuring the overhead of remote refcnt decrement. If the driver's page recycle cache is not efficient, this exposes a bottleneck in the page allocator.

A shout-out to MST's ptr_ring, which is the secret behind this being so efficient at transferring memory pointers between CPUs without constantly bouncing cache-lines between them.

V3: Handle !CONFIG_BPF_SYSCALL, pointed out by kbuild test robot.

V4: Make Generic-XDP aware of the cpumap type, but don't allow redirect yet, as the implementation requires a separate upstream discussion.

V5:
- Fix a maybe-uninitialized warning pointed out by kbuild test robot.
- Restrict bpf-prog side access to cpumap; open it up when use-cases appear.
- Implement cpu_map_enqueue() as a simpler void-pointer enqueue.

V6:
- Allow the cpumap type for usage in the bpf_redirect_map helper; the general bpf-prog side restriction moved to an earlier patch.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 6710e11 · commit 9c270af
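To make the new redirect path concrete, here is a minimal sketch of the bpf-prog side usage this commit enables: an XDP program steering frames into a BPF_MAP_TYPE_CPUMAP via the bpf_redirect_map helper. It is illustrative only and not part of this commit (the map name, max_entries, fixed CPU index, and the samples/bpf-style bpf_helpers.h are assumptions; sample programs landed later in the series):

#include <linux/bpf.h>
#include "bpf_helpers.h"	/* assumed: samples/bpf helper stubs of this era */

/* cpumap convention: key is the map index (destination CPU),
 * value is that CPU's queue size; both u32.
 */
struct bpf_map_def SEC("maps") cpu_map = {
	.type		= BPF_MAP_TYPE_CPUMAP,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u32),
	.max_entries	= 64,
};

SEC("xdp_redirect_cpu")
int xdp_redirect_cpu_prog(struct xdp_md *ctx)
{
	__u32 cpu_idx = 0;	/* assumption: always steer to map index 0 */

	/* Returns XDP_REDIRECT on success; the frame is then enqueued
	 * to the remote CPU, which (at this point in the series) only
	 * refcnt-decrements it again.
	 */
	return bpf_redirect_map(&cpu_map, cpu_idx, 0);
}

char _license[] SEC("license") = "GPL";

Note that for generic XDP the verifier now accepts such a program, but the generic redirect path below still rejects cpumap targets with -EBADRQC.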

File tree

5 files changed (+172 additions, -34 deletions)


include/linux/bpf.h

Lines changed: 30 additions & 1 deletion
@@ -355,14 +355,21 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
 
+struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
+void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
+void __cpu_map_flush(struct bpf_map *map);
+struct xdp_buff;
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+		    struct net_device *dev_rx);
+
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
 {
 	return (attr->map_flags & BPF_F_NUMA_NODE) ?
 		attr->numa_node : NUMA_NO_NODE;
 }
 
-#else
+#else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
 	return ERR_PTR(-EOPNOTSUPP);
@@ -425,6 +432,28 @@ static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
 static inline void __dev_map_flush(struct bpf_map *map)
 {
 }
+
+static inline
+struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	return NULL;
+}
+
+static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index)
+{
+}
+
+static inline void __cpu_map_flush(struct bpf_map *map)
+{
+}
+
+struct xdp_buff;
+static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
+				  struct xdp_buff *xdp,
+				  struct net_device *dev_rx)
+{
+	return 0;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)

include/trace/events/xdp.h

Lines changed: 8 additions & 2 deletions
@@ -136,12 +136,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
 		  __entry->map_id, __entry->map_index)
 );
 
+#define devmap_ifindex(fwd, map)				\
+	(!fwd ? 0 :						\
+	 (!map ? 0 :						\
+	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
+	   ((struct net_device *)fwd)->ifindex : 0)))
+
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)	\
-	 trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \
+	 trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
 				0, map, idx)
 
 #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \
-	 trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \
+	 trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \
 				    err, map, idx)
 
 #endif /* _TRACE_XDP_H */

kernel/bpf/cpumap.c

Lines changed: 21 additions & 1 deletion
@@ -500,7 +500,7 @@ struct xdp_pkt {
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
  * Thus, safe percpu variable access.
  */
-int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
+static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
 {
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
@@ -520,6 +520,26 @@ int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
 	return 0;
 }
 
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+		    struct net_device *dev_rx)
+{
+	struct xdp_pkt *xdp_pkt;
+	int headroom;
+
+	/* For now this is just used as a void pointer to data_hard_start.
+	 * Followup patch will generalize this.
+	 */
+	xdp_pkt = xdp->data_hard_start;
+
+	/* Fake writing into xdp_pkt->data to measure overhead */
+	headroom = xdp->data - xdp->data_hard_start;
+	if (headroom < sizeof(*xdp_pkt))
+		xdp_pkt->data = xdp->data;
+
+	bq_enqueue(rcpu, xdp_pkt);
+	return 0;
+}
+
 void __cpu_map_insert_ctx(struct bpf_map *map, u32 bit)
 {
 	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
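As background for bq_enqueue()'s flush target, here is a minimal, self-contained sketch of the ptr_ring pattern the commit message credits: one producer CPU enqueues frame pointers, one remote consumer drains them, so per-frame cache-line bouncing is avoided. ptr_ring_init()/ptr_ring_produce()/ptr_ring_consume() are the real include/linux/ptr_ring.h API; the demo_* names, ring size, and the put_page() disposal are illustrative assumptions, not code from this commit:

#include <linux/mm.h>
#include <linux/ptr_ring.h>

#define DEMO_RING_SIZE	128	/* real cpumap queue sizes come from the map value */

/* Producer side: would run on the RX CPU in NAPI context. */
static int demo_enqueue(struct ptr_ring *ring, void *xdp_pkt)
{
	/* Returns -ENOSPC when the remote CPU is not keeping up. */
	return ptr_ring_produce(ring, xdp_pkt);
}

/* Consumer side: would run in the remote CPU's kthread. */
static void demo_drain(struct ptr_ring *ring)
{
	void *xdp_pkt;

	/* At this stage of the series the frame is simply dropped,
	 * i.e. its page refcnt is decremented on the remote CPU.
	 */
	while ((xdp_pkt = ptr_ring_consume(ring)) != NULL)
		put_page(virt_to_page(xdp_pkt));
}

static int demo_init(struct ptr_ring *ring)
{
	/* Single-producer/single-consumer keeps the ring cache-friendly. */
	return ptr_ring_init(ring, DEMO_RING_SIZE, GFP_KERNEL);
}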

kernel/bpf/verifier.c

Lines changed: 2 additions & 1 deletion
@@ -1486,7 +1486,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 			goto error;
 		break;
 	case BPF_FUNC_redirect_map:
-		if (map->map_type != BPF_MAP_TYPE_DEVMAP)
+		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
+		    map->map_type != BPF_MAP_TYPE_CPUMAP)
 			goto error;
 		break;
 	case BPF_FUNC_sk_redirect_map:

net/core/filter.c

Lines changed: 111 additions & 29 deletions
@@ -2526,10 +2526,36 @@ static int __bpf_tx_xdp(struct net_device *dev,
 	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
 	if (err)
 		return err;
-	if (map)
+	dev->netdev_ops->ndo_xdp_flush(dev);
+	return 0;
+}
+
+static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
+			    struct bpf_map *map,
+			    struct xdp_buff *xdp,
+			    u32 index)
+{
+	int err;
+
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+		struct net_device *dev = fwd;
+
+		if (!dev->netdev_ops->ndo_xdp_xmit)
+			return -EOPNOTSUPP;
+
+		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+		if (err)
+			return err;
 		__dev_map_insert_ctx(map, index);
-	else
-		dev->netdev_ops->ndo_xdp_flush(dev);
+
+	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+		struct bpf_cpu_map_entry *rcpu = fwd;
+
+		err = cpu_map_enqueue(rcpu, xdp, dev_rx);
+		if (err)
+			return err;
+		__cpu_map_insert_ctx(map, index);
+	}
 	return 0;
 }
 
@@ -2539,11 +2565,33 @@ void xdp_do_flush_map(void)
 	struct bpf_map *map = ri->map_to_flush;
 
 	ri->map_to_flush = NULL;
-	if (map)
-		__dev_map_flush(map);
+	if (map) {
+		switch (map->map_type) {
+		case BPF_MAP_TYPE_DEVMAP:
+			__dev_map_flush(map);
+			break;
+		case BPF_MAP_TYPE_CPUMAP:
+			__cpu_map_flush(map);
+			break;
+		default:
+			break;
+		}
+	}
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
+static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+{
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		return __dev_map_lookup_elem(map, index);
+	case BPF_MAP_TYPE_CPUMAP:
+		return __cpu_map_lookup_elem(map, index);
+	default:
+		return NULL;
+	}
+}
+
 static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
 				   unsigned long aux)
 {
@@ -2556,8 +2604,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
-	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
+	void *fwd = NULL;
 	int err;
 
 	ri->ifindex = 0;
@@ -2570,15 +2618,15 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 		goto err;
 	}
 
-	fwd = __dev_map_lookup_elem(map, index);
+	fwd = __xdp_map_lookup_elem(map, index);
 	if (!fwd) {
 		err = -EINVAL;
 		goto err;
 	}
 	if (ri->map_to_flush && ri->map_to_flush != map)
 		xdp_do_flush_map();
 
-	err = __bpf_tx_xdp(fwd, map, xdp, index);
+	err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
 	if (unlikely(err))
 		goto err;
 
@@ -2620,54 +2668,88 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
-int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
-			    struct bpf_prog *xdp_prog)
+static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
+{
+	unsigned int len;
+
+	if (unlikely(!(fwd->flags & IFF_UP)))
+		return -ENETDOWN;
+
+	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+	if (skb->len > len)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
+				struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
-	unsigned int len;
 	int err = 0;
 
 	ri->ifindex = 0;
 	ri->map = NULL;
 	ri->map_owner = 0;
 
-	if (map) {
-		if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
-			err = -EFAULT;
-			map = NULL;
-			goto err;
-		}
-		fwd = __dev_map_lookup_elem(map, index);
-	} else {
-		fwd = dev_get_by_index_rcu(dev_net(dev), index);
+	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
+		err = -EFAULT;
+		map = NULL;
+		goto err;
 	}
+	fwd = __xdp_map_lookup_elem(map, index);
 	if (unlikely(!fwd)) {
 		err = -EINVAL;
 		goto err;
 	}
 
-	if (unlikely(!(fwd->flags & IFF_UP))) {
-		err = -ENETDOWN;
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+		if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+			goto err;
+		skb->dev = fwd;
+	} else {
+		/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
+		err = -EBADRQC;
 		goto err;
 	}
 
-	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
-	if (skb->len > len) {
-		err = -EMSGSIZE;
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	return 0;
+err:
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	return err;
+}
+
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+			    struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	u32 index = ri->ifindex;
+	struct net_device *fwd;
+	int err = 0;
+
+	if (ri->map)
+		return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
+
+	ri->ifindex = 0;
+	fwd = dev_get_by_index_rcu(dev_net(dev), index);
+	if (unlikely(!fwd)) {
+		err = -EINVAL;
 		goto err;
 	}
 
+	if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+		goto err;
+
 	skb->dev = fwd;
-	map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index)
-	    : _trace_xdp_redirect(dev, xdp_prog, index);
+	_trace_xdp_redirect(dev, xdp_prog, index);
 	return 0;
 err:
-	map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err)
-	    : _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
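Finally, a hedged userspace-side sketch of how a cpumap slot gets armed before redirects into it can succeed (not part of this commit; the control-plane sample landed later in the series). The cpumap convention: the key is the destination CPU's map index, and the value is the queue size for that CPU's ptr_ring. The fd handling, function name, and sizes are illustrative assumptions; bpf_map_update_elem() is the standard tools/lib/bpf wrapper:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <linux/types.h>
#include "bpf/bpf.h"	/* tools/lib/bpf syscall wrappers */

/* Arm CPU 2 as a valid redirect target in an already-created
 * BPF_MAP_TYPE_CPUMAP whose fd the caller obtained elsewhere.
 */
static int arm_cpu_entry(int cpumap_fd)
{
	__u32 key = 2;		/* destination CPU (map index) */
	__u32 qsize = 192;	/* ptr_ring entries; illustrative */

	if (bpf_map_update_elem(cpumap_fd, &key, &qsize, 0) < 0) {
		fprintf(stderr, "cpumap update failed: %s\n",
			strerror(errno));
		return -1;
	}
	return 0;
}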
