Skip to content

Commit cb86d0f

Browse files
committed
Merge branch 'bpf-per-cpu-cgroup-storage'
Roman Gushchin says:

====================
This patchset implements per-cpu cgroup local storage and provides an example of how per-cpu and shared cgroup local storage can be used for efficient accounting of network traffic.

v4->v3:
  1) incorporated Alexei's feedback

v3->v2:
  1) incorporated Song's feedback
  2) rebased on top of current bpf-next

v2->v1:
  1) added a selftest implementing network counters
  2) added a missing free() in cgroup local storage selftest
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2 parents 5bf7a60 + 371e4fc commit cb86d0f

File tree

20 files changed

+786
-99
lines changed

20 files changed

+786
-99
lines changed

include/linux/bpf-cgroup.h

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#ifndef _BPF_CGROUP_H
33
#define _BPF_CGROUP_H
44

5+
#include <linux/bpf.h>
56
#include <linux/errno.h>
67
#include <linux/jump_label.h>
78
#include <linux/percpu.h>
@@ -22,7 +23,11 @@ struct bpf_cgroup_storage;
2223
extern struct static_key_false cgroup_bpf_enabled_key;
2324
#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
2425

25-
DECLARE_PER_CPU(void*, bpf_cgroup_storage);
26+
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
27+
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
28+
29+
#define for_each_cgroup_storage_type(stype) \
30+
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
2631

2732
struct bpf_cgroup_storage_map;
2833

@@ -32,7 +37,10 @@ struct bpf_storage_buffer {
3237
};
3338

3439
struct bpf_cgroup_storage {
35-
struct bpf_storage_buffer *buf;
40+
union {
41+
struct bpf_storage_buffer *buf;
42+
void __percpu *percpu_buf;
43+
};
3644
struct bpf_cgroup_storage_map *map;
3745
struct bpf_cgroup_storage_key key;
3846
struct list_head list;
@@ -43,7 +51,7 @@ struct bpf_cgroup_storage {
4351
struct bpf_prog_list {
4452
struct list_head node;
4553
struct bpf_prog *prog;
46-
struct bpf_cgroup_storage *storage;
54+
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
4755
};
4856

4957
struct bpf_prog_array;
@@ -101,18 +109,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
101109
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
102110
short access, enum bpf_attach_type type);
103111

104-
static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
112+
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
113+
struct bpf_map *map)
105114
{
106-
struct bpf_storage_buffer *buf;
115+
if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
116+
return BPF_CGROUP_STORAGE_PERCPU;
117+
118+
return BPF_CGROUP_STORAGE_SHARED;
119+
}
107120

108-
if (!storage)
109-
return;
121+
static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
122+
*storage[MAX_BPF_CGROUP_STORAGE_TYPE])
123+
{
124+
enum bpf_cgroup_storage_type stype;
110125

111-
buf = READ_ONCE(storage->buf);
112-
this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
126+
for_each_cgroup_storage_type(stype)
127+
this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
113128
}
114129

115-
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
130+
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
131+
enum bpf_cgroup_storage_type stype);
116132
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
117133
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
118134
struct cgroup *cgroup,
@@ -121,6 +137,10 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
121137
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
122138
void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
123139

140+
int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
141+
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
142+
void *value, u64 flags);
143+
124144
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
125145
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
126146
({ \
@@ -265,15 +285,24 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
265285
return -EINVAL;
266286
}
267287

268-
static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
288+
static inline void bpf_cgroup_storage_set(
289+
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
269290
static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
270291
struct bpf_map *map) { return 0; }
271292
static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
272293
struct bpf_map *map) {}
273294
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
274-
struct bpf_prog *prog) { return 0; }
295+
struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
275296
static inline void bpf_cgroup_storage_free(
276297
struct bpf_cgroup_storage *storage) {}
298+
static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
299+
void *value) {
300+
return 0;
301+
}
302+
static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
303+
void *key, void *value, u64 flags) {
304+
return 0;
305+
}
277306

278307
#define cgroup_bpf_enabled (0)
279308
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
@@ -293,6 +322,8 @@ static inline void bpf_cgroup_storage_free(
293322
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
294323
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
295324

325+
#define for_each_cgroup_storage_type(stype) for (; false; )
326+
296327
#endif /* CONFIG_CGROUP_BPF */
297328

298329
#endif /* _BPF_CGROUP_H */

include/linux/bpf.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,14 @@ struct bpf_prog_offload {
272272
u32 jited_len;
273273
};
274274

275+
enum bpf_cgroup_storage_type {
276+
BPF_CGROUP_STORAGE_SHARED,
277+
BPF_CGROUP_STORAGE_PERCPU,
278+
__BPF_CGROUP_STORAGE_MAX
279+
};
280+
281+
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
282+
275283
struct bpf_prog_aux {
276284
atomic_t refcnt;
277285
u32 used_map_cnt;
@@ -289,7 +297,7 @@ struct bpf_prog_aux {
289297
struct bpf_prog *prog;
290298
struct user_struct *user;
291299
u64 load_time; /* ns since boottime */
292-
struct bpf_map *cgroup_storage;
300+
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
293301
char name[BPF_OBJ_NAME_LEN];
294302
#ifdef CONFIG_SECURITY
295303
void *security;
@@ -358,7 +366,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
358366
*/
359367
struct bpf_prog_array_item {
360368
struct bpf_prog *prog;
361-
struct bpf_cgroup_storage *cgroup_storage;
369+
struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
362370
};
363371

364372
struct bpf_prog_array {

include/linux/bpf_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
4343
#endif
4444
#ifdef CONFIG_CGROUP_BPF
4545
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
46+
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
4647
#endif
4748
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
4849
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)

include/uapi/linux/bpf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ enum bpf_map_type {
127127
BPF_MAP_TYPE_SOCKHASH,
128128
BPF_MAP_TYPE_CGROUP_STORAGE,
129129
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
130+
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
130131
};
131132

132133
enum bpf_prog_type {

kernel/bpf/cgroup.c

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
2525
*/
2626
void cgroup_bpf_put(struct cgroup *cgrp)
2727
{
28+
enum bpf_cgroup_storage_type stype;
2829
unsigned int type;
2930

3031
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -34,8 +35,10 @@ void cgroup_bpf_put(struct cgroup *cgrp)
3435
list_for_each_entry_safe(pl, tmp, progs, node) {
3536
list_del(&pl->node);
3637
bpf_prog_put(pl->prog);
37-
bpf_cgroup_storage_unlink(pl->storage);
38-
bpf_cgroup_storage_free(pl->storage);
38+
for_each_cgroup_storage_type(stype) {
39+
bpf_cgroup_storage_unlink(pl->storage[stype]);
40+
bpf_cgroup_storage_free(pl->storage[stype]);
41+
}
3942
kfree(pl);
4043
static_branch_dec(&cgroup_bpf_enabled_key);
4144
}
@@ -97,6 +100,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
97100
enum bpf_attach_type type,
98101
struct bpf_prog_array __rcu **array)
99102
{
103+
enum bpf_cgroup_storage_type stype;
100104
struct bpf_prog_array *progs;
101105
struct bpf_prog_list *pl;
102106
struct cgroup *p = cgrp;
@@ -125,7 +129,9 @@ static int compute_effective_progs(struct cgroup *cgrp,
125129
continue;
126130

127131
progs->items[cnt].prog = pl->prog;
128-
progs->items[cnt].cgroup_storage = pl->storage;
132+
for_each_cgroup_storage_type(stype)
133+
progs->items[cnt].cgroup_storage[stype] =
134+
pl->storage[stype];
129135
cnt++;
130136
}
131137
} while ((p = cgroup_parent(p)));
@@ -232,7 +238,9 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
232238
{
233239
struct list_head *progs = &cgrp->bpf.progs[type];
234240
struct bpf_prog *old_prog = NULL;
235-
struct bpf_cgroup_storage *storage, *old_storage = NULL;
241+
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
242+
*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
243+
enum bpf_cgroup_storage_type stype;
236244
struct bpf_prog_list *pl;
237245
bool pl_was_allocated;
238246
int err;
@@ -254,47 +262,60 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
254262
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
255263
return -E2BIG;
256264

257-
storage = bpf_cgroup_storage_alloc(prog);
258-
if (IS_ERR(storage))
259-
return -ENOMEM;
265+
for_each_cgroup_storage_type(stype) {
266+
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
267+
if (IS_ERR(storage[stype])) {
268+
storage[stype] = NULL;
269+
for_each_cgroup_storage_type(stype)
270+
bpf_cgroup_storage_free(storage[stype]);
271+
return -ENOMEM;
272+
}
273+
}
260274

261275
if (flags & BPF_F_ALLOW_MULTI) {
262276
list_for_each_entry(pl, progs, node) {
263277
if (pl->prog == prog) {
264278
/* disallow attaching the same prog twice */
265-
bpf_cgroup_storage_free(storage);
279+
for_each_cgroup_storage_type(stype)
280+
bpf_cgroup_storage_free(storage[stype]);
266281
return -EINVAL;
267282
}
268283
}
269284

270285
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
271286
if (!pl) {
272-
bpf_cgroup_storage_free(storage);
287+
for_each_cgroup_storage_type(stype)
288+
bpf_cgroup_storage_free(storage[stype]);
273289
return -ENOMEM;
274290
}
275291

276292
pl_was_allocated = true;
277293
pl->prog = prog;
278-
pl->storage = storage;
294+
for_each_cgroup_storage_type(stype)
295+
pl->storage[stype] = storage[stype];
279296
list_add_tail(&pl->node, progs);
280297
} else {
281298
if (list_empty(progs)) {
282299
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
283300
if (!pl) {
284-
bpf_cgroup_storage_free(storage);
301+
for_each_cgroup_storage_type(stype)
302+
bpf_cgroup_storage_free(storage[stype]);
285303
return -ENOMEM;
286304
}
287305
pl_was_allocated = true;
288306
list_add_tail(&pl->node, progs);
289307
} else {
290308
pl = list_first_entry(progs, typeof(*pl), node);
291309
old_prog = pl->prog;
292-
old_storage = pl->storage;
293-
bpf_cgroup_storage_unlink(old_storage);
310+
for_each_cgroup_storage_type(stype) {
311+
old_storage[stype] = pl->storage[stype];
312+
bpf_cgroup_storage_unlink(old_storage[stype]);
313+
}
294314
pl_was_allocated = false;
295315
}
296316
pl->prog = prog;
297-
pl->storage = storage;
317+
for_each_cgroup_storage_type(stype)
318+
pl->storage[stype] = storage[stype];
298319
}
299320

300321
cgrp->bpf.flags[type] = flags;
@@ -304,21 +325,27 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
304325
goto cleanup;
305326

306327
static_branch_inc(&cgroup_bpf_enabled_key);
307-
if (old_storage)
308-
bpf_cgroup_storage_free(old_storage);
328+
for_each_cgroup_storage_type(stype) {
329+
if (!old_storage[stype])
330+
continue;
331+
bpf_cgroup_storage_free(old_storage[stype]);
332+
}
309333
if (old_prog) {
310334
bpf_prog_put(old_prog);
311335
static_branch_dec(&cgroup_bpf_enabled_key);
312336
}
313-
bpf_cgroup_storage_link(storage, cgrp, type);
337+
for_each_cgroup_storage_type(stype)
338+
bpf_cgroup_storage_link(storage[stype], cgrp, type);
314339
return 0;
315340

316341
cleanup:
317342
/* and cleanup the prog list */
318343
pl->prog = old_prog;
319-
bpf_cgroup_storage_free(pl->storage);
320-
pl->storage = old_storage;
321-
bpf_cgroup_storage_link(old_storage, cgrp, type);
344+
for_each_cgroup_storage_type(stype) {
345+
bpf_cgroup_storage_free(pl->storage[stype]);
346+
pl->storage[stype] = old_storage[stype];
347+
bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
348+
}
322349
if (pl_was_allocated) {
323350
list_del(&pl->node);
324351
kfree(pl);
@@ -339,6 +366,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
339366
enum bpf_attach_type type, u32 unused_flags)
340367
{
341368
struct list_head *progs = &cgrp->bpf.progs[type];
369+
enum bpf_cgroup_storage_type stype;
342370
u32 flags = cgrp->bpf.flags[type];
343371
struct bpf_prog *old_prog = NULL;
344372
struct bpf_prog_list *pl;
@@ -385,8 +413,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
385413

386414
/* now can actually delete it from this cgroup list */
387415
list_del(&pl->node);
388-
bpf_cgroup_storage_unlink(pl->storage);
389-
bpf_cgroup_storage_free(pl->storage);
416+
for_each_cgroup_storage_type(stype) {
417+
bpf_cgroup_storage_unlink(pl->storage[stype]);
418+
bpf_cgroup_storage_free(pl->storage[stype]);
419+
}
390420
kfree(pl);
391421
if (list_empty(progs))
392422
/* last program was detached, reset flags to zero */

kernel/bpf/helpers.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -194,16 +194,28 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
194194
.ret_type = RET_INTEGER,
195195
};
196196

197-
DECLARE_PER_CPU(void*, bpf_cgroup_storage);
197+
#ifdef CONFIG_CGROUP_BPF
198+
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
199+
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
198200

199201
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
200202
{
201-
/* map and flags arguments are not used now,
202-
* but provide an ability to extend the API
203-
* for other types of local storages.
204-
* verifier checks that their values are correct.
203+
/* flags argument is not used now,
204+
* but provides an ability to extend the API.
205+
* verifier checks that its value is correct.
205206
*/
206-
return (unsigned long) this_cpu_read(bpf_cgroup_storage);
207+
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
208+
struct bpf_cgroup_storage *storage;
209+
void *ptr;
210+
211+
storage = this_cpu_read(bpf_cgroup_storage[stype]);
212+
213+
if (stype == BPF_CGROUP_STORAGE_SHARED)
214+
ptr = &READ_ONCE(storage->buf)->data[0];
215+
else
216+
ptr = this_cpu_ptr(storage->percpu_buf);
217+
218+
return (unsigned long)ptr;
207219
}
208220

209221
const struct bpf_func_proto bpf_get_local_storage_proto = {
@@ -214,3 +226,4 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
214226
.arg2_type = ARG_ANYTHING,
215227
};
216228
#endif
229+
#endif

0 commit comments

Comments
 (0)