Skip to content

Commit bae77c5

Browse files
liu-song-6 authored and
borkmann committed
bpf: enable stackmap with build_id in nmi context
Currently, we cannot parse build_id in nmi context because up_read(&current->mm->mmap_sem) cannot be called there; this makes stackmap with build_id less useful. This patch enables parsing build_id in nmi context by deferring the up_read() call to irq_work. To avoid memory allocation in nmi context, we use a per-cpu variable for the irq_work. As a result, only one irq_work per cpu is allowed. If the irq_work is in use, we fall back to reporting only ips.

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
1 parent a84880e commit bae77c5

File tree

2 files changed

+54
-6
lines changed

2 files changed

+54
-6
lines changed

init/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,6 +1391,7 @@ config BPF_SYSCALL
13911391
bool "Enable bpf() system call"
13921392
select ANON_INODES
13931393
select BPF
1394+
select IRQ_WORK
13941395
default n
13951396
help
13961397
Enable the bpf() system call that allows to manipulate eBPF

kernel/bpf/stackmap.c

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <linux/perf_event.h>
1212
#include <linux/elf.h>
1313
#include <linux/pagemap.h>
14+
#include <linux/irq_work.h>
1415
#include "percpu_freelist.h"
1516

1617
#define STACK_CREATE_FLAG_MASK \
@@ -32,6 +33,23 @@ struct bpf_stack_map {
3233
struct stack_map_bucket *buckets[];
3334
};
3435

36+
/*
 * irq_work to run up_read() for build_id lookup in nmi context.
 *
 * @irq_work: work item queued by the build_id lookup path; its callback
 *            is do_up_read().
 * @sem:      read-held semaphore that the callback must release. It is set
 *            just before the work is queued and reset to NULL by
 *            do_up_read() once the semaphore has been released.
 */
37+
struct stack_map_irq_work {
38+
struct irq_work irq_work;
39+
struct rw_semaphore *sem;
40+
};
41+
42+
/*
 * irq_work callback: release the semaphore recorded in the enclosing
 * stack_map_irq_work.
 *
 * @entry: the irq_work member embedded in a struct stack_map_irq_work;
 *         container_of() recovers the enclosing structure from it.
 *
 * After up_read() the sem pointer is cleared so that no stale semaphore
 * reference is left behind in the work item.
 */
static void do_up_read(struct irq_work *entry)
43+
{
44+
struct stack_map_irq_work *work;
45+
46+
work = container_of(entry, struct stack_map_irq_work, irq_work);
47+
up_read(work->sem);
48+
work->sem = NULL;
49+
}
50+
51+
/*
 * One deferred-up_read work item per cpu, statically defined so that the
 * nmi path never has to allocate memory. While a cpu's item is busy, a new
 * lookup on that cpu cannot queue another up_read and falls back to ips.
 */
static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
52+
3553
static inline bool stack_map_use_build_id(struct bpf_map *map)
3654
{
3755
return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
267285
{
268286
int i;
269287
struct vm_area_struct *vma;
288+
bool in_nmi_ctx = in_nmi();
289+
bool irq_work_busy = false;
290+
struct stack_map_irq_work *work;
291+
292+
if (in_nmi_ctx) {
293+
work = this_cpu_ptr(&up_read_work);
294+
if (work->irq_work.flags & IRQ_WORK_BUSY)
295+
/* cannot queue more up_read, fallback */
296+
irq_work_busy = true;
297+
}
270298

271299
/*
272-
* We cannot do up_read() in nmi context, so build_id lookup is
273-
* only supported for non-nmi events. If at some point, it is
274-
* possible to run find_vma() without taking the semaphore, we
275-
* would like to allow build_id lookup in nmi context.
300+
* We cannot do up_read() in nmi context. To do build_id lookup
301+
* in nmi context, we need to run up_read() in irq_work. We use
302+
* a percpu variable to do the irq_work. If the irq_work is
303+
* already used by another lookup, we fall back to report ips.
276304
*
277305
* Same fallback is used for kernel stack (!user) on a stackmap
278306
* with build_id.
279307
*/
280-
if (!user || !current || !current->mm || in_nmi() ||
308+
if (!user || !current || !current->mm || irq_work_busy ||
281309
down_read_trylock(&current->mm->mmap_sem) == 0) {
282310
/* cannot access current->mm, fall back to ips */
283311
for (i = 0; i < trace_nr; i++) {
@@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
299327
- vma->vm_start;
300328
id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
301329
}
302-
up_read(&current->mm->mmap_sem);
330+
331+
if (!in_nmi_ctx) {
332+
up_read(&current->mm->mmap_sem);
333+
} else {
334+
work->sem = &current->mm->mmap_sem;
335+
irq_work_queue(&work->irq_work);
336+
}
303337
}
304338

305339
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
@@ -575,3 +609,16 @@ const struct bpf_map_ops stack_map_ops = {
575609
.map_update_elem = stack_map_update_elem,
576610
.map_delete_elem = stack_map_delete_elem,
577611
};
612+
613+
/*
 * Initialize the per-cpu up_read_work items.
 *
 * For every possible cpu, bind that cpu's irq_work to do_up_read() so the
 * work item is ready to be queued by the build_id lookup path.
 *
 * Return: always 0.
 */
static int __init stack_map_init(void)
614+
{
615+
int cpu;
616+
struct stack_map_irq_work *work;
617+
618+
for_each_possible_cpu(cpu) {
619+
work = per_cpu_ptr(&up_read_work, cpu);
620+
init_irq_work(&work->irq_work, do_up_read);
621+
}
622+
return 0;
623+
}
624+
/* Run stack_map_init() as a subsys-level initcall. */
subsys_initcall(stack_map_init);

0 commit comments

Comments
 (0)