Commit c6e5b73

Peter Zijlstra authored and Ingo Molnar committed
perf: Synchronously clean up child events
The orphan cleanup workqueue doesn't always catch orphans; for example, if they never schedule after they are orphaned. IOW, the event leak is still very real. It also wouldn't work for kernel counters.

Doing it synchronously is a little hairy due to lock inversion issues, but is made to work.

Patch based on work by Alexander Shishkin.

Suggested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 60beda8 commit c6e5b73
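
[Editor's note] The "lock inversion issues" in the changelog come from the established lock order ctx::mutex -> child_mutex: the release path walks the child list under child_mutex, yet then needs each child's ctx::mutex, which nests outside it. Below is a minimal userspace sketch of the resulting drop/re-lock/revalidate dance; it is pthread-based and the names are invented for illustration, not kernel API:

/*
 * Established lock order: ctx_mutex, then child_mutex. The release
 * path starts out holding only child_mutex, so taking ctx_mutex
 * directly would invert the order and risk deadlock.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t ctx_mutex   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t child_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool child_alive = true;		/* stands in for a child event */

static void release_one_child(void)
{
	pthread_mutex_lock(&child_mutex);
	/* found a child; its ctx_mutex nests OUTSIDE child_mutex */
	pthread_mutex_unlock(&child_mutex);	/* so drop, then... */

	pthread_mutex_lock(&ctx_mutex);		/* ...retake in order */
	pthread_mutex_lock(&child_mutex);

	if (child_alive) {			/* revalidate after re-lock */
		child_alive = false;		/* safe to tear down now */
		printf("child reaped\n");
	}

	pthread_mutex_unlock(&child_mutex);
	pthread_mutex_unlock(&ctx_mutex);
}

int main(void)
{
	release_one_child();
	return 0;
}

The patch below implements exactly this shape in perf_event_release_kernel(), with the "revalidate" step checking whether the child is still the first entry on the list.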

File tree

2 files changed: +84 -93 lines changed


include/linux/perf_event.h

Lines changed: 0 additions & 3 deletions
@@ -634,9 +634,6 @@ struct perf_event_context {
 	int			nr_cgroups;	 /* cgroup evts */
 	void			*task_ctx_data; /* pmu specific data */
 	struct rcu_head		rcu_head;
-
-	struct delayed_work	orphans_remove;
-	bool			orphans_remove_sched;
 };
 
 /*

kernel/events/core.c

Lines changed: 84 additions & 90 deletions
@@ -49,8 +49,6 @@
 
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -1645,45 +1643,11 @@ static void perf_group_detach(struct perf_event *event)
 		perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event && event->state == PERF_EVENT_STATE_EXIT;
-}
-
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-	return is_orphaned_event(event->parent);
-}
-
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-		return;
-
-	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-		get_ctx(ctx);
-		ctx->orphans_remove_sched = true;
-	}
-}
-
-static int __init perf_workqueue_init(void)
-{
-	perf_wq = create_singlethread_workqueue("perf");
-	WARN(!perf_wq, "failed to create perf workqueue\n");
-	return perf_wq ? 0 : -1;
+	return event->state == PERF_EVENT_STATE_EXIT;
 }
 
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
 	struct pmu *pmu = event->pmu;
@@ -1744,9 +1708,6 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1984,9 +1945,6 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3369,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
-	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3782,11 +3739,22 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx;
-
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
+	_free_event(event);
+}
+
+/*
+ * Kill an event dead; while event:refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
+int perf_event_release_kernel(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *child, *tmp;
+
 	if (!is_kernel_event(event))
 		perf_remove_from_owner(event);
 
@@ -3811,14 +3779,70 @@ static void put_event(struct perf_event *event)
 	 * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
 	 * either from the above perf_remove_from_context() or through
 	 * perf_event_exit_event().
+	 *
+	 * Therefore, anybody acquiring event->child_mutex after the below
+	 * loop _must_ also see this, most importantly inherit_event() which
+	 * will avoid placing more children on the list.
+	 *
+	 * Thus this guarantees that we will in fact observe and kill _ALL_
+	 * child events.
 	 */
 	WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
 
-	_free_event(event);
-}
+again:
+	mutex_lock(&event->child_mutex);
+	list_for_each_entry(child, &event->child_list, child_list) {
 
-int perf_event_release_kernel(struct perf_event *event)
-{
+		/*
+		 * Cannot change, child events are not migrated, see the
+		 * comment with perf_event_ctx_lock_nested().
+		 */
+		ctx = lockless_dereference(child->ctx);
+		/*
+		 * Since child_mutex nests inside ctx::mutex, we must jump
+		 * through hoops. We start by grabbing a reference on the ctx.
+		 *
+		 * Since the event cannot get freed while we hold the
+		 * child_mutex, the context must also exist and have a !0
+		 * reference count.
+		 */
+		get_ctx(ctx);
+
+		/*
+		 * Now that we have a ctx ref, we can drop child_mutex, and
+		 * acquire ctx::mutex without fear of it going away. Then we
+		 * can re-acquire child_mutex.
+		 */
+		mutex_unlock(&event->child_mutex);
+		mutex_lock(&ctx->mutex);
+		mutex_lock(&event->child_mutex);
+
+		/*
+		 * Now that we hold ctx::mutex and child_mutex, revalidate our
+		 * state, if child is still the first entry, it didn't get freed
+		 * and we can continue doing so.
+		 */
+		tmp = list_first_entry_or_null(&event->child_list,
+					       struct perf_event, child_list);
+		if (tmp == child) {
+			perf_remove_from_context(child, DETACH_GROUP);
+			list_del(&child->child_list);
+			free_event(child);
+			/*
+			 * This matches the refcount bump in inherit_event();
+			 * this can't be the last reference.
+			 */
+			put_event(event);
+		}
+
+		mutex_unlock(&event->child_mutex);
+		mutex_unlock(&ctx->mutex);
+		put_ctx(ctx);
+		goto again;
+	}
+	mutex_unlock(&event->child_mutex);
+
+	/* Must be the last reference */
 	put_event(event);
 	return 0;
 }
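
[Editor's note] The changelog points out that the workqueue approach "wouldn't work for kernel counters"; with perf_event_release_kernel() doing the reaping in line, kernel-side users are covered too. A sketch of how an in-kernel user might pair creation and release follows; perf_event_create_kernel_counter() and perf_event_release_kernel() are the real kernel APIs, but the snippet itself is illustrative and not part of the commit:

#include <linux/err.h>
#include <linux/perf_event.h>

static struct perf_event *cycles_event;

static int cycles_start(void)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HARDWARE,
		.config	= PERF_COUNT_HW_CPU_CYCLES,
		.size	= sizeof(attr),
	};

	/* counter on CPU 0, no task context, no overflow handler */
	cycles_event = perf_event_create_kernel_counter(&attr, 0, NULL,
							NULL, NULL);
	if (IS_ERR(cycles_event))
		return PTR_ERR(cycles_event);
	return 0;
}

static void cycles_stop(void)
{
	/*
	 * Synchronous teardown: after this commit, any child events are
	 * reaped here, in line, rather than left to a workqueue that
	 * might never run for them.
	 */
	perf_event_release_kernel(cycles_event);
}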
@@ -3829,46 +3853,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-	put_event(file->private_data);
+	perf_event_release_kernel(file->private_data);
 	return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-	struct perf_event_context *ctx;
-	struct perf_event *event, *tmp;
-
-	ctx = container_of(work, struct perf_event_context,
-			   orphans_remove.work);
-
-	mutex_lock(&ctx->mutex);
-	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-		struct perf_event *parent_event = event->parent;
-
-		if (!is_orphaned_child(event))
-			continue;
-
-		perf_remove_from_context(event, DETACH_GROUP);
-
-		mutex_lock(&parent_event->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent_event->child_mutex);
-
-		free_event(event);
-		put_event(parent_event);
-	}
-
-	raw_spin_lock_irq(&ctx->lock);
-	ctx->orphans_remove_sched = false;
-	raw_spin_unlock_irq(&ctx->lock);
-	mutex_unlock(&ctx->mutex);
-
-	put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -8719,7 +8707,7 @@ perf_event_exit_event(struct perf_event *child_event,
 	if (parent_event)
 		perf_group_detach(child_event);
 	list_del_event(child_event, child_ctx);
-	child_event->state = PERF_EVENT_STATE_EXIT;
+	child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
@@ -8977,8 +8965,16 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
+	/*
+	 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+	 * must be under the same lock in order to serialize against
+	 * perf_event_release_kernel(), such that either we must observe
+	 * is_orphaned_event() or they will observe us on the child_list.
+	 */
+	mutex_lock(&parent_event->child_mutex);
 	if (is_orphaned_event(parent_event) ||
 	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
+		mutex_unlock(&parent_event->child_mutex);
 		free_event(child_event);
 		return NULL;
 	}
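
[Editor's note] The comment added above states an either/or guarantee. A simplified userspace sketch of the same pattern follows; the names are invented, and it simplifies by setting the orphan flag under the mutex, whereas the real code sets PERF_EVENT_STATE_EXIT elsewhere, under ctx->lock, before the release loop runs:

/*
 * The orphan check and the list insertion share one child_mutex
 * critical section, so a concurrent releaser either is seen here as
 * orphaned, or sees this child on the list and reaps it.
 */
#include <pthread.h>
#include <stdbool.h>

struct child { struct child *next; };

static pthread_mutex_t child_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool parent_exited;		/* PERF_EVENT_STATE_EXIT stand-in */
static struct child *child_list;

static bool add_child(struct child *c)		/* inherit_event() side */
{
	pthread_mutex_lock(&child_mutex);
	if (parent_exited) {			/* releaser won the race */
		pthread_mutex_unlock(&child_mutex);
		return false;
	}
	c->next = child_list;			/* publish under the lock */
	child_list = c;
	pthread_mutex_unlock(&child_mutex);
	return true;
}

static void release_all_children(void)		/* release_kernel() side */
{
	pthread_mutex_lock(&child_mutex);
	parent_exited = true;			/* no new children now... */
	while (child_list)			/* ...so this drains them all */
		child_list = child_list->next;
	pthread_mutex_unlock(&child_mutex);
}

int main(void)
{
	struct child c;

	if (add_child(&c))
		release_all_children();
	return 0;
}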
@@ -9026,8 +9022,6 @@ inherit_event(struct perf_event *parent_event,
 	/*
 	 * Link this into the parent event's child list
 	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
 	list_add_tail(&child_event->child_list, &parent_event->child_list);
 	mutex_unlock(&parent_event->child_mutex);
