@@ -50,7 +50,5 @@
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -1645,45 +1643,11 @@ static void perf_group_detach(struct perf_event *event)
 		perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event && event->state == PERF_EVENT_STATE_EXIT;
-}
-
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-	return is_orphaned_event(event->parent);
-}
-
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-		return;
-
-	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-		get_ctx(ctx);
-		ctx->orphans_remove_sched = true;
-	}
-}
-
-static int __init perf_workqueue_init(void)
-{
-	perf_wq = create_singlethread_workqueue("perf");
-	WARN(!perf_wq, "failed to create perf workqueue\n");
-	return perf_wq ? 0 : -1;
+	return event->state == PERF_EVENT_STATE_EXIT;
 }
 
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
 	struct pmu *pmu = event->pmu;
@@ -1744,9 +1708,6 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1984,9 +1945,6 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3369,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
-	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3782,11 +3739,22 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx;
-
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
+	_free_event(event);
+}
+
+/*
+ * Kill an event dead; while event:refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
+int perf_event_release_kernel(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *child, *tmp;
+
 	if (!is_kernel_event(event))
 		perf_remove_from_owner(event);
 
@@ -3811,14 +3779,70 @@ static void put_event(struct perf_event *event)
 	 * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
 	 * either from the above perf_remove_from_context() or through
 	 * perf_event_exit_event().
+	 *
+	 * Therefore, anybody acquiring event->child_mutex after the below
+	 * loop _must_ also see this, most importantly inherit_event() which
+	 * will avoid placing more children on the list.
+	 *
+	 * Thus this guarantees that we will in fact observe and kill _ALL_
+	 * child events.
 	 */
 	WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
 
-	_free_event(event);
-}
+again:
+	mutex_lock(&event->child_mutex);
+	list_for_each_entry(child, &event->child_list, child_list) {
 
-int perf_event_release_kernel(struct perf_event *event)
-{
+		/*
+		 * Cannot change, child events are not migrated, see the
+		 * comment with perf_event_ctx_lock_nested().
+		 */
+		ctx = lockless_dereference(child->ctx);
+		/*
+		 * Since child_mutex nests inside ctx::mutex, we must jump
+		 * through hoops. We start by grabbing a reference on the ctx.
+		 *
+		 * Since the event cannot get freed while we hold the
+		 * child_mutex, the context must also exist and have a !0
+		 * reference count.
+		 */
+		get_ctx(ctx);
+
+		/*
+		 * Now that we have a ctx ref, we can drop child_mutex, and
+		 * acquire ctx::mutex without fear of it going away. Then we
+		 * can re-acquire child_mutex.
+		 */
+		mutex_unlock(&event->child_mutex);
+		mutex_lock(&ctx->mutex);
+		mutex_lock(&event->child_mutex);
+
+		/*
+		 * Now that we hold ctx::mutex and child_mutex, revalidate our
+		 * state, if child is still the first entry, it didn't get freed
+		 * and we can continue doing so.
+		 */
+		tmp = list_first_entry_or_null(&event->child_list,
+					       struct perf_event, child_list);
+		if (tmp == child) {
+			perf_remove_from_context(child, DETACH_GROUP);
+			list_del(&child->child_list);
+			free_event(child);
+			/*
+			 * This matches the refcount bump in inherit_event();
+			 * this can't be the last reference.
+			 */
+			put_event(event);
+		}
+
+		mutex_unlock(&event->child_mutex);
+		mutex_unlock(&ctx->mutex);
+		put_ctx(ctx);
+		goto again;
+	}
+	mutex_unlock(&event->child_mutex);
+
+	/* Must be the last reference */
 	put_event(event);
 	return 0;
 }
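
Note: the retry loop added to perf_event_release_kernel() above is an instance of a general lock-ordering pattern. When the lock you already hold (child_mutex) nests inside the lock you still need (ctx::mutex), you drop the inner lock, take the outer lock, re-take the inner lock, and then revalidate your position before acting. The userspace sketch below illustrates only that shape with pthread mutexes; the names (struct parent, release_children, ...) are hypothetical, and it leaves out the reference counting (get_ctx()/put_ctx()) that keeps the outer object alive in the real code.

#include <pthread.h>
#include <stdlib.h>

struct child {
	struct child *next;
	pthread_mutex_t *ctx_lock;	/* analog of ctx->mutex (outer lock) */
};

struct parent {
	pthread_mutex_t child_lock;	/* analog of event->child_mutex (inner lock) */
	struct child *children;		/* analog of event->child_list */
};

/* Lock order: ctx_lock (outer) before child_lock (inner). */
static void release_children(struct parent *p)
{
again:
	pthread_mutex_lock(&p->child_lock);
	if (p->children) {
		struct child *c = p->children;
		pthread_mutex_t *outer = c->ctx_lock;

		/*
		 * We hold only the inner lock but need the outer one too,
		 * so drop the inner lock, take the outer lock, then
		 * re-take the inner lock.  (The kernel code additionally
		 * takes a reference on the ctx here so 'outer' cannot die.)
		 */
		pthread_mutex_unlock(&p->child_lock);
		pthread_mutex_lock(outer);
		pthread_mutex_lock(&p->child_lock);

		/*
		 * Revalidate: while the locks were dropped someone else may
		 * have freed this child; only act if it is still the first
		 * entry on the list.
		 */
		if (p->children == c) {
			p->children = c->next;
			free(c);
		}

		pthread_mutex_unlock(&p->child_lock);
		pthread_mutex_unlock(outer);
		goto again;		/* start over from the list head */
	}
	pthread_mutex_unlock(&p->child_lock);
}
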
@@ -3829,46 +3853,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-	put_event(file->private_data);
+	perf_event_release_kernel(file->private_data);
 	return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-	struct perf_event_context *ctx;
-	struct perf_event *event, *tmp;
-
-	ctx = container_of(work, struct perf_event_context,
-			   orphans_remove.work);
-
-	mutex_lock(&ctx->mutex);
-	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-		struct perf_event *parent_event = event->parent;
-
-		if (!is_orphaned_child(event))
-			continue;
-
-		perf_remove_from_context(event, DETACH_GROUP);
-
-		mutex_lock(&parent_event->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent_event->child_mutex);
-
-		free_event(event);
-		put_event(parent_event);
-	}
-
-	raw_spin_lock_irq(&ctx->lock);
-	ctx->orphans_remove_sched = false;
-	raw_spin_unlock_irq(&ctx->lock);
-	mutex_unlock(&ctx->mutex);
-
-	put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
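
For contrast with the now-synchronous release path, the orphans_remove_work() removed above was the work-item half of a common deferred-cleanup idiom: pin the object with a reference before queueing delayed work on it, and have the handler recover the enclosing object with container_of() and drop the reference when it finishes. A minimal, kernel-style sketch of that idiom only (hypothetical demo_ctx/demo_* names, not the perf code; locking around cleanup_sched is elided):

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct demo_ctx {
	struct kref		ref;
	bool			cleanup_sched;	/* guarded by a lock in real code */
	struct delayed_work	cleanup_work;	/* INIT_DELAYED_WORK()ed at creation */
};

static void demo_ctx_release(struct kref *ref)
{
	kfree(container_of(ref, struct demo_ctx, ref));
}

static void demo_cleanup_work(struct work_struct *work)
{
	/* Recover the enclosing object from the embedded work item. */
	struct demo_ctx *ctx = container_of(work, struct demo_ctx,
					    cleanup_work.work);

	/* ... perform the deferred cleanup ... */

	ctx->cleanup_sched = false;
	kref_put(&ctx->ref, demo_ctx_release);	/* drop the queue-time reference */
}

static void demo_schedule_cleanup(struct demo_ctx *ctx)
{
	if (ctx->cleanup_sched)
		return;

	/* Pin the object so it outlives the queued work. */
	if (schedule_delayed_work(&ctx->cleanup_work, 1)) {
		kref_get(&ctx->ref);
		ctx->cleanup_sched = true;
	}
}
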
@@ -8719,7 +8707,7 @@ perf_event_exit_event(struct perf_event *child_event,
 	if (parent_event)
 		perf_group_detach(child_event);
 	list_del_event(child_event, child_ctx);
-	child_event->state = PERF_EVENT_STATE_EXIT;
+	child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
@@ -8977,8 +8965,16 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
+	/*
+	 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+	 * must be under the same lock in order to serialize against
+	 * perf_event_release_kernel(), such that either we must observe
+	 * is_orphaned_event() or they will observe us on the child_list.
+	 */
+	mutex_lock(&parent_event->child_mutex);
 	if (is_orphaned_event(parent_event) ||
 	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
+		mutex_unlock(&parent_event->child_mutex);
 		free_event(child_event);
 		return NULL;
 	}
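
The comment added in this hunk describes a classic two-party handshake: as long as the "check the flag" and "add to the list" steps of one side happen under the same mutex that the other side holds while it marks the flag and reaps the list, neither side can slip past the other unseen. A condensed userspace analog of that argument only (hypothetical names, a pthread mutex standing in for child_mutex, a counter standing in for the child_list):

#include <pthread.h>
#include <stdbool.h>

struct parent {
	pthread_mutex_t child_lock;	/* analog of parent_event->child_mutex */
	bool		exiting;	/* analog of PERF_EVENT_STATE_EXIT */
	int		nr_children;	/* stand-in for the child_list */
};

/* Release side: mark the parent dead, then reap, all under child_lock. */
static void release_parent(struct parent *p)
{
	pthread_mutex_lock(&p->child_lock);
	p->exiting = true;
	/* ... walk and free the children that made it onto the list ... */
	p->nr_children = 0;
	pthread_mutex_unlock(&p->child_lock);
}

/*
 * Inherit side: the check and the insertion are done under the same lock,
 * so either we see 'exiting' and bail out, or the releaser sees the new
 * child on its list.  A child can never be added and then missed.
 */
static bool inherit_child(struct parent *p)
{
	pthread_mutex_lock(&p->child_lock);
	if (p->exiting) {
		pthread_mutex_unlock(&p->child_lock);
		return false;
	}
	p->nr_children++;		/* stand-in for list_add_tail() */
	pthread_mutex_unlock(&p->child_lock);
	return true;
}
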
@@ -9026,8 +9022,6 @@ inherit_event(struct perf_event *parent_event,
 	/*
 	 * Link this into the parent event's child list
 	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
 	list_add_tail(&child_event->child_list, &parent_event->child_list);
 	mutex_unlock(&parent_event->child_mutex);
 