@@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	__sched_fork(clone_flags, p);
 	/*
-	 * We mark the process as running here. This guarantees that
+	 * We mark the process as NEW here. This guarantees that
 	 * nobody will actually run it, and a signal or other external
 	 * event cannot wake it up and insert it on the runqueue either.
 	 */
-	p->state = TASK_RUNNING;
+	p->state = TASK_NEW;
 
 	/*
 	 * Make sure we do not leak PI boosting priority to the child.
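
Not part of the patch, but it helps to see why a separate NEW state keeps early wakeups out: the wakeup path only acts when the task's current state is covered by the caller's wake mask, and TASK_NEW is deliberately outside TASK_NORMAL. The sketch below models just that mask check in plain userspace C; the state constants are invented for illustration and are not the kernel's definitions.

/* Illustrative model only, not kernel code: constants are invented. */
#include <stdio.h>

#define TASK_RUNNING		0x0000
#define TASK_INTERRUPTIBLE	0x0001
#define TASK_UNINTERRUPTIBLE	0x0002
#define TASK_NEW		0x0800	/* hypothetical value for this sketch */
#define TASK_NORMAL		(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)

struct task { unsigned int state; };

/* Mirrors the shape of the wakeup path: do nothing unless the task's
 * current state is covered by the caller's wake mask. */
static int try_wake(struct task *p, unsigned int wake_mask)
{
	if (!(p->state & wake_mask))
		return 0;	/* TASK_NEW never matches TASK_NORMAL */
	p->state = TASK_RUNNING;
	return 1;
}

int main(void)
{
	struct task child = { .state = TASK_NEW };

	/* A signal-driven wakeup uses TASK_NORMAL and therefore fails. */
	printf("signal wakeup succeeded? %d\n", try_wake(&child, TASK_NORMAL));
	return 0;
}

Run, the model prints "signal wakeup succeeded? 0": a signal cannot put the NEW child on a runqueue, and only wake_up_new_task(), which sets TASK_RUNNING explicitly in a later hunk, gets it running.
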
@@ -2383,8 +2383,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 		p->sched_class = &fair_sched_class;
 	}
 
-	if (p->sched_class->task_fork)
-		p->sched_class->task_fork(p);
+	init_entity_runnable_average(&p->se);
 
 	/*
 	 * The child is not yet in the pid-hash so no cgroup attach races,
@@ -2394,7 +2393,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	 * Silence PROVE_RCU.
 	 */
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	set_task_cpu(p, cpu);
+	/*
+	 * We're setting the cpu for the first time, we don't migrate,
+	 * so use __set_task_cpu().
+	 */
+	__set_task_cpu(p, cpu);
+	if (p->sched_class->task_fork)
+		p->sched_class->task_fork(p);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 #ifdef CONFIG_SCHED_INFO
@@ -2526,16 +2531,18 @@ void wake_up_new_task(struct task_struct *p)
 	struct rq_flags rf;
 	struct rq *rq;
 
-	/* Initialize new task's runnable average */
-	init_entity_runnable_average(&p->se);
 	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
 	/*
 	 * Fork balancing, do it here and not earlier because:
 	 *  - cpus_allowed can change in the fork path
 	 *  - any previously selected cpu might disappear through hotplug
+	 *
+	 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
+	 * as we're not fully set-up yet.
 	 */
-	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
+	__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
 	rq = __task_rq_lock(p, &rf);
 	post_init_entity_util_avg(&p->se);
@@ -3161,6 +3168,9 @@ static noinline void __schedule_bug(struct task_struct *prev)
 		pr_cont("\n");
 	}
 #endif
+	if (panic_on_warn)
+		panic("scheduling while atomic\n");
+
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
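
A side note on the new panic_on_warn check (an assumption about intended usage, not something stated in the patch): kernel.panic_on_warn is an existing sysctl, so with it enabled a "scheduling while atomic" splat now brings the machine down for a crash dump instead of only tainting and continuing. A minimal userspace peek at the knob:

/* Sketch: read the existing kernel.panic_on_warn sysctl via procfs. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/panic_on_warn", "r");
	int val = -1;

	if (!f) {
		perror("open /proc/sys/kernel/panic_on_warn");
		return 1;
	}
	if (fscanf(f, "%d", &val) != 1)
		val = -1;
	fclose(f);
	printf("kernel.panic_on_warn = %d\n", val);
	return 0;
}
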
@@ -4752,7 +4762,8 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to hold the current cpu mask
  *
- * Return: 0 on success. An error code otherwise.
+ * Return: size of CPU mask copied to user_mask_ptr on success. An
+ * error code otherwise.
  */
 SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
 		unsigned long __user *, user_mask_ptr)
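
Not part of the patch: the corrected kernel-doc describes the raw syscall, which on success returns how many bytes of CPU mask were copied to user_mask_ptr; the glibc sched_getaffinity() wrapper still returns 0 and hides that value. A minimal userspace sketch that invokes the raw syscall directly:

/* Calls the raw sched_getaffinity syscall to expose its return value,
 * which the glibc wrapper normally hides (the wrapper returns 0). */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t mask;
	long ret = syscall(SYS_sched_getaffinity, 0 /* self */,
			   sizeof(mask), &mask);

	if (ret < 0) {
		perror("sched_getaffinity");
		return 1;
	}
	/* ret is the number of bytes of CPU mask the kernel copied out. */
	printf("kernel copied %ld bytes of CPU mask\n", ret);
	printf("CPUs currently allowed: %d\n", CPU_COUNT(&mask));
	return 0;
}

On a typical build this prints a small multiple of the word size (the kernel's internal cpumask size, capped by the length passed in), which is exactly the value the updated comment documents.
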
@@ -7231,7 +7242,6 @@ static void sched_rq_cpu_starting(unsigned int cpu)
 	struct rq *rq = cpu_rq(cpu);
 
 	rq->calc_load_update = calc_load_update;
-	account_reset_rq(rq);
 	update_max_interval();
 }
 
@@ -7711,6 +7721,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 	INIT_LIST_HEAD(&tg->children);
 	list_add_rcu(&tg->siblings, &parent->children);
 	spin_unlock_irqrestore(&task_group_lock, flags);
+
+	online_fair_sched_group(tg);
 }
 
 /* rcu callback to free various structures associated with a task group */
@@ -7739,27 +7751,9 @@ void sched_offline_group(struct task_group *tg)
 	spin_unlock_irqrestore(&task_group_lock, flags);
 }
 
-/* change task's runqueue when it moves between groups.
- * The caller of this function should have put the task in its new group
- * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
- * reflect its new group.
- */
-void sched_move_task(struct task_struct *tsk)
+static void sched_change_group(struct task_struct *tsk, int type)
 {
 	struct task_group *tg;
-	int queued, running;
-	struct rq_flags rf;
-	struct rq *rq;
-
-	rq = task_rq_lock(tsk, &rf);
-
-	running = task_current(rq, tsk);
-	queued = task_on_rq_queued(tsk);
-
-	if (queued)
-		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
-	if (unlikely(running))
-		put_prev_task(rq, tsk);
 
 	/*
 	 * All callers are synchronized by task_rq_lock(); we do not use RCU
@@ -7772,11 +7766,37 @@ void sched_move_task(struct task_struct *tsk)
 	tsk->sched_task_group = tg;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	if (tsk->sched_class->task_move_group)
-		tsk->sched_class->task_move_group(tsk);
+	if (tsk->sched_class->task_change_group)
+		tsk->sched_class->task_change_group(tsk, type);
 	else
 #endif
 		set_task_rq(tsk, task_cpu(tsk));
+}
+
+/*
+ * Change task's runqueue when it moves between groups.
+ *
+ * The caller of this function should have put the task in its new group by
+ * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
+ * its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
+{
+	int queued, running;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(tsk, &rf);
+
+	running = task_current(rq, tsk);
+	queued = task_on_rq_queued(tsk);
+
+	if (queued)
+		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
+	if (unlikely(running))
+		put_prev_task(rq, tsk);
+
+	sched_change_group(tsk, TASK_MOVE_GROUP);
 
 	if (unlikely(running))
 		tsk->sched_class->set_curr_task(rq);
@@ -8204,15 +8224,27 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 	sched_free_group(tg);
 }
 
+/*
+ * This is called before wake_up_new_task(), therefore we really only
+ * have to set its group bits, all the other stuff does not apply.
+ */
 static void cpu_cgroup_fork(struct task_struct *task)
 {
-	sched_move_task(task);
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(task, &rf);
+
+	sched_change_group(task, TASK_SET_GROUP);
+
+	task_rq_unlock(rq, task, &rf);
 }
 
 static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 	struct cgroup_subsys_state *css;
+	int ret = 0;
 
 	cgroup_taskset_for_each(task, css, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -8223,8 +8255,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 		if (task->sched_class != &fair_sched_class)
 			return -EINVAL;
 #endif
+		/*
+		 * Serialize against wake_up_new_task() such that if its
+		 * running, we're sure to observe its full state.
+		 */
+		raw_spin_lock_irq(&task->pi_lock);
+		/*
+		 * Avoid calling sched_move_task() before wake_up_new_task()
+		 * has happened. This would lead to problems with PELT, due to
+		 * move wanting to detach+attach while we're not attached yet.
+		 */
+		if (task->state == TASK_NEW)
+			ret = -EINVAL;
+		raw_spin_unlock_irq(&task->pi_lock);
+
+		if (ret)
+			break;
 	}
-	return 0;
+	return ret;
 }
 
 static void cpu_cgroup_attach(struct cgroup_taskset *tset)
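
One userspace-visible consequence, sketched under assumptions that are not in the patch (a cgroup-v1 cpu controller mounted at the usual path, and a made-up attach_pid() helper): because cpu_cgroup_can_attach() now returns -EINVAL for a task still in TASK_NEW, writing such a PID into the controller's cgroup.procs (or tasks) file fails with errno EINVAL, and a caller can simply retry once the child has actually been woken.

/* Sketch: attach a PID to a cpu-controller cgroup (cgroup-v1 style path
 * assumed) and handle the EINVAL that can now be returned while the
 * target task is still in TASK_NEW. attach_pid() is a made-up helper. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

static int attach_pid(const char *cgroup_procs_path, pid_t pid)
{
	FILE *f = fopen(cgroup_procs_path, "w");
	int err = 0;

	if (!f)
		return -errno;
	if (fprintf(f, "%d\n", (int)pid) < 0)
		err = -EIO;
	/* stdio buffers the write; the kernel's verdict surfaces at fclose(). */
	if (fclose(f) != 0 && !err)
		err = -errno;
	return err;
}

int main(void)
{
	/* Path and PID are placeholders; adjust to the local cgroup mount. */
	int err = attach_pid("/sys/fs/cgroup/cpu/mygroup/cgroup.procs", 1234);

	if (err == -EINVAL)
		fprintf(stderr, "attach rejected, task may not be woken yet: %s\n",
			strerror(EINVAL));
	else if (err)
		fprintf(stderr, "attach failed: %s\n", strerror(-err));
	return err ? 1 : 0;
}
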