diff options
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 128 |
1 files changed, 89 insertions, 39 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 51d7105f529a..5c883fe8e440 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1937,7 +1937,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * chain to provide order. Instead we do: * * 1) smp_store_release(X->on_cpu, 0) - * 2) smp_cond_acquire(!X->on_cpu) + * 2) smp_cond_load_acquire(!X->on_cpu) * * Example: * @@ -1948,7 +1948,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * sched-out X * smp_store_release(X->on_cpu, 0); * - * smp_cond_acquire(!X->on_cpu); + * smp_cond_load_acquire(&X->on_cpu, !VAL); * X->state = WAKING * set_task_cpu(X,2) * @@ -1974,7 +1974,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * This means that any means of doing remote wakeups must order the CPU doing * the wakeup against the CPU the task is going to end up running on. This, * however, is already required for the regular Program-Order guarantee above, - * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire). + * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). * */ @@ -2047,7 +2047,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * This ensures that tasks getting woken will be fully ordered against * their previous state and preserve Program Order. */ - smp_cond_acquire(!p->on_cpu); + smp_cond_load_acquire(&p->on_cpu, !VAL); p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; @@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) __sched_fork(clone_flags, p); /* - * We mark the process as running here. This guarantees that + * We mark the process as NEW here. This guarantees that * nobody will actually run it, and a signal or other external * event cannot wake it up and insert it on the runqueue either. */ - p->state = TASK_RUNNING; + p->state = TASK_NEW; /* * Make sure we do not leak PI boosting priority to the child. @@ -2383,8 +2383,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->sched_class = &fair_sched_class; } - if (p->sched_class->task_fork) - p->sched_class->task_fork(p); + init_entity_runnable_average(&p->se); /* * The child is not yet in the pid-hash so no cgroup attach races, @@ -2394,7 +2393,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) * Silence PROVE_RCU. */ raw_spin_lock_irqsave(&p->pi_lock, flags); - set_task_cpu(p, cpu); + /* + * We're setting the cpu for the first time, we don't migrate, + * so use __set_task_cpu(). + */ + __set_task_cpu(p, cpu); + if (p->sched_class->task_fork) + p->sched_class->task_fork(p); raw_spin_unlock_irqrestore(&p->pi_lock, flags); #ifdef CONFIG_SCHED_INFO @@ -2526,16 +2531,18 @@ void wake_up_new_task(struct task_struct *p) struct rq_flags rf; struct rq *rq; - /* Initialize new task's runnable average */ - init_entity_runnable_average(&p->se); raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + p->state = TASK_RUNNING; #ifdef CONFIG_SMP /* * Fork balancing, do it here and not earlier because: * - cpus_allowed can change in the fork path * - any previously selected cpu might disappear through hotplug + * + * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, + * as we're not fully set-up yet. */ - set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); + __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); #endif rq = __task_rq_lock(p, &rf); post_init_entity_util_avg(&p->se); @@ -3161,6 +3168,9 @@ static noinline void __schedule_bug(struct task_struct *prev) pr_cont("\n"); } #endif + if (panic_on_warn) + panic("scheduling while atomic\n"); + dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } @@ -4752,7 +4762,8 @@ out_unlock: * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask * - * Return: 0 on success. An error code otherwise. + * Return: size of CPU mask copied to user_mask_ptr on success. An + * error code otherwise. */ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, unsigned long __user *, user_mask_ptr) @@ -5394,13 +5405,15 @@ void idle_task_exit(void) /* * Since this CPU is going 'away' for a while, fold any nr_active delta * we might have. Assumes we're called after migrate_tasks() so that the - * nr_active count is stable. + * nr_active count is stable. We need to take the teardown thread which + * is calling this into account, so we hand in adjust = 1 to the load + * calculation. * * Also see the comment "Global load-average calculations". */ static void calc_load_migrate(struct rq *rq) { - long delta = calc_load_fold_active(rq); + long delta = calc_load_fold_active(rq, 1); if (delta) atomic_long_add(delta, &calc_load_tasks); } @@ -7231,7 +7244,6 @@ static void sched_rq_cpu_starting(unsigned int cpu) struct rq *rq = cpu_rq(cpu); rq->calc_load_update = calc_load_update; - account_reset_rq(rq); update_max_interval(); } @@ -7711,6 +7723,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) INIT_LIST_HEAD(&tg->children); list_add_rcu(&tg->siblings, &parent->children); spin_unlock_irqrestore(&task_group_lock, flags); + + online_fair_sched_group(tg); } /* rcu callback to free various structures associated with a task group */ @@ -7739,27 +7753,9 @@ void sched_offline_group(struct task_group *tg) spin_unlock_irqrestore(&task_group_lock, flags); } -/* change task's runqueue when it moves between groups. - * The caller of this function should have put the task in its new group - * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to - * reflect its new group. - */ -void sched_move_task(struct task_struct *tsk) +static void sched_change_group(struct task_struct *tsk, int type) { struct task_group *tg; - int queued, running; - struct rq_flags rf; - struct rq *rq; - - rq = task_rq_lock(tsk, &rf); - - running = task_current(rq, tsk); - queued = task_on_rq_queued(tsk); - - if (queued) - dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); - if (unlikely(running)) - put_prev_task(rq, tsk); /* * All callers are synchronized by task_rq_lock(); we do not use RCU @@ -7772,11 +7768,37 @@ void sched_move_task(struct task_struct *tsk) tsk->sched_task_group = tg; #ifdef CONFIG_FAIR_GROUP_SCHED - if (tsk->sched_class->task_move_group) - tsk->sched_class->task_move_group(tsk); + if (tsk->sched_class->task_change_group) + tsk->sched_class->task_change_group(tsk, type); else #endif set_task_rq(tsk, task_cpu(tsk)); +} + +/* + * Change task's runqueue when it moves between groups. + * + * The caller of this function should have put the task in its new group by + * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect + * its new group. + */ +void sched_move_task(struct task_struct *tsk) +{ + int queued, running; + struct rq_flags rf; + struct rq *rq; + + rq = task_rq_lock(tsk, &rf); + + running = task_current(rq, tsk); + queued = task_on_rq_queued(tsk); + + if (queued) + dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); + if (unlikely(running)) + put_prev_task(rq, tsk); + + sched_change_group(tsk, TASK_MOVE_GROUP); if (unlikely(running)) tsk->sched_class->set_curr_task(rq); @@ -8204,15 +8226,27 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) sched_free_group(tg); } +/* + * This is called before wake_up_new_task(), therefore we really only + * have to set its group bits, all the other stuff does not apply. + */ static void cpu_cgroup_fork(struct task_struct *task) { - sched_move_task(task); + struct rq_flags rf; + struct rq *rq; + + rq = task_rq_lock(task, &rf); + + sched_change_group(task, TASK_SET_GROUP); + + task_rq_unlock(rq, task, &rf); } static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *css; + int ret = 0; cgroup_taskset_for_each(task, css, tset) { #ifdef CONFIG_RT_GROUP_SCHED @@ -8223,8 +8257,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) if (task->sched_class != &fair_sched_class) return -EINVAL; #endif + /* + * Serialize against wake_up_new_task() such that if its + * running, we're sure to observe its full state. + */ + raw_spin_lock_irq(&task->pi_lock); + /* + * Avoid calling sched_move_task() before wake_up_new_task() + * has happened. This would lead to problems with PELT, due to + * move wanting to detach+attach while we're not attached yet. + */ + if (task->state == TASK_NEW) + ret = -EINVAL; + raw_spin_unlock_irq(&task->pi_lock); + + if (ret) + break; } - return 0; + return ret; } static void cpu_cgroup_attach(struct cgroup_taskset *tset) |