aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c149
1 files changed, 84 insertions, 65 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ec1a286684a5..a814b3c88029 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -333,9 +333,12 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
for (;;) {
rq = task_rq(p);
raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p)))
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
return rq;
raw_spin_unlock(&rq->lock);
+
+ while (unlikely(task_on_rq_migrating(p)))
+ cpu_relax();
}
}
@@ -352,10 +355,13 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
raw_spin_lock_irqsave(&p->pi_lock, *flags);
rq = task_rq(p);
raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p)))
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
return rq;
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+ while (unlikely(task_on_rq_migrating(p)))
+ cpu_relax();
}
}
@@ -449,7 +455,15 @@ static void __hrtick_start(void *arg)
void hrtick_start(struct rq *rq, u64 delay)
{
struct hrtimer *timer = &rq->hrtick_timer;
- ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
+ ktime_t time;
+ s64 delta;
+
+ /*
+ * Don't schedule slices shorter than 10000ns, that just
+ * doesn't make sense and can cause timer DoS.
+ */
+ delta = max_t(s64, delay, 10000LL);
+ time = ktime_add_ns(timer->base->get_time(), delta);
hrtimer_set_expires(timer, time);
@@ -1043,7 +1057,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
* A queue event has occurred, and we're going to schedule. In
* this case, we can save a useless back to back clock update.
*/
- if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
+ if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
rq->skip_clock_update = 1;
}
@@ -1088,7 +1102,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
static void __migrate_swap_task(struct task_struct *p, int cpu)
{
- if (p->on_rq) {
+ if (task_on_rq_queued(p)) {
struct rq *src_rq, *dst_rq;
src_rq = task_rq(p);
@@ -1214,7 +1228,7 @@ static int migration_cpu_stop(void *data);
unsigned long wait_task_inactive(struct task_struct *p, long match_state)
{
unsigned long flags;
- int running, on_rq;
+ int running, queued;
unsigned long ncsw;
struct rq *rq;
@@ -1252,7 +1266,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
rq = task_rq_lock(p, &flags);
trace_sched_wait_task(p);
running = task_running(rq, p);
- on_rq = p->on_rq;
+ queued = task_on_rq_queued(p);
ncsw = 0;
if (!match_state || p->state == match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
@@ -1284,7 +1298,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* running right now), it's preempted, and we should
* yield - it could be a while.
*/
- if (unlikely(on_rq)) {
+ if (unlikely(queued)) {
ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
set_current_state(TASK_UNINTERRUPTIBLE);
@@ -1478,7 +1492,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
{
activate_task(rq, p, en_flags);
- p->on_rq = 1;
+ p->on_rq = TASK_ON_RQ_QUEUED;
/* if a worker is waking up, notify workqueue */
if (p->flags & PF_WQ_WORKER)
@@ -1537,7 +1551,7 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
int ret = 0;
rq = __task_rq_lock(p);
- if (p->on_rq) {
+ if (task_on_rq_queued(p)) {
/* check_preempt_curr() may use rq clock */
update_rq_clock(rq);
ttwu_do_wakeup(rq, p, wake_flags);
@@ -1742,7 +1756,7 @@ static void try_to_wake_up_local(struct task_struct *p)
if (!(p->state & TASK_NORMAL))
goto out;
- if (!p->on_rq)
+ if (!task_on_rq_queued(p))
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
ttwu_do_wakeup(rq, p, 0);
@@ -2095,7 +2109,7 @@ void wake_up_new_task(struct task_struct *p)
init_task_runnable_average(p);
rq = __task_rq_lock(p);
activate_task(rq, p, 0);
- p->on_rq = 1;
+ p->on_rq = TASK_ON_RQ_QUEUED;
trace_sched_wakeup_new(p, true);
check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
@@ -2451,7 +2465,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
* project cycles that may never be accounted to this
* thread, breaking clock_gettime().
*/
- if (task_current(rq, p) && p->on_rq) {
+ if (task_current(rq, p) && task_on_rq_queued(p)) {
update_rq_clock(rq);
ns = rq_clock_task(rq) - p->se.exec_start;
if ((s64)ns < 0)
@@ -2497,7 +2511,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
* If we see ->on_cpu without ->on_rq, the task is leaving, and has
* been accounted, so we're correct here as well.
*/
- if (!p->on_cpu || !p->on_rq)
+ if (!p->on_cpu || !task_on_rq_queued(p))
return p->se.sum_exec_runtime;
#endif
@@ -2801,7 +2815,7 @@ need_resched:
switch_count = &prev->nvcsw;
}
- if (prev->on_rq || rq->skip_clock_update < 0)
+ if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
update_rq_clock(rq);
next = pick_next_task(rq, prev);
@@ -2966,7 +2980,7 @@ EXPORT_SYMBOL(default_wake_function);
*/
void rt_mutex_setprio(struct task_struct *p, int prio)
{
- int oldprio, on_rq, running, enqueue_flag = 0;
+ int oldprio, queued, running, enqueue_flag = 0;
struct rq *rq;
const struct sched_class *prev_class;
@@ -2995,9 +3009,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
trace_sched_pi_setprio(p, prio);
oldprio = p->prio;
prev_class = p->sched_class;
- on_rq = p->on_rq;
+ queued = task_on_rq_queued(p);
running = task_current(rq, p);
- if (on_rq)
+ if (queued)
dequeue_task(rq, p, 0);
if (running)
p->sched_class->put_prev_task(rq, p);
@@ -3037,7 +3051,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq)
+ if (queued)
enqueue_task(rq, p, enqueue_flag);
check_class_changed(rq, p, prev_class, oldprio);
@@ -3048,7 +3062,7 @@ out_unlock:
void set_user_nice(struct task_struct *p, long nice)
{
- int old_prio, delta, on_rq;
+ int old_prio, delta, queued;
unsigned long flags;
struct rq *rq;
@@ -3069,8 +3083,8 @@ void set_user_nice(struct task_struct *p, long nice)
p->static_prio = NICE_TO_PRIO(nice);
goto out_unlock;
}
- on_rq = p->on_rq;
- if (on_rq)
+ queued = task_on_rq_queued(p);
+ if (queued)
dequeue_task(rq, p, 0);
p->static_prio = NICE_TO_PRIO(nice);
@@ -3079,7 +3093,7 @@ void set_user_nice(struct task_struct *p, long nice)
p->prio = effective_prio(p);
delta = p->prio - old_prio;
- if (on_rq) {
+ if (queued) {
enqueue_task(rq, p, 0);
/*
* If the task increased its priority or is running and
@@ -3351,7 +3365,7 @@ static int __sched_setscheduler(struct task_struct *p,
{
int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
MAX_RT_PRIO - 1 - attr->sched_priority;
- int retval, oldprio, oldpolicy = -1, on_rq, running;
+ int retval, oldprio, oldpolicy = -1, queued, running;
int policy = attr->sched_policy;
unsigned long flags;
const struct sched_class *prev_class;
@@ -3548,9 +3562,9 @@ change:
return 0;
}
- on_rq = p->on_rq;
+ queued = task_on_rq_queued(p);
running = task_current(rq, p);
- if (on_rq)
+ if (queued)
dequeue_task(rq, p, 0);
if (running)
p->sched_class->put_prev_task(rq, p);
@@ -3560,7 +3574,7 @@ change:
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
+ if (queued) {
/*
* We enqueue to tail when the priority of a task is
* increased (user space view).
@@ -4512,7 +4526,7 @@ void show_state_filter(unsigned long state_filter)
" task PC stack pid father\n");
#endif
rcu_read_lock();
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
/*
* reset the NMI-timeout, listing all files on a slow
* console might take a lot of time:
@@ -4520,7 +4534,7 @@ void show_state_filter(unsigned long state_filter)
touch_nmi_watchdog();
if (!state_filter || (p->state & state_filter))
sched_show_task(p);
- } while_each_thread(g, p);
+ }
touch_all_softlockup_watchdogs();
@@ -4575,7 +4589,7 @@ void init_idle(struct task_struct *idle, int cpu)
rcu_read_unlock();
rq->curr = rq->idle = idle;
- idle->on_rq = 1;
+ idle->on_rq = TASK_ON_RQ_QUEUED;
#if defined(CONFIG_SMP)
idle->on_cpu = 1;
#endif
@@ -4652,7 +4666,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
goto out;
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
- if (p->on_rq) {
+ if (task_on_rq_queued(p)) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, p, &flags);
@@ -4680,20 +4694,20 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
*/
static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
{
- struct rq *rq_dest, *rq_src;
+ struct rq *rq;
int ret = 0;
if (unlikely(!cpu_active(dest_cpu)))
return ret;
- rq_src = cpu_rq(src_cpu);
- rq_dest = cpu_rq(dest_cpu);
+ rq = cpu_rq(src_cpu);
raw_spin_lock(&p->pi_lock);
- double_rq_lock(rq_src, rq_dest);
+ raw_spin_lock(&rq->lock);
/* Already moved. */
if (task_cpu(p) != src_cpu)
goto done;
+
/* Affinity changed (again). */
if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
goto fail;
@@ -4702,16 +4716,23 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
* If we're not on a rq, the next wake-up will ensure we're
* placed properly.
*/
- if (p->on_rq) {
- dequeue_task(rq_src, p, 0);
+ if (task_on_rq_queued(p)) {
+ dequeue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, dest_cpu);
- enqueue_task(rq_dest, p, 0);
- check_preempt_curr(rq_dest, p, 0);
+ raw_spin_unlock(&rq->lock);
+
+ rq = cpu_rq(dest_cpu);
+ raw_spin_lock(&rq->lock);
+ BUG_ON(task_rq(p) != rq);
+ p->on_rq = TASK_ON_RQ_QUEUED;
+ enqueue_task(rq, p, 0);
+ check_preempt_curr(rq, p, 0);
}
done:
ret = 1;
fail:
- double_rq_unlock(rq_src, rq_dest);
+ raw_spin_unlock(&rq->lock);
raw_spin_unlock(&p->pi_lock);
return ret;
}
@@ -4743,13 +4764,13 @@ void sched_setnuma(struct task_struct *p, int nid)
{
struct rq *rq;
unsigned long flags;
- bool on_rq, running;
+ bool queued, running;
rq = task_rq_lock(p, &flags);
- on_rq = p->on_rq;
+ queued = task_on_rq_queued(p);
running = task_current(rq, p);
- if (on_rq)
+ if (queued)
dequeue_task(rq, p, 0);
if (running)
p->sched_class->put_prev_task(rq, p);
@@ -4758,7 +4779,7 @@ void sched_setnuma(struct task_struct *p, int nid)
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq)
+ if (queued)
enqueue_task(rq, p, 0);
task_rq_unlock(rq, p, &flags);
}
@@ -5746,7 +5767,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
const struct cpumask *span = sched_domain_span(sd);
struct cpumask *covered = sched_domains_tmpmask;
struct sd_data *sdd = sd->private;
- struct sched_domain *child;
+ struct sched_domain *sibling;
int i;
cpumask_clear(covered);
@@ -5757,10 +5778,10 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
if (cpumask_test_cpu(i, covered))
continue;
- child = *per_cpu_ptr(sdd->sd, i);
+ sibling = *per_cpu_ptr(sdd->sd, i);
/* See the comment near build_group_mask(). */
- if (!cpumask_test_cpu(i, sched_domain_span(child)))
+ if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
continue;
sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
@@ -5770,10 +5791,9 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
goto fail;
sg_span = sched_group_cpus(sg);
- if (child->child) {
- child = child->child;
- cpumask_copy(sg_span, sched_domain_span(child));
- } else
+ if (sibling->child)
+ cpumask_copy(sg_span, sched_domain_span(sibling->child));
+ else
cpumask_set_cpu(i, sg_span);
cpumask_or(covered, covered, sg_span);
@@ -7124,13 +7144,13 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
.sched_policy = SCHED_NORMAL,
};
int old_prio = p->prio;
- int on_rq;
+ int queued;
- on_rq = p->on_rq;
- if (on_rq)
+ queued = task_on_rq_queued(p);
+ if (queued)
dequeue_task(rq, p, 0);
__setscheduler(rq, p, &attr);
- if (on_rq) {
+ if (queued) {
enqueue_task(rq, p, 0);
resched_curr(rq);
}
@@ -7145,7 +7165,7 @@ void normalize_rt_tasks(void)
struct rq *rq;
read_lock_irqsave(&tasklist_lock, flags);
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
/*
* Only normalize user tasks:
*/
@@ -7176,8 +7196,7 @@ void normalize_rt_tasks(void)
__task_rq_unlock(rq);
raw_spin_unlock(&p->pi_lock);
- } while_each_thread(g, p);
-
+ }
read_unlock_irqrestore(&tasklist_lock, flags);
}
@@ -7318,16 +7337,16 @@ void sched_offline_group(struct task_group *tg)
void sched_move_task(struct task_struct *tsk)
{
struct task_group *tg;
- int on_rq, running;
+ int queued, running;
unsigned long flags;
struct rq *rq;
rq = task_rq_lock(tsk, &flags);
running = task_current(rq, tsk);
- on_rq = tsk->on_rq;
+ queued = task_on_rq_queued(tsk);
- if (on_rq)
+ if (queued)
dequeue_task(rq, tsk, 0);
if (unlikely(running))
tsk->sched_class->put_prev_task(rq, tsk);
@@ -7340,14 +7359,14 @@ void sched_move_task(struct task_struct *tsk)
#ifdef CONFIG_FAIR_GROUP_SCHED
if (tsk->sched_class->task_move_group)
- tsk->sched_class->task_move_group(tsk, on_rq);
+ tsk->sched_class->task_move_group(tsk, queued);
else
#endif
set_task_rq(tsk, task_cpu(tsk));
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
- if (on_rq)
+ if (queued)
enqueue_task(rq, tsk, 0);
task_rq_unlock(rq, tsk, &flags);
@@ -7365,10 +7384,10 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
{
struct task_struct *g, *p;
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (rt_task(p) && task_rq(p)->rt.tg == tg)
return 1;
- } while_each_thread(g, p);
+ }
return 0;
}