diff options
Diffstat (limited to 'kernel/sched_rt.c')
| -rw-r--r-- | kernel/sched_rt.c | 144 |
1 files changed, 82 insertions, 62 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index f48328ac216f..c914ec747ca6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); } +static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) +{ + list_add_rcu(&rt_rq->leaf_rt_rq_list, + &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list); +} + +static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) +{ + list_del_rcu(&rt_rq->leaf_rt_rq_list); +} + #define for_each_leaf_rt_rq(rt_rq, rq) \ list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) @@ -194,17 +205,20 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) return rt_se->my_q; } -static void enqueue_rt_entity(struct sched_rt_entity *rt_se); +static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head); static void dequeue_rt_entity(struct sched_rt_entity *rt_se); static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { + int this_cpu = smp_processor_id(); struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; - struct sched_rt_entity *rt_se = rt_rq->rt_se; + struct sched_rt_entity *rt_se; + + rt_se = rt_rq->tg->rt_se[this_cpu]; if (rt_rq->rt_nr_running) { if (rt_se && !on_rt_rq(rt_se)) - enqueue_rt_entity(rt_se); + enqueue_rt_entity(rt_se, false); if (rt_rq->highest_prio.curr < curr->prio) resched_task(curr); } @@ -212,7 +226,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) { - struct sched_rt_entity *rt_se = rt_rq->rt_se; + int this_cpu = smp_processor_id(); + struct sched_rt_entity *rt_se; + + rt_se = rt_rq->tg->rt_se[this_cpu]; if (rt_se && on_rt_rq(rt_se)) dequeue_rt_entity(rt_se); @@ -270,6 +287,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) return ktime_to_ns(def_rt_bandwidth.rt_period); } +static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) +{ +} + +static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) +{ +} + #define for_each_leaf_rt_rq(rt_rq, rq) \ for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) @@ -603,16 +628,16 @@ static void update_curr_rt(struct rq *rq) if (!task_has_rt_policy(curr)) return; - delta_exec = rq->clock - curr->se.exec_start; + delta_exec = rq->clock_task - curr->se.exec_start; if (unlikely((s64)delta_exec < 0)) delta_exec = 0; - schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); + schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; account_group_exec_runtime(curr, delta_exec); - curr->se.exec_start = rq->clock; + curr->se.exec_start = rq->clock_task; cpuacct_charge(curr, delta_exec); sched_rt_avg_update(rq, delta_exec); @@ -803,7 +828,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) dec_rt_group(rt_se, rt_rq); } -static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) +static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); struct rt_prio_array *array = &rt_rq->active; @@ -819,7 +844,13 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) return; - list_add_tail(&rt_se->run_list, queue); + if (!rt_rq->rt_nr_running) + list_add_leaf_rt_rq(rt_rq); + + if (head) + list_add(&rt_se->run_list, queue); + else + list_add_tail(&rt_se->run_list, queue); __set_bit(rt_se_prio(rt_se), array->bitmap); inc_rt_tasks(rt_se, rt_rq); @@ -835,6 +866,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) __clear_bit(rt_se_prio(rt_se), array->bitmap); dec_rt_tasks(rt_se, rt_rq); + if (!rt_rq->rt_nr_running) + list_del_leaf_rt_rq(rt_rq); } /* @@ -856,11 +889,11 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se) } } -static void enqueue_rt_entity(struct sched_rt_entity *rt_se) +static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) { dequeue_rt_stack(rt_se); for_each_sched_rt_entity(rt_se) - __enqueue_rt_entity(rt_se); + __enqueue_rt_entity(rt_se, head); } static void dequeue_rt_entity(struct sched_rt_entity *rt_se) @@ -871,27 +904,28 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) struct rt_rq *rt_rq = group_rt_rq(rt_se); if (rt_rq && rt_rq->rt_nr_running) - __enqueue_rt_entity(rt_se); + __enqueue_rt_entity(rt_se, false); } } /* * Adding/removing a task to/from a priority array: */ -static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) +static void +enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) { struct sched_rt_entity *rt_se = &p->rt; - if (wakeup) + if (flags & ENQUEUE_WAKEUP) rt_se->timeout = 0; - enqueue_rt_entity(rt_se); + enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } -static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) +static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) { struct sched_rt_entity *rt_se = &p->rt; @@ -938,10 +972,9 @@ static void yield_task_rt(struct rq *rq) #ifdef CONFIG_SMP static int find_lowest_rq(struct task_struct *task); -static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) +static int +select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) { - struct rq *rq = task_rq(p); - if (sd_flag != SD_BALANCE_WAKE) return smp_processor_id(); @@ -951,18 +984,19 @@ static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) * runqueue. Otherwise simply start this RT task * on its current runqueue. * - * We want to avoid overloading runqueues. Even if - * the RT task is of higher priority than the current RT task. - * RT tasks behave differently than other tasks. If - * one gets preempted, we try to push it off to another queue. - * So trying to keep a preempting RT task on the same - * cache hot CPU will force the running RT task to - * a cold CPU. So we waste all the cache for the lower - * RT task in hopes of saving some of a RT task - * that is just being woken and probably will have - * cold cache anyway. + * We want to avoid overloading runqueues. If the woken + * task is a higher priority, then it will stay on this CPU + * and the lower prio task should be moved to another CPU. + * Even though this will probably make the lower prio task + * lose its cache, we do not want to bounce a higher task + * around just because it gave up its CPU, perhaps for a + * lock? + * + * For equal prio tasks, we just let the scheduler sort it out. */ if (unlikely(rt_task(rq->curr)) && + (rq->curr->rt.nr_cpus_allowed < 2 || + rq->curr->prio < p->prio) && (p->rt.nr_cpus_allowed > 1)) { int cpu = find_lowest_rq(p); @@ -1065,7 +1099,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) } while (rt_rq); p = rt_task_of(rt_se); - p->se.exec_start = rq->clock; + p->se.exec_start = rq->clock_task; return p; } @@ -1130,13 +1164,18 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) for_each_leaf_rt_rq(rt_rq, rq) { array = &rt_rq->active; idx = sched_find_first_bit(array->bitmap); - next_idx: +next_idx: if (idx >= MAX_RT_PRIO) continue; if (next && next->prio < idx) continue; list_for_each_entry(rt_se, array->queue + idx, run_list) { - struct task_struct *p = rt_task_of(rt_se); + struct task_struct *p; + + if (!rt_entity_is_task(rt_se)) + continue; + + p = rt_task_of(rt_se); if (pick_rt_task(rq, p, cpu)) { next = p; break; @@ -1301,7 +1340,7 @@ static int push_rt_task(struct rq *rq) if (!next_task) return 0; - retry: +retry: if (unlikely(next_task == rq->curr)) { WARN_ON(1); return 0; @@ -1449,7 +1488,7 @@ static int pull_rt_task(struct rq *this_rq) * but possible) */ } - skip: +skip: double_unlock_balance(this_rq, src_rq); } @@ -1477,28 +1516,13 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && has_pushable_tasks(rq) && - p->rt.nr_cpus_allowed > 1) + p->rt.nr_cpus_allowed > 1 && + rt_task(rq->curr) && + (rq->curr->rt.nr_cpus_allowed < 2 || + rq->curr->prio < p->prio)) push_rt_tasks(rq); } -static unsigned long -load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, int *this_best_prio) -{ - /* don't touch RT tasks */ - return 0; -} - -static int -move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle) -{ - /* don't touch RT tasks */ - return 0; -} - static void set_cpus_allowed_rt(struct task_struct *p, const struct cpumask *new_mask) { @@ -1667,11 +1691,9 @@ static void watchdog(struct rq *rq, struct task_struct *p) { unsigned long soft, hard; - if (!p->signal) - return; - - soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur; - hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max; + /* max may change after cur was read, this will be fixed next tick */ + soft = task_rlimit(p, RLIMIT_RTTIME); + hard = task_rlimit_max(p, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { unsigned long next; @@ -1715,13 +1737,13 @@ static void set_curr_task_rt(struct rq *rq) { struct task_struct *p = rq->curr; - p->se.exec_start = rq->clock; + p->se.exec_start = rq->clock_task; /* The running task is never eligible for pushing */ dequeue_pushable_task(rq, p); } -unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) +static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) { /* * Time slice is 0 for SCHED_FIFO tasks @@ -1746,8 +1768,6 @@ static const struct sched_class rt_sched_class = { #ifdef CONFIG_SMP .select_task_rq = select_task_rq_rt, - .load_balance = load_balance_rt, - .move_one_task = move_one_task_rt, .set_cpus_allowed = set_cpus_allowed_rt, .rq_online = rq_online_rt, .rq_offline = rq_offline_rt, |