aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched_rt.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--kernel/sched_rt.c144
1 files changed, 82 insertions, 62 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f48328ac216f..c914ec747ca6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+ list_add_rcu(&rt_rq->leaf_rt_rq_list,
+ &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+ list_del_rcu(&rt_rq->leaf_rt_rq_list);
+}
+
#define for_each_leaf_rt_rq(rt_rq, rq) \
list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
@@ -194,17 +205,20 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
return rt_se->my_q;
}
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
+ int this_cpu = smp_processor_id();
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
- struct sched_rt_entity *rt_se = rt_rq->rt_se;
+ struct sched_rt_entity *rt_se;
+
+ rt_se = rt_rq->tg->rt_se[this_cpu];
if (rt_rq->rt_nr_running) {
if (rt_se && !on_rt_rq(rt_se))
- enqueue_rt_entity(rt_se);
+ enqueue_rt_entity(rt_se, false);
if (rt_rq->highest_prio.curr < curr->prio)
resched_task(curr);
}
@@ -212,7 +226,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
- struct sched_rt_entity *rt_se = rt_rq->rt_se;
+ int this_cpu = smp_processor_id();
+ struct sched_rt_entity *rt_se;
+
+ rt_se = rt_rq->tg->rt_se[this_cpu];
if (rt_se && on_rt_rq(rt_se))
dequeue_rt_entity(rt_se);
@@ -270,6 +287,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
return ktime_to_ns(def_rt_bandwidth.rt_period);
}
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
#define for_each_leaf_rt_rq(rt_rq, rq) \
for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
@@ -603,16 +628,16 @@ static void update_curr_rt(struct rq *rq)
if (!task_has_rt_policy(curr))
return;
- delta_exec = rq->clock - curr->se.exec_start;
+ delta_exec = rq->clock_task - curr->se.exec_start;
if (unlikely((s64)delta_exec < 0))
delta_exec = 0;
- schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
+ schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec));
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
- curr->se.exec_start = rq->clock;
+ curr->se.exec_start = rq->clock_task;
cpuacct_charge(curr, delta_exec);
sched_rt_avg_update(rq, delta_exec);
@@ -803,7 +828,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
dec_rt_group(rt_se, rt_rq);
}
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
+static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
@@ -819,7 +844,13 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
return;
- list_add_tail(&rt_se->run_list, queue);
+ if (!rt_rq->rt_nr_running)
+ list_add_leaf_rt_rq(rt_rq);
+
+ if (head)
+ list_add(&rt_se->run_list, queue);
+ else
+ list_add_tail(&rt_se->run_list, queue);
__set_bit(rt_se_prio(rt_se), array->bitmap);
inc_rt_tasks(rt_se, rt_rq);
@@ -835,6 +866,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
__clear_bit(rt_se_prio(rt_se), array->bitmap);
dec_rt_tasks(rt_se, rt_rq);
+ if (!rt_rq->rt_nr_running)
+ list_del_leaf_rt_rq(rt_rq);
}
/*
@@ -856,11 +889,11 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
}
}
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
dequeue_rt_stack(rt_se);
for_each_sched_rt_entity(rt_se)
- __enqueue_rt_entity(rt_se);
+ __enqueue_rt_entity(rt_se, head);
}
static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
@@ -871,27 +904,28 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
struct rt_rq *rt_rq = group_rt_rq(rt_se);
if (rt_rq && rt_rq->rt_nr_running)
- __enqueue_rt_entity(rt_se);
+ __enqueue_rt_entity(rt_se, false);
}
}
/*
* Adding/removing a task to/from a priority array:
*/
-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+static void
+enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
- if (wakeup)
+ if (flags & ENQUEUE_WAKEUP)
rt_se->timeout = 0;
- enqueue_rt_entity(rt_se);
+ enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
}
-static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
@@ -938,10 +972,9 @@ static void yield_task_rt(struct rq *rq)
#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);
-static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
+static int
+select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
{
- struct rq *rq = task_rq(p);
-
if (sd_flag != SD_BALANCE_WAKE)
return smp_processor_id();
@@ -951,18 +984,19 @@ static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
* runqueue. Otherwise simply start this RT task
* on its current runqueue.
*
- * We want to avoid overloading runqueues. Even if
- * the RT task is of higher priority than the current RT task.
- * RT tasks behave differently than other tasks. If
- * one gets preempted, we try to push it off to another queue.
- * So trying to keep a preempting RT task on the same
- * cache hot CPU will force the running RT task to
- * a cold CPU. So we waste all the cache for the lower
- * RT task in hopes of saving some of a RT task
- * that is just being woken and probably will have
- * cold cache anyway.
+ * We want to avoid overloading runqueues. If the woken
+ * task is a higher priority, then it will stay on this CPU
+ * and the lower prio task should be moved to another CPU.
+ * Even though this will probably make the lower prio task
+ * lose its cache, we do not want to bounce a higher task
+ * around just because it gave up its CPU, perhaps for a
+ * lock?
+ *
+ * For equal prio tasks, we just let the scheduler sort it out.
*/
if (unlikely(rt_task(rq->curr)) &&
+ (rq->curr->rt.nr_cpus_allowed < 2 ||
+ rq->curr->prio < p->prio) &&
(p->rt.nr_cpus_allowed > 1)) {
int cpu = find_lowest_rq(p);
@@ -1065,7 +1099,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
} while (rt_rq);
p = rt_task_of(rt_se);
- p->se.exec_start = rq->clock;
+ p->se.exec_start = rq->clock_task;
return p;
}
@@ -1130,13 +1164,18 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
for_each_leaf_rt_rq(rt_rq, rq) {
array = &rt_rq->active;
idx = sched_find_first_bit(array->bitmap);
- next_idx:
+next_idx:
if (idx >= MAX_RT_PRIO)
continue;
if (next && next->prio < idx)
continue;
list_for_each_entry(rt_se, array->queue + idx, run_list) {
- struct task_struct *p = rt_task_of(rt_se);
+ struct task_struct *p;
+
+ if (!rt_entity_is_task(rt_se))
+ continue;
+
+ p = rt_task_of(rt_se);
if (pick_rt_task(rq, p, cpu)) {
next = p;
break;
@@ -1301,7 +1340,7 @@ static int push_rt_task(struct rq *rq)
if (!next_task)
return 0;
- retry:
+retry:
if (unlikely(next_task == rq->curr)) {
WARN_ON(1);
return 0;
@@ -1449,7 +1488,7 @@ static int pull_rt_task(struct rq *this_rq)
* but possible)
*/
}
- skip:
+skip:
double_unlock_balance(this_rq, src_rq);
}
@@ -1477,28 +1516,13 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
has_pushable_tasks(rq) &&
- p->rt.nr_cpus_allowed > 1)
+ p->rt.nr_cpus_allowed > 1 &&
+ rt_task(rq->curr) &&
+ (rq->curr->rt.nr_cpus_allowed < 2 ||
+ rq->curr->prio < p->prio))
push_rt_tasks(rq);
}
-static unsigned long
-load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
- unsigned long max_load_move,
- struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, int *this_best_prio)
-{
- /* don't touch RT tasks */
- return 0;
-}
-
-static int
-move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
- struct sched_domain *sd, enum cpu_idle_type idle)
-{
- /* don't touch RT tasks */
- return 0;
-}
-
static void set_cpus_allowed_rt(struct task_struct *p,
const struct cpumask *new_mask)
{
@@ -1667,11 +1691,9 @@ static void watchdog(struct rq *rq, struct task_struct *p)
{
unsigned long soft, hard;
- if (!p->signal)
- return;
-
- soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
- hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max;
+ /* max may change after cur was read, this will be fixed next tick */
+ soft = task_rlimit(p, RLIMIT_RTTIME);
+ hard = task_rlimit_max(p, RLIMIT_RTTIME);
if (soft != RLIM_INFINITY) {
unsigned long next;
@@ -1715,13 +1737,13 @@ static void set_curr_task_rt(struct rq *rq)
{
struct task_struct *p = rq->curr;
- p->se.exec_start = rq->clock;
+ p->se.exec_start = rq->clock_task;
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);
}
-unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
+static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
/*
* Time slice is 0 for SCHED_FIFO tasks
@@ -1746,8 +1768,6 @@ static const struct sched_class rt_sched_class = {
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_rt,
- .load_balance = load_balance_rt,
- .move_one_task = move_one_task_rt,
.set_cpus_allowed = set_cpus_allowed_rt,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,