diff options
-rw-r--r-- | include/linux/sched.h | 12 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 301 | ||||
-rw-r--r-- | kernel/sched/fair.c | 24 | ||||
-rw-r--r-- | kernel/sched/idle.c | 2 | ||||
-rw-r--r-- | kernel/sched/sched.h | 4 |
5 files changed, 298 insertions, 45 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c771ea4481d..4edd7e2096fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -641,12 +641,24 @@ struct sched_dl_entity { * overruns. * * @dl_server tells if this is a server entity. + * + * @dl_defer tells if this is a deferred or regular server. For + * now only defer server exists. + * + * @dl_defer_armed tells if the deferrable server is waiting + * for the replenishment timer to activate it. + * + * @dl_defer_running tells if the deferrable server is actually + * running, skipping the defer phase. */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; + unsigned int dl_defer : 1; + unsigned int dl_defer_armed : 1; + unsigned int dl_defer_running : 1; /* * Bandwidth enforcement timer. Each -deadline task has its diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index f5b531372e3f..1b295314bc93 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -771,6 +771,15 @@ static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se, /* for non-boosted task, pi_of(dl_se) == dl_se */ dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; dl_se->runtime = pi_of(dl_se)->dl_runtime; + + /* + * If it is a deferred reservation, and the server + * is not handling a starvation case, defer it. 
+ */ + if (dl_se->dl_defer & !dl_se->dl_defer_running) { + dl_se->dl_throttled = 1; + dl_se->dl_defer_armed = 1; + } } /* @@ -809,6 +818,9 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se) replenish_dl_new_period(dl_se, rq); } +static int start_dl_timer(struct sched_dl_entity *dl_se); +static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t); + /* * Pure Earliest Deadline First (EDF) scheduling does not deal with the * possibility of a entity lasting more than what it declared, and thus @@ -837,9 +849,18 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se) /* * This could be the case for a !-dl task that is boosted. * Just go with full inherited parameters. + * + * Or, it could be the case of a deferred reservation that + * was not able to consume its runtime in background and + * reached this point with current u > U. + * + * In both cases, set a new period. */ - if (dl_se->dl_deadline == 0) - replenish_dl_new_period(dl_se, rq); + if (dl_se->dl_deadline == 0 || + (dl_se->dl_defer_armed && dl_entity_overflow(dl_se, rq_clock(rq)))) { + dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; + dl_se->runtime = pi_of(dl_se)->dl_runtime; + } if (dl_se->dl_yielded && dl_se->runtime > 0) dl_se->runtime = 0; @@ -873,6 +894,44 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se) dl_se->dl_yielded = 0; if (dl_se->dl_throttled) dl_se->dl_throttled = 0; + + /* + * If this is the replenishment of a deferred reservation, + * clear the flag and return. + */ + if (dl_se->dl_defer_armed) { + dl_se->dl_defer_armed = 0; + return; + } + + /* + * At this point, if the deferred server is not armed, and the deadline + * is in the future, if it is not running already, throttle the server + * and arm the defer timer. 
+ */ + if (dl_se->dl_defer && !dl_se->dl_defer_running && + dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) { + if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) { + + /* + * Set dl_se->dl_defer_armed and dl_throttled variables to + * inform the start_dl_timer() that this is a deferred + * activation. + */ + dl_se->dl_defer_armed = 1; + dl_se->dl_throttled = 1; + if (!start_dl_timer(dl_se)) { + /* + * If for whatever reason (delays), a previous timer was + * queued but not serviced, cancel it and clean the + * deferrable server variables intended for start_dl_timer(). + */ + hrtimer_try_to_cancel(&dl_se->dl_timer); + dl_se->dl_defer_armed = 0; + dl_se->dl_throttled = 0; + } + } + } } /* @@ -1023,6 +1082,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se) } replenish_dl_new_period(dl_se, rq); + } else if (dl_server(dl_se) && dl_se->dl_defer) { + /* + * The server can still use its previous deadline, so check if + * it left the dl_defer_running state. + */ + if (!dl_se->dl_defer_running) { + dl_se->dl_defer_armed = 1; + dl_se->dl_throttled = 1; + } } } @@ -1055,8 +1123,21 @@ static int start_dl_timer(struct sched_dl_entity *dl_se) * We want the timer to fire at the deadline, but considering * that it is actually coming from rq->clock and not from * hrtimer's time base reading. + * + * The deferred reservation will have its timer set to + * (deadline - runtime). At that point, the CBS rule will decide + * if the current deadline can be used, or if a replenishment is + * required to avoid adding too much pressure on the system + * (current u > U). 
*/ - act = ns_to_ktime(dl_next_period(dl_se)); + if (dl_se->dl_defer_armed) { + WARN_ON_ONCE(!dl_se->dl_throttled); + act = ns_to_ktime(dl_se->deadline - dl_se->runtime); + } else { + /* act = deadline - rel-deadline + period */ + act = ns_to_ktime(dl_next_period(dl_se)); + } + now = hrtimer_cb_get_time(timer); delta = ktime_to_ns(now) - rq_clock(rq); act = ktime_add_ns(act, delta); @@ -1106,6 +1187,62 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf) #endif } +/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */ +static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC; + +static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se) +{ + struct rq *rq = rq_of_dl_se(dl_se); + u64 fw; + + scoped_guard (rq_lock, rq) { + struct rq_flags *rf = &scope.rf; + + if (!dl_se->dl_throttled || !dl_se->dl_runtime) + return HRTIMER_NORESTART; + + sched_clock_tick(); + update_rq_clock(rq); + + if (!dl_se->dl_runtime) + return HRTIMER_NORESTART; + + if (!dl_se->server_has_tasks(dl_se)) { + replenish_dl_entity(dl_se); + return HRTIMER_NORESTART; + } + + if (dl_se->dl_defer_armed) { + /* + * First check if the server could consume runtime in background. + * If so, it is possible to push the defer timer for this amount + * of time. The dl_server_min_res serves as a limit to avoid + * forwarding the timer for a too small amount of time. 
+ */ + if (dl_time_before(rq_clock(dl_se->rq), + (dl_se->deadline - dl_se->runtime - dl_server_min_res))) { + + /* reset the defer timer */ + fw = dl_se->deadline - rq_clock(dl_se->rq) - dl_se->runtime; + + hrtimer_forward_now(timer, ns_to_ktime(fw)); + return HRTIMER_RESTART; + } + + dl_se->dl_defer_running = 1; + } + + enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH); + + if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &dl_se->rq->curr->dl)) + resched_curr(rq); + + __push_dl_task(rq, rf); + } + + return HRTIMER_NORESTART; +} + /* * This is the bandwidth enforcement timer callback. If here, we know * a task is not on its dl_rq, since the fact that the timer was running @@ -1128,28 +1265,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) struct rq_flags rf; struct rq *rq; - if (dl_server(dl_se)) { - struct rq *rq = rq_of_dl_se(dl_se); - struct rq_flags rf; - - rq_lock(rq, &rf); - if (dl_se->dl_throttled) { - sched_clock_tick(); - update_rq_clock(rq); - - if (dl_se->server_has_tasks(dl_se)) { - enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH); - resched_curr(rq); - __push_dl_task(rq, &rf); - } else { - replenish_dl_entity(dl_se); - } - - } - rq_unlock(rq, &rf); - - return HRTIMER_NORESTART; - } + if (dl_server(dl_se)) + return dl_server_timer(timer, dl_se); p = dl_task_of(dl_se); rq = task_rq_lock(p, &rf); @@ -1319,22 +1436,10 @@ static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se) return (delta * u_act) >> BW_SHIFT; } -static inline void -update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, - int flags); -static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec) +s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec) { s64 scaled_delta_exec; - if (unlikely(delta_exec <= 0)) { - if (unlikely(dl_se->dl_yielded)) - goto throttle; - return; - } - - if (dl_entity_is_special(dl_se)) - return; - /* * For tasks that participate in GRUB, we 
implement GRUB-PA: the * spare reclaimed bandwidth is used to clock down frequency. @@ -1353,8 +1458,64 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu); } + return scaled_delta_exec; +} + +static inline void +update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, + int flags); +static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec) +{ + s64 scaled_delta_exec; + + if (unlikely(delta_exec <= 0)) { + if (unlikely(dl_se->dl_yielded)) + goto throttle; + return; + } + + if (dl_server(dl_se) && dl_se->dl_throttled && !dl_se->dl_defer) + return; + + if (dl_entity_is_special(dl_se)) + return; + + scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec); + dl_se->runtime -= scaled_delta_exec; + /* + * The fair server can consume its runtime while throttled (not queued/ + * running as regular CFS). + * + * If the server consumes its entire runtime in this state, the server + * is not required for the current period. Thus, reset the server by + * starting a new period, pushing the activation. + */ + if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) { + /* + * If the server was previously activated - the starving condition + * took place, at this point it went away because the fair scheduler + * was able to get runtime in background. So return to the initial + * state. + */ + dl_se->dl_defer_running = 0; + + hrtimer_try_to_cancel(&dl_se->dl_timer); + + replenish_dl_new_period(dl_se, dl_se->rq); + + /* + * Not being able to start the timer seems problematic. If it could not + * be started for whatever reason, we need to "unthrottle" the DL server + * and queue right away. Otherwise nothing might queue it. That's similar + * to what enqueue_dl_entity() does on start_dl_timer==0. For now, just warn. 
+ */ + WARN_ON_ONCE(!start_dl_timer(dl_se)); + + return; + } + throttle: if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) { dl_se->dl_throttled = 1; @@ -1414,9 +1575,46 @@ throttle: } } +/* + * In the non-defer mode, the idle time is not accounted, as the + * server provides a guarantee. + * + * If the dl_server is in defer mode, the idle time is also considered + * as time available for the fair server, avoiding a penalty for the + * rt scheduler that did not consume that time. + */ +void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) +{ + s64 delta_exec, scaled_delta_exec; + + if (!rq->fair_server.dl_defer) + return; + + /* no need to discount more */ + if (rq->fair_server.runtime < 0) + return; + + delta_exec = rq_clock_task(rq) - p->se.exec_start; + if (delta_exec < 0) + return; + + scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec); + + rq->fair_server.runtime -= scaled_delta_exec; + + if (rq->fair_server.runtime < 0) { + rq->fair_server.dl_defer_running = 0; + rq->fair_server.runtime = 0; + } + + p->se.exec_start = rq_clock_task(rq); +} + void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec) { - update_curr_dl_se(dl_se->rq, dl_se, delta_exec); + /* 0 runtime = fair server disabled */ + if (dl_se->dl_runtime) + update_curr_dl_se(dl_se->rq, dl_se, delta_exec); } void dl_server_start(struct sched_dl_entity *dl_se) @@ -1430,6 +1628,7 @@ void dl_server_start(struct sched_dl_entity *dl_se) dl_se->dl_period = 1000 * NSEC_PER_MSEC; dl_se->dl_server = 1; + dl_se->dl_defer = 1; setup_new_dl_entity(dl_se); } @@ -1447,6 +1646,9 @@ void dl_server_stop(struct sched_dl_entity *dl_se) return; dequeue_dl_entity(dl_se, DEQUEUE_SLEEP); + hrtimer_try_to_cancel(&dl_se->dl_timer); + dl_se->dl_defer_armed = 0; + dl_se->dl_throttled = 0; } void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, @@ -1758,7 +1960,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) * be counted in the active 
utilization; hence, we need to call * add_running_bw(). */ - if (dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) { + if (!dl_se->dl_defer && dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) { if (flags & ENQUEUE_WAKEUP) task_contending(dl_se, flags); @@ -1780,6 +1982,25 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) setup_new_dl_entity(dl_se); } + /* + * If the reservation is still throttled, e.g., it got replenished but is a + * deferred task and still got to wait, don't enqueue. + */ + if (dl_se->dl_throttled && start_dl_timer(dl_se)) + return; + + /* + * We're about to enqueue, make sure we're not ->dl_throttled! + * In case the timer was not started, say because the defer time + * has passed, mark as not throttled and mark unarmed. + * Also cancel earlier timers, since letting those run is pointless. + */ + if (dl_se->dl_throttled) { + hrtimer_try_to_cancel(&dl_se->dl_timer); + dl_se->dl_defer_armed = 0; + dl_se->dl_throttled = 0; + } + __enqueue_dl_entity(dl_se); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index aba23b08e52d..1ea5ec81431a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1156,12 +1156,13 @@ s64 update_curr_common(struct rq *rq) static void update_curr(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; + struct rq *rq = rq_of(cfs_rq); s64 delta_exec; if (unlikely(!curr)) return; - delta_exec = update_curr_se(rq_of(cfs_rq), curr); + delta_exec = update_curr_se(rq, curr); if (unlikely(delta_exec <= 0)) return; @@ -1169,8 +1170,19 @@ static void update_curr(struct cfs_rq *cfs_rq) update_deadline(cfs_rq, curr); update_min_vruntime(cfs_rq); - if (entity_is_task(curr)) - update_curr_task(task_of(curr), delta_exec); + if (entity_is_task(curr)) { + struct task_struct *p = task_of(curr); + + update_curr_task(p, delta_exec); + + /* + * Any fair task that runs outside of fair_server should + * account against fair_server such that it can account for + * this time and possibly avoid running 
this period. + */ + if (p->dl_server != &rq->fair_server) + dl_server_update(&rq->fair_server, delta_exec); + } account_cfs_rq_runtime(cfs_rq, delta_exec); } @@ -6768,8 +6780,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) */ util_est_enqueue(&rq->cfs, p); - if (!throttled_hierarchy(task_cfs_rq(p)) && !rq->cfs.h_nr_running) + if (!throttled_hierarchy(task_cfs_rq(p)) && !rq->cfs.h_nr_running) { + /* Account for idle runtime */ + if (!rq->nr_running) + dl_server_update_idle_time(rq, rq->curr); dl_server_start(&rq->fair_server); + } /* * If in_iowait is set, the code below may not trigger any cpufreq diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 6e78d071beb5..d560f7ffa463 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -452,12 +452,14 @@ static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags) static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { + dl_server_update_idle_time(rq, prev); } static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first) { update_idle_core(rq); schedstat_inc(rq->sched_goidle); + next->se.exec_start = rq_clock_task(rq); } #ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7416bcd2a549..64fb6776664e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -335,7 +335,7 @@ extern bool __checkparam_dl(const struct sched_attr *attr); extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); extern int dl_bw_check_overflow(int cpu); - +extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec); /* * SCHED_DEADLINE supports servers (nested scheduling) with the following * interface: @@ -363,6 +363,8 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, dl_server_has_tasks_f has_tasks, dl_server_pick_f pick); +extern 
void dl_server_update_idle_time(struct rq *rq, + struct task_struct *p); extern void fair_server_init(struct rq *rq); #ifdef CONFIG_CGROUP_SCHED |