Diffstat (limited to 'kernel/sched/deadline.c')
-rw-r--r--  kernel/sched/deadline.c  894
1 file changed, 851 insertions(+), 43 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index a2ce59015642..a84299f44b5d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -17,6 +17,7 @@  #include "sched.h"  #include <linux/slab.h> +#include <uapi/linux/sched/types.h>  struct dl_bandwidth def_dl_bandwidth; @@ -43,6 +44,254 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)  	return !RB_EMPTY_NODE(&dl_se->rb_node);  } +#ifdef CONFIG_SMP +static inline struct dl_bw *dl_bw_of(int i) +{ +	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), +			 "sched RCU must be held"); +	return &cpu_rq(i)->rd->dl_bw; +} + +static inline int dl_bw_cpus(int i) +{ +	struct root_domain *rd = cpu_rq(i)->rd; +	int cpus = 0; + +	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), +			 "sched RCU must be held"); +	for_each_cpu_and(i, rd->span, cpu_active_mask) +		cpus++; + +	return cpus; +} +#else +static inline struct dl_bw *dl_bw_of(int i) +{ +	return &cpu_rq(i)->dl.dl_bw; +} + +static inline int dl_bw_cpus(int i) +{ +	return 1; +} +#endif + +static inline +void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq) +{ +	u64 old = dl_rq->running_bw; + +	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock); +	dl_rq->running_bw += dl_bw; +	SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */ +	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw); +} + +static inline +void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq) +{ +	u64 old = dl_rq->running_bw; + +	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock); +	dl_rq->running_bw -= dl_bw; +	SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */ +	if (dl_rq->running_bw > old) +		dl_rq->running_bw = 0; +} + +static inline +void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) +{ +	u64 old = dl_rq->this_bw; + +	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock); +	dl_rq->this_bw += dl_bw; +	SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */ +} + +static inline +void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) +{ +	u64 old = dl_rq->this_bw; + +	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock); +	dl_rq->this_bw -= dl_bw; +	SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */ +	if (dl_rq->this_bw > old) +		dl_rq->this_bw = 0; +	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw); +} + +void dl_change_utilization(struct task_struct *p, u64 new_bw) +{ +	struct rq *rq; + +	if (task_on_rq_queued(p)) +		return; + +	rq = task_rq(p); +	if (p->dl.dl_non_contending) { +		sub_running_bw(p->dl.dl_bw, &rq->dl); +		p->dl.dl_non_contending = 0; +		/* +		 * If the timer handler is currently running and the +		 * timer cannot be cancelled, inactive_task_timer() +		 * will see that dl_not_contending is not set, and +		 * will not touch the rq's active utilization, +		 * so we are still safe. +		 */ +		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) +			put_task_struct(p); +	} +	sub_rq_bw(p->dl.dl_bw, &rq->dl); +	add_rq_bw(new_bw, &rq->dl); +} + +/* + * The utilization of a task cannot be immediately removed from + * the rq active utilization (running_bw) when the task blocks. + * Instead, we have to wait for the so called "0-lag time". + * + * If a task blocks before the "0-lag time", a timer (the inactive + * timer) is armed, and running_bw is decreased when the timer + * fires. + * + * If the task wakes up again before the inactive timer fires, + * the timer is cancelled, whereas if the task wakes up after the + * inactive timer fired (and running_bw has been decreased) the + * task's utilization has to be added to running_bw again. 
+ * A flag in the deadline scheduling entity (dl_non_contending) + * is used to avoid race conditions between the inactive timer handler + * and task wakeups. + * + * The following diagram shows how running_bw is updated. A task is + * "ACTIVE" when its utilization contributes to running_bw; an + * "ACTIVE contending" task is in the TASK_RUNNING state, while an + * "ACTIVE non contending" task is a blocked task for which the "0-lag time" + * has not passed yet. An "INACTIVE" task is a task for which the "0-lag" + * time already passed, which does not contribute to running_bw anymore. + *                              +------------------+ + *             wakeup           |    ACTIVE        | + *          +------------------>+   contending     | + *          | add_running_bw    |                  | + *          |                   +----+------+------+ + *          |                        |      ^ + *          |                dequeue |      | + * +--------+-------+                |      | + * |                |   t >= 0-lag   |      | wakeup + * |    INACTIVE    |<---------------+      | + * |                | sub_running_bw |      | + * +--------+-------+                |      | + *          ^                        |      | + *          |              t < 0-lag |      | + *          |                        |      | + *          |                        V      | + *          |                   +----+------+------+ + *          | sub_running_bw    |    ACTIVE        | + *          +-------------------+                  | + *            inactive timer    |  non contending  | + *            fired             +------------------+ + * + * The task_non_contending() function is invoked when a task + * blocks, and checks if the 0-lag time already passed or + * not (in the first case, it directly updates running_bw; + * in the second case, it arms the inactive timer). + * + * The task_contending() function is invoked when a task wakes + * up, and checks if the task is still in the "ACTIVE non contending" + * state or not (in the second case, it updates running_bw). 
+ */ +static void task_non_contending(struct task_struct *p) +{ +	struct sched_dl_entity *dl_se = &p->dl; +	struct hrtimer *timer = &dl_se->inactive_timer; +	struct dl_rq *dl_rq = dl_rq_of_se(dl_se); +	struct rq *rq = rq_of_dl_rq(dl_rq); +	s64 zerolag_time; + +	/* +	 * If this is a non-deadline task that has been boosted, +	 * do nothing +	 */ +	if (dl_se->dl_runtime == 0) +		return; + +	WARN_ON(hrtimer_active(&dl_se->inactive_timer)); +	WARN_ON(dl_se->dl_non_contending); + +	zerolag_time = dl_se->deadline - +		 div64_long((dl_se->runtime * dl_se->dl_period), +			dl_se->dl_runtime); + +	/* +	 * Using relative times instead of the absolute "0-lag time" +	 * allows to simplify the code +	 */ +	zerolag_time -= rq_clock(rq); + +	/* +	 * If the "0-lag time" already passed, decrease the active +	 * utilization now, instead of starting a timer +	 */ +	if (zerolag_time < 0) { +		if (dl_task(p)) +			sub_running_bw(dl_se->dl_bw, dl_rq); +		if (!dl_task(p) || p->state == TASK_DEAD) { +			struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); + +			if (p->state == TASK_DEAD) +				sub_rq_bw(p->dl.dl_bw, &rq->dl); +			raw_spin_lock(&dl_b->lock); +			__dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); +			__dl_clear_params(p); +			raw_spin_unlock(&dl_b->lock); +		} + +		return; +	} + +	dl_se->dl_non_contending = 1; +	get_task_struct(p); +	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL); +} + +static void task_contending(struct sched_dl_entity *dl_se, int flags) +{ +	struct dl_rq *dl_rq = dl_rq_of_se(dl_se); + +	/* +	 * If this is a non-deadline task that has been boosted, +	 * do nothing +	 */ +	if (dl_se->dl_runtime == 0) +		return; + +	if (flags & ENQUEUE_MIGRATED) +		add_rq_bw(dl_se->dl_bw, dl_rq); + +	if (dl_se->dl_non_contending) { +		dl_se->dl_non_contending = 0; +		/* +		 * If the timer handler is currently running and the +		 * timer cannot be cancelled, inactive_task_timer() +		 * will see that dl_not_contending is not set, and +		 * will not touch the rq's active utilization, +		 * so we are still safe. +		 */ +		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) +			put_task_struct(dl_task_of(dl_se)); +	} else { +		/* +		 * Since "dl_non_contending" is not set, the +		 * task's utilization has already been removed from +		 * active utilization (either when the task blocked, +		 * when the "inactive timer" fired). +		 * So, add it back. +		 */ +		add_running_bw(dl_se->dl_bw, dl_rq); +	} +} +  static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)  {  	struct sched_dl_entity *dl_se = &p->dl; @@ -83,6 +332,10 @@ void init_dl_rq(struct dl_rq *dl_rq)  #else  	init_dl_bw(&dl_rq->dl_bw);  #endif + +	dl_rq->running_bw = 0; +	dl_rq->this_bw = 0; +	init_dl_rq_bw_ratio(dl_rq);  }  #ifdef CONFIG_SMP @@ -484,13 +737,84 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,  }  /* - * When a -deadline entity is queued back on the runqueue, its runtime and - * deadline might need updating. + * Revised wakeup rule [1]: For self-suspending tasks, rather then + * re-initializing task's runtime and deadline, the revised wakeup + * rule adjusts the task's runtime to avoid the task to overrun its + * density. + * + * Reasoning: a task may overrun the density if: + *    runtime / (deadline - t) > dl_runtime / dl_deadline + * + * Therefore, runtime can be adjusted to: + *     runtime = (dl_runtime / dl_deadline) * (deadline - t) + * + * In such way that runtime will be equal to the maximum density + * the task can use without breaking any rule. 
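As an illustration of the 0-lag arithmetic used by task_non_contending() above, the following stand-alone sketch (made-up parameter values, not kernel code) computes deadline - runtime * dl_period / dl_runtime and decides between dropping the utilization immediately and arming the inactive timer:

/* zerolag.c - stand-alone sketch of the 0-lag computation (illustrative only) */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* All times in nanoseconds; the values below are made up. */
	uint64_t dl_runtime = 10ULL * 1000 * 1000;	/* reserved runtime: 10 ms    */
	uint64_t dl_period  = 100ULL * 1000 * 1000;	/* reservation period: 100 ms */
	uint64_t deadline   = 500ULL * 1000 * 1000;	/* current absolute deadline  */
	uint64_t runtime    = 4ULL * 1000 * 1000;	/* runtime left when blocking */
	uint64_t now        = 450ULL * 1000 * 1000;	/* rq_clock() when blocking   */

	/* Same formula as task_non_contending():
	 * 0-lag = deadline - runtime * dl_period / dl_runtime
	 */
	int64_t zerolag = (int64_t)deadline -
			  (int64_t)(runtime * dl_period / dl_runtime);
	int64_t rel = zerolag - (int64_t)now;		/* relative 0-lag time */

	if (rel < 0)
		printf("0-lag time already passed: drop the utilization now\n");
	else
		printf("arm the inactive timer to fire in %lld ns\n", (long long)rel);
	return 0;
}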
+ * + * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant + * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24. + */ +static void +update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq) +{ +	u64 laxity = dl_se->deadline - rq_clock(rq); + +	/* +	 * If the task has deadline < period, and the deadline is in the past, +	 * it should already be throttled before this check. +	 * +	 * See update_dl_entity() comments for further details. +	 */ +	WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq))); + +	dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT; +} + +/* + * Regarding the deadline, a task with implicit deadline has a relative + * deadline == relative period. A task with constrained deadline has a + * relative deadline <= relative period. + * + * We support constrained deadline tasks. However, there are some restrictions + * applied only for tasks which do not have an implicit deadline. See + * update_dl_entity() to know more about such restrictions. + * + * The dl_is_implicit() returns true if the task has an implicit deadline. + */ +static inline bool dl_is_implicit(struct sched_dl_entity *dl_se) +{ +	return dl_se->dl_deadline == dl_se->dl_period; +} + +/* + * When a deadline entity is placed in the runqueue, its runtime and deadline + * might need to be updated. This is done by a CBS wake up rule. There are two + * different rules: 1) the original CBS; and 2) the Revisited CBS. + * + * When the task is starting a new period, the Original CBS is used. In this + * case, the runtime is replenished and a new absolute deadline is set. + * + * When a task is queued before the begin of the next period, using the + * remaining runtime and deadline could make the entity to overflow, see + * dl_entity_overflow() to find more about runtime overflow. When such case + * is detected, the runtime and deadline need to be updated. + * + * If the task has an implicit deadline, i.e., deadline == period, the Original + * CBS is applied. the runtime is replenished and a new absolute deadline is + * set, as in the previous cases. + * + * However, the Original CBS does not work properly for tasks with + * deadline < period, which are said to have a constrained deadline. By + * applying the Original CBS, a constrained deadline task would be able to run + * runtime/deadline in a period. With deadline < period, the task would + * overrun the runtime/period allowed bandwidth, breaking the admission test.   * - * The policy here is that we update the deadline of the entity only if: - *  - the current deadline is in the past, - *  - using the remaining runtime with the current deadline would make - *    the entity exceed its bandwidth. + * In order to prevent this misbehave, the Revisited CBS is used for + * constrained deadline tasks when a runtime overflow is detected. In the + * Revisited CBS, rather than replenishing & setting a new absolute deadline, + * the remaining runtime of the task is reduced to avoid runtime overflow. + * Please refer to the comments update_dl_revised_wakeup() function to find + * more about the Revised CBS rule.   
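The fixed-point form of the revised wakeup rule in update_dl_revised_wakeup() can be reproduced in user space as follows; dl_density is runtime/deadline scaled by 2^BW_SHIFT (BW_SHIFT = 20 is assumed here, consistent with the GRUB comment further below), and the parameter values are invented:

/* revised_wakeup.c - sketch of the revised CBS wakeup rule (illustrative only) */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT 20	/* assumed: utilizations are scaled by 2^20 */

/* to_ratio(period, runtime): runtime/period in 2^BW_SHIFT fixed point */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	/* constrained task: runtime 2 ms, deadline 5 ms (made-up values) */
	uint64_t dl_runtime  = 2ULL * 1000 * 1000;
	uint64_t dl_deadline = 5ULL * 1000 * 1000;
	uint64_t dl_density  = to_ratio(dl_deadline, dl_runtime);

	/* time left until the current absolute deadline (the laxity) */
	uint64_t laxity = 3ULL * 1000 * 1000;	/* 3 ms */

	/* runtime = (dl_density * laxity) >> BW_SHIFT, i.e. density * laxity */
	uint64_t runtime = (dl_density * laxity) >> BW_SHIFT;

	printf("density = %.3f, adjusted runtime = %llu ns\n",
	       (double)dl_density / (1 << BW_SHIFT),
	       (unsigned long long)runtime);
	return 0;
}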
*/  static void update_dl_entity(struct sched_dl_entity *dl_se,  			     struct sched_dl_entity *pi_se) @@ -500,6 +824,14 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,  	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||  	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) { + +		if (unlikely(!dl_is_implicit(dl_se) && +			     !dl_time_before(dl_se->deadline, rq_clock(rq)) && +			     !dl_se->dl_boosted)){ +			update_dl_revised_wakeup(dl_se, rq); +			return; +		} +  		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;  		dl_se->runtime = pi_se->dl_runtime;  	} @@ -593,10 +925,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)  	 * The task might have changed its scheduling policy to something  	 * different than SCHED_DEADLINE (through switched_from_dl()).  	 */ -	if (!dl_task(p)) { -		__dl_clear_params(p); +	if (!dl_task(p))  		goto unlock; -	}  	/*  	 * The task might have been boosted by someone else and might be in the @@ -723,6 +1053,8 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)  		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))  			return;  		dl_se->dl_throttled = 1; +		if (dl_se->runtime > 0) +			dl_se->runtime = 0;  	}  } @@ -735,6 +1067,47 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)  extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);  /* + * This function implements the GRUB accounting rule: + * according to the GRUB reclaiming algorithm, the runtime is + * not decreased as "dq = -dt", but as + * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt", + * where u is the utilization of the task, Umax is the maximum reclaimable + * utilization, Uinact is the (per-runqueue) inactive utilization, computed + * as the difference between the "total runqueue utilization" and the + * runqueue active utilization, and Uextra is the (per runqueue) extra + * reclaimable utilization. + * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations + * multiplied by 2^BW_SHIFT, the result has to be shifted right by + * BW_SHIFT. + * Since rq->dl.bw_ratio contains 1 / Umax multipled by 2^RATIO_SHIFT, + * dl_bw is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT. + * Since delta is a 64 bit variable, to have an overflow its value + * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds. + * So, overflow is not an issue here. + */ +u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se) +{ +	u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */ +	u64 u_act; +	u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT; + +	/* +	 * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)}, +	 * we compare u_inact + rq->dl.extra_bw with +	 * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because +	 * u_inact + rq->dl.extra_bw can be larger than +	 * 1 * (so, 1 - u_inact - rq->dl.extra_bw would be negative +	 * leading to wrong results) +	 */ +	if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min) +		u_act = u_act_min; +	else +		u_act = BW_UNIT - u_inact - rq->dl.extra_bw; + +	return (delta * u_act) >> BW_SHIFT; +} + +/*   * Update the current task's runtime statistics (provided it is still   * a -deadline task and has not been removed from the dl_rq).   
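A stand-alone sketch of the scaling rule grub_reclaim() applies to each chunk of consumed runtime; BW_SHIFT = 20 and RATIO_SHIFT = 8 are assumed (consistent with the overflow remark in the comment above) and the runqueue numbers are invented:

/* grub.c - sketch of the GRUB runtime-scaling rule (illustrative only) */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT    20
#define RATIO_SHIFT 8
#define BW_UNIT     (1ULL << BW_SHIFT)

/* Mirror of grub_reclaim(), operating on plain numbers instead of rq state. */
static uint64_t grub_scale(uint64_t delta, uint64_t this_bw, uint64_t running_bw,
			   uint64_t extra_bw, uint64_t bw_ratio, uint64_t dl_bw)
{
	uint64_t u_inact = this_bw - running_bw;		/* Utot - Uact */
	uint64_t u_act_min = (dl_bw * bw_ratio) >> RATIO_SHIFT;	/* u / Umax    */
	uint64_t u_act;

	if (u_inact + extra_bw > BW_UNIT - u_act_min)
		u_act = u_act_min;
	else
		u_act = BW_UNIT - u_inact - extra_bw;

	return (delta * u_act) >> BW_SHIFT;
}

int main(void)
{
	/* Made-up runqueue state: 40% total bandwidth attached, a 25% task
	 * currently active (so 15% is inactive utilization), 50% extra
	 * reclaimable bandwidth, Umax = 0.95. */
	uint64_t this_bw    = (40 * BW_UNIT) / 100;
	uint64_t running_bw = (25 * BW_UNIT) / 100;
	uint64_t extra_bw   = (50 * BW_UNIT) / 100;
	uint64_t bw_ratio   = (100ULL << RATIO_SHIFT) / 95;	/* 1/Umax */
	uint64_t dl_bw      = (25 * BW_UNIT) / 100;		/* the task's u */
	uint64_t delta      = 1000 * 1000;			/* ran for 1 ms */

	printf("1 ms of execution is charged as %llu ns\n",
	       (unsigned long long)grub_scale(delta, this_bw, running_bw,
					      extra_bw, bw_ratio, dl_bw));
	return 0;
}

With these numbers the 1 ms of execution is charged as roughly 0.35 ms, which is how a reclaiming task gets to consume bandwidth left unused by the others.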
*/ @@ -776,6 +1149,8 @@ static void update_curr_dl(struct rq *rq)  	sched_rt_avg_update(rq, delta_exec); +	if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) +		delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);  	dl_se->runtime -= delta_exec;  throttle: @@ -815,6 +1190,56 @@ throttle:  	}  } +static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) +{ +	struct sched_dl_entity *dl_se = container_of(timer, +						     struct sched_dl_entity, +						     inactive_timer); +	struct task_struct *p = dl_task_of(dl_se); +	struct rq_flags rf; +	struct rq *rq; + +	rq = task_rq_lock(p, &rf); + +	if (!dl_task(p) || p->state == TASK_DEAD) { +		struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); + +		if (p->state == TASK_DEAD && dl_se->dl_non_contending) { +			sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl)); +			sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl)); +			dl_se->dl_non_contending = 0; +		} + +		raw_spin_lock(&dl_b->lock); +		__dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); +		raw_spin_unlock(&dl_b->lock); +		__dl_clear_params(p); + +		goto unlock; +	} +	if (dl_se->dl_non_contending == 0) +		goto unlock; + +	sched_clock_tick(); +	update_rq_clock(rq); + +	sub_running_bw(dl_se->dl_bw, &rq->dl); +	dl_se->dl_non_contending = 0; +unlock: +	task_rq_unlock(rq, p, &rf); +	put_task_struct(p); + +	return HRTIMER_NORESTART; +} + +void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se) +{ +	struct hrtimer *timer = &dl_se->inactive_timer; + +	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); +	timer->function = inactive_task_timer; +} +  #ifdef CONFIG_SMP  static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) @@ -946,10 +1371,12 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,  	 * parameters of the task might need updating. Otherwise,  	 * we want a replenishment of its runtime.  	 */ -	if (flags & ENQUEUE_WAKEUP) +	if (flags & ENQUEUE_WAKEUP) { +		task_contending(dl_se, flags);  		update_dl_entity(dl_se, pi_se); -	else if (flags & ENQUEUE_REPLENISH) +	} else if (flags & ENQUEUE_REPLENISH) {  		replenish_dl_entity(dl_se, pi_se); +	}  	__enqueue_dl_entity(dl_se);  } @@ -959,11 +1386,6 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)  	__dequeue_dl_entity(dl_se);  } -static inline bool dl_is_constrained(struct sched_dl_entity *dl_se) -{ -	return dl_se->dl_deadline < dl_se->dl_period; -} -  static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)  {  	struct task_struct *pi_task = rt_mutex_get_top_task(p); @@ -995,17 +1417,32 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)  	 * If that is the case, the task will be throttled and  	 * the replenishment timer will be set to the next period.  	 */ -	if (!p->dl.dl_throttled && dl_is_constrained(&p->dl)) +	if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))  		dl_check_constrained_dl(&p->dl); +	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) { +		add_rq_bw(p->dl.dl_bw, &rq->dl); +		add_running_bw(p->dl.dl_bw, &rq->dl); +	} +  	/* -	 * If p is throttled, we do nothing. In fact, if it exhausted +	 * If p is throttled, we do not enqueue it. In fact, if it exhausted  	 * its budget it needs a replenishment and, since it now is on  	 * its rq, the bandwidth timer callback (which clearly has not  	 * run yet) will take care of this. +	 * However, the active utilization does not depend on the fact +	 * that the task is on the runqueue or not (but depends on the +	 * task's state - in GRUB parlance, "inactive" vs "active contending"). 
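All of the add_running_bw()/sub_running_bw()/add_rq_bw()/sub_rq_bw() calls introduced above maintain two per-runqueue counters: this_bw (bandwidth of every task attached to the runqueue) and running_bw (bandwidth of the tasks that are still ACTIVE in the GRUB sense). A toy user-space model of that bookkeeping, ignoring locking and the actual hrtimers:

/* dl_bw_model.c - toy model of running_bw / this_bw accounting (illustrative) */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT 20
#define BW_UNIT  (1ULL << BW_SHIFT)

struct toy_dl_rq {
	uint64_t this_bw;	/* bw of all tasks attached to this rq         */
	uint64_t running_bw;	/* bw of the tasks that are ACTIVE (contending *
				 * or not yet past their 0-lag time)           */
};

static void enqueue(struct toy_dl_rq *rq, uint64_t bw)	/* task becomes runnable */
{
	rq->this_bw += bw;
	rq->running_bw += bw;
	assert(rq->running_bw <= rq->this_bw);
}

static void block_before_0lag(struct toy_dl_rq *rq, uint64_t bw)
{
	/* task blocked: it stays ACTIVE non contending, nothing changes yet */
	(void)rq; (void)bw;
}

static void inactive_timer_fires(struct toy_dl_rq *rq, uint64_t bw)
{
	/* 0-lag time reached while still blocked: drop the active utilization */
	rq->running_bw -= bw;
}

static void task_leaves_rq(struct toy_dl_rq *rq, uint64_t bw)
{
	/* dead or migrated away: drop it from the total as well */
	rq->this_bw -= bw;
}

int main(void)
{
	struct toy_dl_rq rq = { 0, 0 };
	uint64_t bw = BW_UNIT / 4;	/* a 25% task (made up) */

	enqueue(&rq, bw);
	block_before_0lag(&rq, bw);
	inactive_timer_fires(&rq, bw);
	task_leaves_rq(&rq, bw);
	printf("this_bw=%llu running_bw=%llu\n",
	       (unsigned long long)rq.this_bw, (unsigned long long)rq.running_bw);
	return 0;
}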
+	 * In other words, even if a task is throttled its utilization must +	 * be counted in the active utilization; hence, we need to call +	 * add_running_bw().  	 */ -	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) +	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) { +		if (flags & ENQUEUE_WAKEUP) +			task_contending(&p->dl, flags); +  		return; +	}  	enqueue_dl_entity(&p->dl, pi_se, flags); @@ -1023,6 +1460,23 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)  {  	update_curr_dl(rq);  	__dequeue_task_dl(rq, p, flags); + +	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) { +		sub_running_bw(p->dl.dl_bw, &rq->dl); +		sub_rq_bw(p->dl.dl_bw, &rq->dl); +	} + +	/* +	 * This check allows to start the inactive timer (or to immediately +	 * decrease the active utilization, if needed) in two cases: +	 * when the task blocks and when it is terminating +	 * (p->state == TASK_DEAD). We can handle the two cases in the same +	 * way, because from GRUB's point of view the same thing is happening +	 * (the task moves from "active contending" to "active non contending" +	 * or "inactive") +	 */ +	if (flags & DEQUEUE_SLEEP) +		task_non_contending(p);  }  /* @@ -1100,6 +1554,37 @@ out:  	return cpu;  } +static void migrate_task_rq_dl(struct task_struct *p) +{ +	struct rq *rq; + +	if (p->state != TASK_WAKING) +		return; + +	rq = task_rq(p); +	/* +	 * Since p->state == TASK_WAKING, set_task_cpu() has been called +	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but +	 * rq->lock is not... So, lock it +	 */ +	raw_spin_lock(&rq->lock); +	if (p->dl.dl_non_contending) { +		sub_running_bw(p->dl.dl_bw, &rq->dl); +		p->dl.dl_non_contending = 0; +		/* +		 * If the timer handler is currently running and the +		 * timer cannot be cancelled, inactive_task_timer() +		 * will see that dl_not_contending is not set, and +		 * will not touch the rq's active utilization, +		 * so we are still safe. +		 */ +		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) +			put_task_struct(p); +	} +	sub_rq_bw(p->dl.dl_bw, &rq->dl); +	raw_spin_unlock(&rq->lock); +} +  static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)  {  	/* @@ -1255,19 +1740,6 @@ static void task_fork_dl(struct task_struct *p)  	 */  } -static void task_dead_dl(struct task_struct *p) -{ -	struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); - -	/* -	 * Since we are TASK_DEAD we won't slip out of the domain! -	 */ -	raw_spin_lock_irq(&dl_b->lock); -	/* XXX we should retain the bw until 0-lag */ -	dl_b->total_bw -= p->dl.dl_bw; -	raw_spin_unlock_irq(&dl_b->lock); -} -  static void set_curr_task_dl(struct rq *rq)  {  	struct task_struct *p = rq->curr; @@ -1533,7 +2005,7 @@ retry:  		 * then possible that next_task has migrated.  		 */  		task = pick_next_pushable_dl_task(rq); -		if (task_cpu(next_task) == rq->cpu && task == next_task) { +		if (task == next_task) {  			/*  			 * The task is still there. We don't try  			 * again, some other cpu will pull it when ready. 
@@ -1551,7 +2023,11 @@ retry:  	}  	deactivate_task(rq, next_task, 0); +	sub_running_bw(next_task->dl.dl_bw, &rq->dl); +	sub_rq_bw(next_task->dl.dl_bw, &rq->dl);  	set_task_cpu(next_task, later_rq->cpu); +	add_rq_bw(next_task->dl.dl_bw, &later_rq->dl); +	add_running_bw(next_task->dl.dl_bw, &later_rq->dl);  	activate_task(later_rq, next_task, 0);  	ret = 1; @@ -1639,7 +2115,11 @@ static void pull_dl_task(struct rq *this_rq)  			resched = true;  			deactivate_task(src_rq, p, 0); +			sub_running_bw(p->dl.dl_bw, &src_rq->dl); +			sub_rq_bw(p->dl.dl_bw, &src_rq->dl);  			set_task_cpu(p, this_cpu); +			add_rq_bw(p->dl.dl_bw, &this_rq->dl); +			add_running_bw(p->dl.dl_bw, &this_rq->dl);  			activate_task(this_rq, p, 0);  			dmin = p->dl.deadline; @@ -1695,7 +2175,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,  		 * until we complete the update.  		 */  		raw_spin_lock(&src_dl_b->lock); -		__dl_clear(src_dl_b, p->dl.dl_bw); +		__dl_clear(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));  		raw_spin_unlock(&src_dl_b->lock);  	} @@ -1737,13 +2217,26 @@ void __init init_sched_dl_class(void)  static void switched_from_dl(struct rq *rq, struct task_struct *p)  {  	/* -	 * Start the deadline timer; if we switch back to dl before this we'll -	 * continue consuming our current CBS slice. If we stay outside of -	 * SCHED_DEADLINE until the deadline passes, the timer will reset the -	 * task. +	 * task_non_contending() can start the "inactive timer" (if the 0-lag +	 * time is in the future). If the task switches back to dl before +	 * the "inactive timer" fires, it can continue to consume its current +	 * runtime using its current deadline. If it stays outside of +	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer() +	 * will reset the task parameters.  	 */ -	if (!start_dl_timer(p)) -		__dl_clear_params(p); +	if (task_on_rq_queued(p) && p->dl.dl_runtime) +		task_non_contending(p); + +	if (!task_on_rq_queued(p)) +		sub_rq_bw(p->dl.dl_bw, &rq->dl); + +	/* +	 * We cannot use inactive_task_timer() to invoke sub_running_bw() +	 * at the 0-lag time, because the task could have been migrated +	 * while SCHED_OTHER in the meanwhile. +	 */ +	if (p->dl.dl_non_contending) +		p->dl.dl_non_contending = 0;  	/*  	 * Since this might be the only -deadline task on the rq, @@ -1762,11 +2255,15 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)   */  static void switched_to_dl(struct rq *rq, struct task_struct *p)  { +	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) +		put_task_struct(p);  	/* If p is not queued we will update its parameters at next wakeup. 
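The push/pull changes above simply hand a migrating task's bandwidth from the source runqueue to the destination one around set_task_cpu(), so both this_bw and running_bw stay consistent per CPU. A toy model of that hand-off (names and numbers invented):

/* dl_migrate_model.c - toy model of the bandwidth hand-off on push/pull */
#include <stdint.h>
#include <stdio.h>

struct toy_dl_rq {
	uint64_t this_bw;
	uint64_t running_bw;
};

/* Mirrors the sub/add pairs around set_task_cpu() in push_dl_task()/pull_dl_task(). */
static void move_bw(struct toy_dl_rq *src, struct toy_dl_rq *dst, uint64_t bw)
{
	src->running_bw -= bw;
	src->this_bw    -= bw;
	dst->this_bw    += bw;
	dst->running_bw += bw;
}

int main(void)
{
	struct toy_dl_rq cpu0 = { 300, 300 }, cpu1 = { 100, 100 };

	move_bw(&cpu0, &cpu1, 200);	/* push a 200-unit task from CPU0 to CPU1 */
	printf("cpu0: this=%llu running=%llu, cpu1: this=%llu running=%llu\n",
	       (unsigned long long)cpu0.this_bw, (unsigned long long)cpu0.running_bw,
	       (unsigned long long)cpu1.this_bw, (unsigned long long)cpu1.running_bw);
	return 0;
}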
*/ -	if (!task_on_rq_queued(p)) -		return; +	if (!task_on_rq_queued(p)) { +		add_rq_bw(p->dl.dl_bw, &rq->dl); +		return; +	}  	/*  	 * If p is boosted we already updated its params in  	 * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH), @@ -1836,6 +2333,7 @@ const struct sched_class dl_sched_class = {  #ifdef CONFIG_SMP  	.select_task_rq		= select_task_rq_dl, +	.migrate_task_rq	= migrate_task_rq_dl,  	.set_cpus_allowed       = set_cpus_allowed_dl,  	.rq_online              = rq_online_dl,  	.rq_offline             = rq_offline_dl, @@ -1845,7 +2343,6 @@ const struct sched_class dl_sched_class = {  	.set_curr_task		= set_curr_task_dl,  	.task_tick		= task_tick_dl,  	.task_fork              = task_fork_dl, -	.task_dead		= task_dead_dl,  	.prio_changed           = prio_changed_dl,  	.switched_from		= switched_from_dl, @@ -1854,6 +2351,317 @@ const struct sched_class dl_sched_class = {  	.update_curr		= update_curr_dl,  }; +int sched_dl_global_validate(void) +{ +	u64 runtime = global_rt_runtime(); +	u64 period = global_rt_period(); +	u64 new_bw = to_ratio(period, runtime); +	struct dl_bw *dl_b; +	int cpu, ret = 0; +	unsigned long flags; + +	/* +	 * Here we want to check the bandwidth not being set to some +	 * value smaller than the currently allocated bandwidth in +	 * any of the root_domains. +	 * +	 * FIXME: Cycling on all the CPUs is overdoing, but simpler than +	 * cycling on root_domains... Discussion on different/better +	 * solutions is welcome! +	 */ +	for_each_possible_cpu(cpu) { +		rcu_read_lock_sched(); +		dl_b = dl_bw_of(cpu); + +		raw_spin_lock_irqsave(&dl_b->lock, flags); +		if (new_bw < dl_b->total_bw) +			ret = -EBUSY; +		raw_spin_unlock_irqrestore(&dl_b->lock, flags); + +		rcu_read_unlock_sched(); + +		if (ret) +			break; +	} + +	return ret; +} + +void init_dl_rq_bw_ratio(struct dl_rq *dl_rq) +{ +	if (global_rt_runtime() == RUNTIME_INF) { +		dl_rq->bw_ratio = 1 << RATIO_SHIFT; +		dl_rq->extra_bw = 1 << BW_SHIFT; +	} else { +		dl_rq->bw_ratio = to_ratio(global_rt_runtime(), +			  global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT); +		dl_rq->extra_bw = to_ratio(global_rt_period(), +						    global_rt_runtime()); +	} +} + +void sched_dl_do_global(void) +{ +	u64 new_bw = -1; +	struct dl_bw *dl_b; +	int cpu; +	unsigned long flags; + +	def_dl_bandwidth.dl_period = global_rt_period(); +	def_dl_bandwidth.dl_runtime = global_rt_runtime(); + +	if (global_rt_runtime() != RUNTIME_INF) +		new_bw = to_ratio(global_rt_period(), global_rt_runtime()); + +	/* +	 * FIXME: As above... +	 */ +	for_each_possible_cpu(cpu) { +		rcu_read_lock_sched(); +		dl_b = dl_bw_of(cpu); + +		raw_spin_lock_irqsave(&dl_b->lock, flags); +		dl_b->bw = new_bw; +		raw_spin_unlock_irqrestore(&dl_b->lock, flags); + +		rcu_read_unlock_sched(); +		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl); +	} +} + +/* + * We must be sure that accepting a new task (or allowing changing the + * parameters of an existing one) is consistent with the bandwidth + * constraints. If yes, this function also accordingly updates the currently + * allocated bandwidth to reflect the new situation. + * + * This function is called while holding p's rq->lock. + */ +int sched_dl_overflow(struct task_struct *p, int policy, +		      const struct sched_attr *attr) +{ +	struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); +	u64 period = attr->sched_period ?: attr->sched_deadline; +	u64 runtime = attr->sched_runtime; +	u64 new_bw = dl_policy(policy) ? 
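A worked example of what init_dl_rq_bw_ratio() above stores, assuming the usual defaults sched_rt_runtime_us = 950000 and sched_rt_period_us = 1000000 (Umax = 0.95), with BW_SHIFT = 20 and RATIO_SHIFT = 8:

/* bw_ratio.c - worked example of bw_ratio / extra_bw (illustrative only) */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT    20
#define RATIO_SHIFT 8

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;	/* runtime/period in 2^20 fixed point */
}

int main(void)
{
	uint64_t rt_runtime = 950000ULL * 1000;		/* 950000 us */
	uint64_t rt_period  = 1000000ULL * 1000;	/* 1000000 us */

	/* 1/Umax in 2^RATIO_SHIFT fixed point (arguments swapped on purpose) */
	uint64_t bw_ratio = to_ratio(rt_runtime, rt_period) >> (BW_SHIFT - RATIO_SHIFT);
	/* Umax in 2^BW_SHIFT fixed point */
	uint64_t extra_bw = to_ratio(rt_period, rt_runtime);

	printf("bw_ratio = %llu\n", (unsigned long long)bw_ratio);
	printf("extra_bw = %llu\n", (unsigned long long)extra_bw);
	return 0;
}

The printed values correspond to 1/0.95 in 2^8 fixed point (269) and 0.95 in 2^20 fixed point (996147).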
to_ratio(period, runtime) : 0; +	int cpus, err = -1; + +	/* !deadline task may carry old deadline bandwidth */ +	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p)) +		return 0; + +	/* +	 * Either if a task, enters, leave, or stays -deadline but changes +	 * its parameters, we may need to update accordingly the total +	 * allocated bandwidth of the container. +	 */ +	raw_spin_lock(&dl_b->lock); +	cpus = dl_bw_cpus(task_cpu(p)); +	if (dl_policy(policy) && !task_has_dl_policy(p) && +	    !__dl_overflow(dl_b, cpus, 0, new_bw)) { +		if (hrtimer_active(&p->dl.inactive_timer)) +			__dl_clear(dl_b, p->dl.dl_bw, cpus); +		__dl_add(dl_b, new_bw, cpus); +		err = 0; +	} else if (dl_policy(policy) && task_has_dl_policy(p) && +		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) { +		/* +		 * XXX this is slightly incorrect: when the task +		 * utilization decreases, we should delay the total +		 * utilization change until the task's 0-lag point. +		 * But this would require to set the task's "inactive +		 * timer" when the task is not inactive. +		 */ +		__dl_clear(dl_b, p->dl.dl_bw, cpus); +		__dl_add(dl_b, new_bw, cpus); +		dl_change_utilization(p, new_bw); +		err = 0; +	} else if (!dl_policy(policy) && task_has_dl_policy(p)) { +		/* +		 * Do not decrease the total deadline utilization here, +		 * switched_from_dl() will take care to do it at the correct +		 * (0-lag) time. +		 */ +		err = 0; +	} +	raw_spin_unlock(&dl_b->lock); + +	return err; +} + +/* + * This function initializes the sched_dl_entity of a newly becoming + * SCHED_DEADLINE task. + * + * Only the static values are considered here, the actual runtime and the + * absolute deadline will be properly calculated when the task is enqueued + * for the first time with its new policy. + */ +void __setparam_dl(struct task_struct *p, const struct sched_attr *attr) +{ +	struct sched_dl_entity *dl_se = &p->dl; + +	dl_se->dl_runtime = attr->sched_runtime; +	dl_se->dl_deadline = attr->sched_deadline; +	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; +	dl_se->flags = attr->sched_flags; +	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); +	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime); +} + +void __getparam_dl(struct task_struct *p, struct sched_attr *attr) +{ +	struct sched_dl_entity *dl_se = &p->dl; + +	attr->sched_priority = p->rt_priority; +	attr->sched_runtime = dl_se->dl_runtime; +	attr->sched_deadline = dl_se->dl_deadline; +	attr->sched_period = dl_se->dl_period; +	attr->sched_flags = dl_se->flags; +} + +/* + * This function validates the new parameters of a -deadline task. + * We ask for the deadline not being zero, and greater or equal + * than the runtime, as well as the period of being zero or + * greater than deadline. Furthermore, we have to be sure that + * user parameters are above the internal resolution of 1us (we + * check sched_runtime only since it is always the smaller one) and + * below 2^63 ns (we have to check both sched_deadline and + * sched_period, as the latter can be zero). + */ +bool __checkparam_dl(const struct sched_attr *attr) +{ +	/* deadline != 0 */ +	if (attr->sched_deadline == 0) +		return false; + +	/* +	 * Since we truncate DL_SCALE bits, make sure we're at least +	 * that big. +	 */ +	if (attr->sched_runtime < (1ULL << DL_SCALE)) +		return false; + +	/* +	 * Since we use the MSB for wrap-around and sign issues, make +	 * sure it's not set (mind that period can be equal to zero). 
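sched_dl_overflow() above delegates the actual test to __dl_overflow(), which is defined elsewhere (kernel/sched/sched.h) and is not part of this hunk; conceptually it checks whether Umax * nr_cpus still covers the bandwidth allocated in the root domain after the change. A user-space sketch of that comparison, with invented names and numbers:

/* dl_admission.c - sketch of the per-root-domain admission test (illustrative) */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT 20
#define BW_UNIT  (1ULL << BW_SHIFT)

/* Would the root domain overflow if old_bw were replaced by new_bw?
 * cap is the per-CPU bandwidth limit (Umax), total_bw the sum already admitted. */
static bool would_overflow(uint64_t cap, int cpus, uint64_t total_bw,
			   uint64_t old_bw, uint64_t new_bw)
{
	return cap * cpus < total_bw - old_bw + new_bw;
}

int main(void)
{
	uint64_t cap = (95 * BW_UNIT) / 100;	/* Umax = 0.95      */
	int cpus = 4;
	uint64_t total_bw = 3 * BW_UNIT;	/* 300% already admitted */

	/* Admitting a new 50% task: 3.5 <= 3.8, accepted */
	printf("50%% task rejected? %d\n",
	       (int)would_overflow(cap, cpus, total_bw, 0, BW_UNIT / 2));
	/* Admitting a new 100% task: 4.0 > 3.8, rejected */
	printf("100%% task rejected? %d\n",
	       (int)would_overflow(cap, cpus, total_bw, 0, BW_UNIT));
	return 0;
}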
+	 */ +	if (attr->sched_deadline & (1ULL << 63) || +	    attr->sched_period & (1ULL << 63)) +		return false; + +	/* runtime <= deadline <= period (if period != 0) */ +	if ((attr->sched_period != 0 && +	     attr->sched_period < attr->sched_deadline) || +	    attr->sched_deadline < attr->sched_runtime) +		return false; + +	return true; +} + +/* + * This function clears the sched_dl_entity static params. + */ +void __dl_clear_params(struct task_struct *p) +{ +	struct sched_dl_entity *dl_se = &p->dl; + +	dl_se->dl_runtime = 0; +	dl_se->dl_deadline = 0; +	dl_se->dl_period = 0; +	dl_se->flags = 0; +	dl_se->dl_bw = 0; +	dl_se->dl_density = 0; + +	dl_se->dl_throttled = 0; +	dl_se->dl_yielded = 0; +	dl_se->dl_non_contending = 0; +} + +bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) +{ +	struct sched_dl_entity *dl_se = &p->dl; + +	if (dl_se->dl_runtime != attr->sched_runtime || +	    dl_se->dl_deadline != attr->sched_deadline || +	    dl_se->dl_period != attr->sched_period || +	    dl_se->flags != attr->sched_flags) +		return true; + +	return false; +} + +#ifdef CONFIG_SMP +int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed) +{ +	unsigned int dest_cpu = cpumask_any_and(cpu_active_mask, +							cs_cpus_allowed); +	struct dl_bw *dl_b; +	bool overflow; +	int cpus, ret; +	unsigned long flags; + +	rcu_read_lock_sched(); +	dl_b = dl_bw_of(dest_cpu); +	raw_spin_lock_irqsave(&dl_b->lock, flags); +	cpus = dl_bw_cpus(dest_cpu); +	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw); +	if (overflow) +		ret = -EBUSY; +	else { +		/* +		 * We reserve space for this task in the destination +		 * root_domain, as we can't fail after this point. +		 * We will free resources in the source root_domain +		 * later on (see set_cpus_allowed_dl()). +		 */ +		__dl_add(dl_b, p->dl.dl_bw, cpus); +		ret = 0; +	} +	raw_spin_unlock_irqrestore(&dl_b->lock, flags); +	rcu_read_unlock_sched(); +	return ret; +} + +int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, +				 const struct cpumask *trial) +{ +	int ret = 1, trial_cpus; +	struct dl_bw *cur_dl_b; +	unsigned long flags; + +	rcu_read_lock_sched(); +	cur_dl_b = dl_bw_of(cpumask_any(cur)); +	trial_cpus = cpumask_weight(trial); + +	raw_spin_lock_irqsave(&cur_dl_b->lock, flags); +	if (cur_dl_b->bw != -1 && +	    cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw) +		ret = 0; +	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags); +	rcu_read_unlock_sched(); +	return ret; +} + +bool dl_cpu_busy(unsigned int cpu) +{ +	unsigned long flags; +	struct dl_bw *dl_b; +	bool overflow; +	int cpus; + +	rcu_read_lock_sched(); +	dl_b = dl_bw_of(cpu); +	raw_spin_lock_irqsave(&dl_b->lock, flags); +	cpus = dl_bw_cpus(cpu); +	overflow = __dl_overflow(dl_b, cpus, 0, 0); +	raw_spin_unlock_irqrestore(&dl_b->lock, flags); +	rcu_read_unlock_sched(); +	return overflow; +} +#endif +  #ifdef CONFIG_SCHED_DEBUG  extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);  |
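Finally, for reference, a minimal user-space sketch of the parameters that __checkparam_dl() validates and __setparam_dl() consumes: a 10 ms / 100 ms SCHED_DEADLINE reservation with GRUB reclaiming requested through SCHED_FLAG_RECLAIM (the flag checked in update_curr_dl() above). glibc provides no wrapper for sched_setattr(), so the raw syscall is used; run as root or with CAP_SYS_NICE.

/* dl_reclaim.c - request a SCHED_DEADLINE reservation with GRUB reclaiming.
 * Minimal sketch; struct sched_attr is declared locally because glibc does
 * not provide a wrapper for sched_setattr(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE		6
#endif
#ifndef SCHED_FLAG_RECLAIM
#define SCHED_FLAG_RECLAIM	0x02	/* as added to the uapi headers by this series */
#endif

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;		/* all three in nanoseconds */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_flags    = SCHED_FLAG_RECLAIM;
	attr.sched_runtime  = 10 * 1000 * 1000;		/* 10 ms  */
	attr.sched_deadline = 100 * 1000 * 1000;	/* 100 ms */
	attr.sched_period   = 100 * 1000 * 1000;	/* 100 ms */

	/* pid 0 means "the calling thread" */
	if (syscall(SYS_sched_setattr, 0, &attr, 0))
		perror("sched_setattr");
	else
		puts("SCHED_DEADLINE with reclaiming enabled");

	/* ... real-time work would go here ... */
	return 0;
}

If the admission test in sched_dl_overflow() fails, the call returns -1 with errno set to EBUSY.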