Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--	kernel/sched/fair.c	141
1 file changed, 47 insertions(+), 94 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 70ba32e08a23..5c09ddf8c832 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
  *
@@ -5356,91 +5357,62 @@ static int wake_wide(struct task_struct *p)
 	return 1;
 }
 
-struct llc_stats {
-	unsigned long	nr_running;
-	unsigned long	load;
-	unsigned long	capacity;
-	int		has_capacity;
-};
+/*
+ * The purpose of wake_affine() is to quickly determine on which CPU we can run
+ * soonest. For the purpose of speed we only consider the waking and previous
+ * CPU.
+ *
+ * wake_affine_idle() - only considers 'now', it check if the waking CPU is (or
+ *			will be) idle.
+ *
+ * wake_affine_weight() - considers the weight to reflect the average
+ *			  scheduling latency of the CPUs. This seems to work
+ *			  for the overloaded case.
+ */
 
-static bool get_llc_stats(struct llc_stats *stats, int cpu)
+static bool
+wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
+		 int this_cpu, int prev_cpu, int sync)
 {
-	struct sched_domain_shared *sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-
-	if (!sds)
-		return false;
+	if (idle_cpu(this_cpu))
+		return true;
 
-	stats->nr_running	= READ_ONCE(sds->nr_running);
-	stats->load		= READ_ONCE(sds->load);
-	stats->capacity		= READ_ONCE(sds->capacity);
-	stats->has_capacity	= stats->nr_running < per_cpu(sd_llc_size, cpu);
+	if (sync && cpu_rq(this_cpu)->nr_running == 1)
+		return true;
 
-	return true;
+	return false;
 }
 
-/*
- * Can a task be moved from prev_cpu to this_cpu without causing a load
- * imbalance that would trigger the load balancer?
- *
- * Since we're running on 'stale' values, we might in fact create an imbalance
- * but recomputing these values is expensive, as that'd mean iteration 2 cache
- * domains worth of CPUs.
- */
 static bool
-wake_affine_llc(struct sched_domain *sd, struct task_struct *p,
-		int this_cpu, int prev_cpu, int sync)
+wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
+		   int this_cpu, int prev_cpu, int sync)
 {
-	struct llc_stats prev_stats, this_stats;
 	s64 this_eff_load, prev_eff_load;
 	unsigned long task_load;
 
-	if (!get_llc_stats(&prev_stats, prev_cpu) ||
-	    !get_llc_stats(&this_stats, this_cpu))
-		return false;
+	this_eff_load = target_load(this_cpu, sd->wake_idx);
+	prev_eff_load = source_load(prev_cpu, sd->wake_idx);
 
-	/*
-	 * If sync wakeup then subtract the (maximum possible)
-	 * effect of the currently running task from the load
-	 * of the current LLC.
-	 */
 	if (sync) {
 		unsigned long current_load = task_h_load(current);
 
-		/* in this case load hits 0 and this LLC is considered 'idle' */
-		if (current_load > this_stats.load)
+		if (current_load > this_eff_load)
 			return true;
 
-		this_stats.load -= current_load;
+		this_eff_load -= current_load;
 	}
 
-	/*
-	 * The has_capacity stuff is not SMT aware, but by trying to balance
-	 * the nr_running on both ends we try and fill the domain at equal
-	 * rates, thereby first consuming cores before siblings.
-	 */
-
-	/* if the old cache has capacity, stay there */
-	if (prev_stats.has_capacity && prev_stats.nr_running < this_stats.nr_running+1)
-		return false;
-
-	/* if this cache has capacity, come here */
-	if (this_stats.has_capacity && this_stats.nr_running+1 < prev_stats.nr_running)
-		return true;
-
-	/*
-	 * Check to see if we can move the load without causing too much
-	 * imbalance.
-	 */
 	task_load = task_h_load(p);
 
-	this_eff_load = 100;
-	this_eff_load *= prev_stats.capacity;
-
-	prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
-	prev_eff_load *= this_stats.capacity;
+	this_eff_load += task_load;
+	if (sched_feat(WA_BIAS))
+		this_eff_load *= 100;
+	this_eff_load *= capacity_of(prev_cpu);
 
-	this_eff_load *= this_stats.load + task_load;
-	prev_eff_load *= prev_stats.load - task_load;
+	prev_eff_load -= task_load;
+	if (sched_feat(WA_BIAS))
+		prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
+	prev_eff_load *= capacity_of(this_cpu);
 
 	return this_eff_load <= prev_eff_load;
 }
@@ -5449,22 +5421,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int prev_cpu, int sync)
 {
 	int this_cpu = smp_processor_id();
-	bool affine;
+	bool affine = false;
 
-	/*
-	 * Default to no affine wakeups; wake_affine() should not effect a task
-	 * placement the load-balancer feels inclined to undo. The conservative
-	 * option is therefore to not move tasks when they wake up.
-	 */
-	affine = false;
+	if (sched_feat(WA_IDLE) && !affine)
+		affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync);
 
-	/*
-	 * If the wakeup is across cache domains, try to evaluate if movement
-	 * makes sense, otherwise rely on select_idle_siblings() to do
-	 * placement inside the cache domain.
-	 */
-	if (!cpus_share_cache(prev_cpu, this_cpu))
-		affine = wake_affine_llc(sd, p, this_cpu, prev_cpu, sync);
+	if (sched_feat(WA_WEIGHT) && !affine)
+		affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
 
 	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
 	if (affine) {
@@ -7600,7 +7563,6 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq)
  */
 static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
 {
-	struct sched_domain_shared *shared = env->sd->shared;
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
 	struct sg_lb_stats *local = &sds->local_stat;
@@ -7672,22 +7634,6 @@ next_group:
 		if (env->dst_rq->rd->overload != overload)
 			env->dst_rq->rd->overload = overload;
 	}
-
-	if (!shared)
-		return;
-
-	/*
-	 * Since these are sums over groups they can contain some CPUs
-	 * multiple times for the NUMA domains.
-	 *
-	 * Currently only wake_affine_llc() and find_busiest_group()
-	 * uses these numbers, only the last is affected by this problem.
-	 *
-	 * XXX fix that.
-	 */
-	WRITE_ONCE(shared->nr_running,	sds->total_running);
-	WRITE_ONCE(shared->load,	sds->total_load);
-	WRITE_ONCE(shared->capacity,	sds->total_capacity);
 }
 
 /**
@@ -8098,6 +8044,13 @@ static int should_we_balance(struct lb_env *env)
 	int cpu, balance_cpu = -1;
 
 	/*
+	 * Ensure the balancing environment is consistent; can happen
+	 * when the softirq triggers 'during' hotplug.
+	 */
+	if (!cpumask_test_cpu(env->dst_cpu, env->cpus))
+		return 0;
+
+	/*
 	 * In the newly idle case, we will allow all the cpu's
 	 * to do the newly idle load balance.
 	 */
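As a reading aid: the heart of the new wake_affine_weight() is a biased, division-free comparison of load per capacity between the waking CPU and the task's previous CPU. Below is a minimal standalone sketch of that comparison for the case where the WA_BIAS feature is enabled and no sync adjustment applies; the struct cpu_sample type, the prefer_waking_cpu() helper, and the numbers in main() are hypothetical stand-ins for illustration, not kernel code.

/* Standalone illustration -- NOT kernel code. Types, helper names and the
 * sample values are made up; only the arithmetic mirrors the patch above. */
#include <stdbool.h>
#include <stdio.h>

struct cpu_sample {			/* stand-in for per-CPU state */
	long long load;			/* weighted runqueue load */
	long long capacity;		/* stand-in for capacity_of(cpu) */
};

/*
 * Mirrors the WA_BIAS path of wake_affine_weight(): the waking CPU's load
 * plus the task is scaled by 100 and the previous CPU's capacity, while the
 * previous CPU's load minus the task gets the imbalance_pct bias and the
 * waking CPU's capacity. Returning true means "pull the wakeup here".
 */
static bool prefer_waking_cpu(struct cpu_sample waking, struct cpu_sample prev,
			      long long task_load, int imbalance_pct)
{
	long long this_eff = (waking.load + task_load) * 100 * prev.capacity;
	long long prev_eff = (prev.load - task_load) *
			     (100 + (imbalance_pct - 100) / 2) * waking.capacity;

	return this_eff <= prev_eff;
}

int main(void)
{
	struct cpu_sample waking = { .load = 1024, .capacity = 1024 };
	struct cpu_sample prev   = { .load = 3072, .capacity = 1024 };

	/* With these sample numbers the lightly loaded waking CPU wins. */
	printf("pull to waking CPU: %s\n",
	       prefer_waking_cpu(waking, prev, 512, 117) ? "yes" : "no");
	return 0;
}

Cross-multiplying each side by the other CPU's capacity keeps the whole check in integer arithmetic instead of dividing load by capacity, and the (100 + (imbalance_pct - 100) / 2) factor applied to the previous CPU's side tilts the outcome toward pulling the task to the waking CPU.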