Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/Makefile       |   1
-rw-r--r--  kernel/sched/autogroup.c    |   1
-rw-r--r--  kernel/sched/autogroup.h    |   1
-rw-r--r--  kernel/sched/completion.c   |   1
-rw-r--r--  kernel/sched/core.c         |  24
-rw-r--r--  kernel/sched/cpuacct.c      |   1
-rw-r--r--  kernel/sched/cpuacct.h      |   1
-rw-r--r--  kernel/sched/cpudeadline.h  |   1
-rw-r--r--  kernel/sched/cpupri.h       |   1
-rw-r--r--  kernel/sched/deadline.c     |   1
-rw-r--r--  kernel/sched/debug.c        |   2
-rw-r--r--  kernel/sched/fair.c         | 141
-rw-r--r--  kernel/sched/features.h     |   4
-rw-r--r--  kernel/sched/idle_task.c    |   1
-rw-r--r--  kernel/sched/loadavg.c      |   1
-rw-r--r--  kernel/sched/membarrier.c   |  34
-rw-r--r--  kernel/sched/rt.c           |   1
-rw-r--r--  kernel/sched/sched-pelt.h   |   1
-rw-r--r--  kernel/sched/sched.h        |   1
-rw-r--r--  kernel/sched/stats.c        |   1
-rw-r--r--  kernel/sched/stats.h        |   1
-rw-r--r--  kernel/sched/stop_task.c    |   1
-rw-r--r--  kernel/sched/swait.c        |   1
-rw-r--r--  kernel/sched/topology.c     |   1
24 files changed, 123 insertions, 101 deletions
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 78f54932ea1d..a9ee16bbc693 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
 endif
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index de6d7f4dfcb5..a43df5193538 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "sched.h"
 #include <linux/proc_fs.h>
diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h
index ce40c810cd5c..27cd22b89824 100644
--- a/kernel/sched/autogroup.h
+++ b/kernel/sched/autogroup.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifdef CONFIG_SCHED_AUTOGROUP
 
 #include <linux/kref.h>
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index cc873075c3bd..2ddaec40956f 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Generic wait-for-completion handler;
  *
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 18a6966567da..d17c5da523a0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5166,6 +5166,28 @@ void sched_show_task(struct task_struct *p)
 	put_task_stack(p);
 }
 
+static inline bool
+state_filter_match(unsigned long state_filter, struct task_struct *p)
+{
+	/* no filter, everything matches */
+	if (!state_filter)
+		return true;
+
+	/* filter, but doesn't match */
+	if (!(p->state & state_filter))
+		return false;
+
+	/*
+	 * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
+	 * TASK_KILLABLE).
+	 */
+	if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
+		return false;
+
+	return true;
+}
+
+
 void show_state_filter(unsigned long state_filter)
 {
 	struct task_struct *g, *p;
@@ -5188,7 +5210,7 @@ void show_state_filter(unsigned long state_filter)
 		 */
 		touch_nmi_watchdog();
 		touch_all_softlockup_watchdogs();
-		if (!state_filter || (p->state & state_filter))
+		if (state_filter_match(state_filter, p))
 			sched_show_task(p);
 	}
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index f95ab29a45d0..44ab32a4fab6 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/cgroup.h>
 #include <linux/slab.h>
 #include <linux/percpu.h>
diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
index ba72807c73d4..a8358a57a316 100644
--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifdef CONFIG_CGROUP_CPUACCT
 
 extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index f7da8c55bba0..b010d26e108e 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_CPUDL_H
 #define _LINUX_CPUDL_H
 
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h
index 63cbb9ca0496..bab050019071 100644
--- a/kernel/sched/cpupri.h
+++ b/kernel/sched/cpupri.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_CPUPRI_H
 #define _LINUX_CPUPRI_H
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0191ec7667c3..4ae5c1ea90e2 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Deadline Scheduling Class (SCHED_DEADLINE)
  *
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 01217fb5a5de..2f93e4a2d9f6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -466,8 +466,6 @@ static char *task_group_path(struct task_group *tg)
 }
 #endif
 
-static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 70ba32e08a23..5c09ddf8c832 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
  *
@@ -5356,91 +5357,62 @@ static int wake_wide(struct task_struct *p)
 	return 1;
 }
 
-struct llc_stats {
-	unsigned long	nr_running;
-	unsigned long	load;
-	unsigned long	capacity;
-	int		has_capacity;
-};
+/*
+ * The purpose of wake_affine() is to quickly determine on which CPU we can run
+ * soonest. For the purpose of speed we only consider the waking and previous
+ * CPU.
+ *
+ * wake_affine_idle() - only considers 'now', it check if the waking CPU is (or
+ *			will be) idle.
+ *
+ * wake_affine_weight() - considers the weight to reflect the average
+ *			  scheduling latency of the CPUs. This seems to work
+ *			  for the overloaded case.
+ */
 
-static bool get_llc_stats(struct llc_stats *stats, int cpu)
+static bool
+wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
+		 int this_cpu, int prev_cpu, int sync)
 {
-	struct sched_domain_shared *sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-
-	if (!sds)
-		return false;
+	if (idle_cpu(this_cpu))
+		return true;
 
-	stats->nr_running	= READ_ONCE(sds->nr_running);
-	stats->load		= READ_ONCE(sds->load);
-	stats->capacity		= READ_ONCE(sds->capacity);
-	stats->has_capacity	= stats->nr_running < per_cpu(sd_llc_size, cpu);
+	if (sync && cpu_rq(this_cpu)->nr_running == 1)
+		return true;
 
-	return true;
+	return false;
 }
 
-/*
- * Can a task be moved from prev_cpu to this_cpu without causing a load
- * imbalance that would trigger the load balancer?
- *
- * Since we're running on 'stale' values, we might in fact create an imbalance
- * but recomputing these values is expensive, as that'd mean iteration 2 cache
- * domains worth of CPUs.
- */
 static bool
-wake_affine_llc(struct sched_domain *sd, struct task_struct *p,
-		int this_cpu, int prev_cpu, int sync)
+wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
+		   int this_cpu, int prev_cpu, int sync)
 {
-	struct llc_stats prev_stats, this_stats;
 	s64 this_eff_load, prev_eff_load;
 	unsigned long task_load;
 
-	if (!get_llc_stats(&prev_stats, prev_cpu) ||
-	    !get_llc_stats(&this_stats, this_cpu))
-		return false;
+	this_eff_load = target_load(this_cpu, sd->wake_idx);
+	prev_eff_load = source_load(prev_cpu, sd->wake_idx);
 
-	/*
-	 * If sync wakeup then subtract the (maximum possible)
-	 * effect of the currently running task from the load
-	 * of the current LLC.
-	 */
 	if (sync) {
 		unsigned long current_load = task_h_load(current);
 
-		/* in this case load hits 0 and this LLC is considered 'idle' */
-		if (current_load > this_stats.load)
+		if (current_load > this_eff_load)
 			return true;
 
-		this_stats.load -= current_load;
+		this_eff_load -= current_load;
 	}
 
-	/*
-	 * The has_capacity stuff is not SMT aware, but by trying to balance
-	 * the nr_running on both ends we try and fill the domain at equal
-	 * rates, thereby first consuming cores before siblings.
-	 */
-
-	/* if the old cache has capacity, stay there */
-	if (prev_stats.has_capacity && prev_stats.nr_running < this_stats.nr_running+1)
-		return false;
-
-	/* if this cache has capacity, come here */
-	if (this_stats.has_capacity && this_stats.nr_running+1 < prev_stats.nr_running)
-		return true;
-
-	/*
-	 * Check to see if we can move the load without causing too much
-	 * imbalance.
-	 */
 	task_load = task_h_load(p);
 
-	this_eff_load = 100;
-	this_eff_load *= prev_stats.capacity;
-
-	prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
-	prev_eff_load *= this_stats.capacity;
+	this_eff_load += task_load;
+	if (sched_feat(WA_BIAS))
+		this_eff_load *= 100;
+	this_eff_load *= capacity_of(prev_cpu);
 
-	this_eff_load *= this_stats.load + task_load;
-	prev_eff_load *= prev_stats.load - task_load;
+	prev_eff_load -= task_load;
+	if (sched_feat(WA_BIAS))
+		prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
+	prev_eff_load *= capacity_of(this_cpu);
 
 	return this_eff_load <= prev_eff_load;
 }
@@ -5449,22 +5421,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int prev_cpu, int sync)
 {
 	int this_cpu = smp_processor_id();
-	bool affine;
+	bool affine = false;
 
-	/*
-	 * Default to no affine wakeups; wake_affine() should not effect a task
-	 * placement the load-balancer feels inclined to undo. The conservative
-	 * option is therefore to not move tasks when they wake up.
-	 */
-	affine = false;
+	if (sched_feat(WA_IDLE) && !affine)
+		affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync);
 
-	/*
-	 * If the wakeup is across cache domains, try to evaluate if movement
-	 * makes sense, otherwise rely on select_idle_siblings() to do
-	 * placement inside the cache domain.
-	 */
-	if (!cpus_share_cache(prev_cpu, this_cpu))
-		affine = wake_affine_llc(sd, p, this_cpu, prev_cpu, sync);
+	if (sched_feat(WA_WEIGHT) && !affine)
+		affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
 
 	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
 	if (affine) {
@@ -7600,7 +7563,6 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq)
  */
 static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
 {
-	struct sched_domain_shared *shared = env->sd->shared;
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
 	struct sg_lb_stats *local = &sds->local_stat;
@@ -7672,22 +7634,6 @@ next_group:
 		if (env->dst_rq->rd->overload != overload)
 			env->dst_rq->rd->overload = overload;
 	}
-
-	if (!shared)
-		return;
-
-	/*
-	 * Since these are sums over groups they can contain some CPUs
-	 * multiple times for the NUMA domains.
-	 *
-	 * Currently only wake_affine_llc() and find_busiest_group()
-	 * uses these numbers, only the last is affected by this problem.
-	 *
-	 * XXX fix that.
-	 */
-	WRITE_ONCE(shared->nr_running,	sds->total_running);
-	WRITE_ONCE(shared->load,	sds->total_load);
-	WRITE_ONCE(shared->capacity,	sds->total_capacity);
 }
 
 /**
@@ -8098,6 +8044,13 @@ static int should_we_balance(struct lb_env *env)
 	int cpu, balance_cpu = -1;
 
 	/*
+	 * Ensure the balancing environment is consistent; can happen
+	 * when the softirq triggers 'during' hotplug.
+	 */
+	if (!cpumask_test_cpu(env->dst_cpu, env->cpus))
+		return 0;
+
+	/*
 	 * In the newly idle case, we will allow all the cpu's
 	 * to do the newly idle load balance.
 	 */
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index d3fb15555291..9552fd5854bf 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Only give sleepers 50% of their service deficit. This allows
  * them to run sooner, but does not allow tons of sleepers to
@@ -81,3 +82,6 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
 SCHED_FEAT(ATTACH_AGE_LOAD, true)
 
+SCHED_FEAT(WA_IDLE, true)
+SCHED_FEAT(WA_WEIGHT, true)
+SCHED_FEAT(WA_BIAS, true)
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 0c00172db63e..d518664cce4f 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "sched.h"
 
 /*
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index f14716a3522f..89a989e4d758 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kernel/sched/loadavg.c
  *
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index a92fddc22747..dd7908743dab 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,7 @@
 #include <linux/membarrier.h>
 #include <linux/tick.h>
 #include <linux/cpumask.h>
+#include <linux/atomic.h>
 
 #include "sched.h"	/* for cpu_rq(). */
 
@@ -26,21 +27,26 @@
  * except MEMBARRIER_CMD_QUERY.
  */
 #define MEMBARRIER_CMD_BITMASK	\
-	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
+	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED	\
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
 
 static void ipi_mb(void *info)
 {
 	smp_mb();	/* IPIs should be serializing but paranoid. */
 }
 
-static void membarrier_private_expedited(void)
+static int membarrier_private_expedited(void)
 {
 	int cpu;
 	bool fallback = false;
 	cpumask_var_t tmpmask;
 
+	if (!(atomic_read(&current->mm->membarrier_state)
+			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+		return -EPERM;
+
 	if (num_online_cpus() == 1)
-		return;
+		return 0;
 
 	/*
 	 * Matches memory barriers around rq->curr modification in
@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
 	 * rq->curr modification in scheduler.
 	 */
 	smp_mb();	/* exit from system call is not a mb */
+	return 0;
+}
+
+static void membarrier_register_private_expedited(void)
+{
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+
+	/*
+	 * We need to consider threads belonging to different thread
+	 * groups, which use the same mm. (CLONE_VM but not
+	 * CLONE_THREAD).
+	 */
+	if (atomic_read(&mm->membarrier_state)
+			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
+		return;
+	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+			&mm->membarrier_state);
 }
 
 /**
@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
 			synchronize_sched();
 		return 0;
 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-		membarrier_private_expedited();
+		return membarrier_private_expedited();
+	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+		membarrier_register_private_expedited();
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0af5ca9e3e3f..3c96c80e0992 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
  * policies)
diff --git a/kernel/sched/sched-pelt.h b/kernel/sched/sched-pelt.h
index cd200d16529e..a26473674fb7 100644
--- a/kernel/sched/sched-pelt.h
+++ b/kernel/sched/sched-pelt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by Documentation/scheduler/sched-pelt; do not modify. */
 
 static const u32 runnable_avg_yN_inv[] = {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14db76cd496f..3b448ba82225 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/sched.h>
 
 #include <linux/sched/autogroup.h>
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 87e2c9f0c33e..940b1fa1d2ce 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/slab.h>
 #include <linux/fs.h>
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index d5710651043b..baf500d12b7c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 9f69fb630853..45caf90b24cd 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "sched.h"
 
 /*
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 2227e183e202..9ff1555341ed 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/sched/signal.h>
 #include <linux/swait.h>
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f1cf4f306a82..6798276d29af 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Scheduler topology setup/handling methods
  */
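Note on the fair.c change above: the standalone sketch below (not kernel code) illustrates the effective-load comparison that the new wake_affine_weight() performs. The load, capacity, current_load and imbalance_pct values are made-up placeholders for what target_load()/source_load(), capacity_of(), task_h_load() and the sched_domain provide in the kernel.

/*
 * Sketch of the wake_affine_weight() comparison, with example numbers
 * standing in for the kernel's load/capacity metrics.
 */
#include <stdbool.h>
#include <stdio.h>

static bool wake_affine_weight_sketch(long this_load, long prev_load,
				      long this_capacity, long prev_capacity,
				      long task_load, long current_load,
				      int imbalance_pct, bool sync, bool wa_bias)
{
	long long this_eff_load = this_load;
	long long prev_eff_load = prev_load;

	if (sync) {
		/* a sync waker is about to sleep: discount its own load */
		if (current_load > this_eff_load)
			return true;
		this_eff_load -= current_load;
	}

	/* pretend the task already runs on the waking CPU ... */
	this_eff_load += task_load;
	if (wa_bias)
		this_eff_load *= 100;
	this_eff_load *= prev_capacity;

	/* ... and no longer runs on its previous CPU */
	prev_eff_load -= task_load;
	if (wa_bias)
		prev_eff_load *= 100 + (imbalance_pct - 100) / 2;
	prev_eff_load *= this_capacity;

	/* pull the task to the waking CPU only if that does not look worse */
	return this_eff_load <= prev_eff_load;
}

int main(void)
{
	/* example: waking CPU lightly loaded, previous CPU busy */
	bool affine = wake_affine_weight_sketch(/*this_load*/ 256,
						/*prev_load*/ 2048,
						/*this_capacity*/ 1024,
						/*prev_capacity*/ 1024,
						/*task_load*/ 512,
						/*current_load*/ 128,
						/*imbalance_pct*/ 117,
						/*sync*/ false,
						/*wa_bias*/ true);
	printf("wake affine: %s\n", affine ? "yes" : "no");
	return 0;
}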
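Note on the membarrier.c change above: membarrier_private_expedited() now returns -EPERM unless the process first issued MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED. A minimal userspace sketch of that handshake, assuming a kernel and uapi headers that already define both commands; the error handling is illustrative only.

/*
 * Register for, then issue, a private expedited membarrier.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/membarrier.h>

static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* without this registration, the expedited command is rejected */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0)) {
		fprintf(stderr, "register: %s\n", strerror(errno));
		return 1;
	}

	/* IPIs only the CPUs currently running threads of this process */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) {
		fprintf(stderr, "private expedited: %s\n", strerror(errno));
		return 1;
	}

	puts("private expedited membarrier executed");
	return 0;
}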