Diffstat (limited to 'kernel/sched/debug.c')
-rw-r--r--	kernel/sched/debug.c	435
1 file changed, 229 insertions, 206 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 486f403a778b..9c882f20803e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -8,8 +8,6 @@
  */
 #include "sched.h"

-static DEFINE_SPINLOCK(sched_debug_lock);
-
 /*
  * This allows printing both to /proc/sched_debug and
  * to the console
@@ -169,245 +167,258 @@ static const struct file_operations sched_feat_fops = {
 	.release	= single_release,
 };

-__read_mostly bool sched_debug_enabled;
+#ifdef CONFIG_SMP

-static __init int sched_init_debug(void)
+static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
+				   size_t cnt, loff_t *ppos)
 {
-	debugfs_create_file("sched_features", 0644, NULL, NULL,
-			&sched_feat_fops);
+	char buf[16];

-	debugfs_create_bool("sched_debug", 0644, NULL,
-			&sched_debug_enabled);
+	if (cnt > 15)
+		cnt = 15;

-	return 0;
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	if (kstrtouint(buf, 10, &sysctl_sched_tunable_scaling))
+		return -EINVAL;
+
+	if (sched_update_scaling())
+		return -EINVAL;
+
+	*ppos += cnt;
+	return cnt;
 }
-late_initcall(sched_init_debug);

-#ifdef CONFIG_SMP
+static int sched_scaling_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%d\n", sysctl_sched_tunable_scaling);
+	return 0;
+}

-#ifdef CONFIG_SYSCTL
+static int sched_scaling_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_scaling_show, NULL);
+}

-static struct ctl_table sd_ctl_dir[] = {
-	{
-		.procname	= "sched_domain",
-		.mode		= 0555,
-	},
-	{}
+static const struct file_operations sched_scaling_fops = {
+	.open		= sched_scaling_open,
+	.write		= sched_scaling_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };

-static struct ctl_table sd_ctl_root[] = {
-	{
-		.procname	= "kernel",
-		.mode		= 0555,
-		.child		= sd_ctl_dir,
-	},
-	{}
-};
+#endif /* SMP */

-static struct ctl_table *sd_alloc_ctl_entry(int n)
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
+				   size_t cnt, loff_t *ppos)
 {
-	struct ctl_table *entry =
-		kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
+	char buf[16];
+	int mode;
+
+	if (cnt > 15)
+		cnt = 15;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;

-	return entry;
+	buf[cnt] = 0;
+	mode = sched_dynamic_mode(strstrip(buf));
+	if (mode < 0)
+		return mode;
+
+	sched_dynamic_update(mode);
+
+	*ppos += cnt;
+
+	return cnt;
 }

-static void sd_free_ctl_entry(struct ctl_table **tablep)
+static int sched_dynamic_show(struct seq_file *m, void *v)
 {
-	struct ctl_table *entry;
-
-	/*
-	 * In the intermediate directories, both the child directory and
-	 * procname are dynamically allocated and could fail but the mode
-	 * will always be set. In the lowest directory the names are
-	 * static strings and all have proc handlers.
-	 */
-	for (entry = *tablep; entry->mode; entry++) {
-		if (entry->child)
-			sd_free_ctl_entry(&entry->child);
-		if (entry->proc_handler == NULL)
-			kfree(entry->procname);
+	static const char * preempt_modes[] = {
+		"none", "voluntary", "full"
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
+		if (preempt_dynamic_mode == i)
+			seq_puts(m, "(");
+		seq_puts(m, preempt_modes[i]);
+		if (preempt_dynamic_mode == i)
+			seq_puts(m, ")");
+
+		seq_puts(m, " ");
 	}
-	kfree(*tablep);
-	*tablep = NULL;
+	seq_puts(m, "\n");
+	return 0;
 }

-static void
-set_table_entry(struct ctl_table *entry,
-		const char *procname, void *data, int maxlen,
-		umode_t mode, proc_handler *proc_handler)
+static int sched_dynamic_open(struct inode *inode, struct file *filp)
 {
-	entry->procname = procname;
-	entry->data = data;
-	entry->maxlen = maxlen;
-	entry->mode = mode;
-	entry->proc_handler = proc_handler;
+	return single_open(filp, sched_dynamic_show, NULL);
 }

-static int sd_ctl_doflags(struct ctl_table *table, int write,
-			  void *buffer, size_t *lenp, loff_t *ppos)
+static const struct file_operations sched_dynamic_fops = {
+	.open		= sched_dynamic_open,
+	.write		= sched_dynamic_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+#endif /* CONFIG_PREEMPT_DYNAMIC */
+
+__read_mostly bool sched_debug_verbose;
+
+static const struct seq_operations sched_debug_sops;
+
+static int sched_debug_open(struct inode *inode, struct file *filp)
 {
-	unsigned long flags = *(unsigned long *)table->data;
-	size_t data_size = 0;
-	size_t len = 0;
-	char *tmp, *buf;
-	int idx;
+	return seq_open(filp, &sched_debug_sops);
+}

-	if (write)
-		return 0;
+static const struct file_operations sched_debug_fops = {
+	.open		= sched_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};

-	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
-		char *name = sd_flag_debug[idx].name;
+static struct dentry *debugfs_sched;

-		/* Name plus whitespace */
-		data_size += strlen(name) + 1;
-	}
+static __init int sched_init_debug(void)
+{
+	struct dentry __maybe_unused *numa;

-	if (*ppos > data_size) {
-		*lenp = 0;
-		return 0;
-	}
+	debugfs_sched = debugfs_create_dir("sched", NULL);

-	buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
+	debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops);
+	debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose);
+#ifdef CONFIG_PREEMPT_DYNAMIC
+	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
+#endif

-	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
-		char *name = sd_flag_debug[idx].name;
+	debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency);
+	debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
+	debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity);

-		len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
-	}
+	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
+	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);

-	tmp = buf + *ppos;
-	len -= *ppos;
+#ifdef CONFIG_SMP
+	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
+	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
+	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);

-	if (len > *lenp)
-		len = *lenp;
-	if (len)
-		memcpy(buffer, tmp, len);
-	if (len < *lenp) {
-		((char *)buffer)[len] = '\n';
-		len++;
-	}
+	mutex_lock(&sched_domains_mutex);
+	update_sched_domain_debugfs();
+	mutex_unlock(&sched_domains_mutex);
+#endif

-	*lenp = len;
-	*ppos += len;
+#ifdef CONFIG_NUMA_BALANCING
+	numa = debugfs_create_dir("numa_balancing", debugfs_sched);

-	kfree(buf);
+	debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay);
+	debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min);
+	debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max);
+	debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size);
+#endif
+
+	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);

 	return 0;
 }
+late_initcall(sched_init_debug);
+
+#ifdef CONFIG_SMP

-static struct ctl_table *
-sd_alloc_ctl_domain_table(struct sched_domain *sd)
+static cpumask_var_t		sd_sysctl_cpus;
+static struct dentry		*sd_dentry;
+
+static int sd_flags_show(struct seq_file *m, void *v)
 {
-	struct ctl_table *table = sd_alloc_ctl_entry(9);
-
-	if (table == NULL)
-		return NULL;
-
-	set_table_entry(&table[0], "min_interval",	  &sd->min_interval,	    sizeof(long), 0644, proc_doulongvec_minmax);
-	set_table_entry(&table[1], "max_interval",	  &sd->max_interval,	    sizeof(long), 0644, proc_doulongvec_minmax);
-	set_table_entry(&table[2], "busy_factor",	  &sd->busy_factor,	    sizeof(int),  0644, proc_dointvec_minmax);
-	set_table_entry(&table[3], "imbalance_pct",	  &sd->imbalance_pct,	    sizeof(int),  0644, proc_dointvec_minmax);
-	set_table_entry(&table[4], "cache_nice_tries",	  &sd->cache_nice_tries,    sizeof(int),  0644, proc_dointvec_minmax);
-	set_table_entry(&table[5], "flags",		  &sd->flags,		    sizeof(int),  0444, sd_ctl_doflags);
-	set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
-	set_table_entry(&table[7], "name",		  sd->name,	       CORENAME_MAX_SIZE, 0444, proc_dostring);
-	/* &table[8] is terminator */
-
-	return table;
+	unsigned long flags = *(unsigned int *)m->private;
+	int idx;
+
+	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
+		seq_puts(m, sd_flag_debug[idx].name);
+		seq_puts(m, " ");
+	}
+	seq_puts(m, "\n");
+
+	return 0;
 }

-static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+static int sd_flags_open(struct inode *inode, struct file *file)
 {
-	struct ctl_table *entry, *table;
-	struct sched_domain *sd;
-	int domain_num = 0, i;
-	char buf[32];
-
-	for_each_domain(cpu, sd)
-		domain_num++;
-	entry = table = sd_alloc_ctl_entry(domain_num + 1);
-	if (table == NULL)
-		return NULL;
-
-	i = 0;
-	for_each_domain(cpu, sd) {
-		snprintf(buf, 32, "domain%d", i);
-		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0555;
-		entry->child = sd_alloc_ctl_domain_table(sd);
-		entry++;
-		i++;
-	}
-	return table;
+	return single_open(file, sd_flags_show, inode->i_private);
 }

-static cpumask_var_t		sd_sysctl_cpus;
-static struct ctl_table_header	*sd_sysctl_header;
+static const struct file_operations sd_flags_fops = {
+	.open		= sd_flags_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};

-void register_sched_domain_sysctl(void)
+static void register_sd(struct sched_domain *sd, struct dentry *parent)
 {
-	static struct ctl_table *cpu_entries;
-	static struct ctl_table **cpu_idx;
-	static bool init_done = false;
-	char buf[32];
-	int i;
+#define SDM(type, mode, member)	\
+	debugfs_create_##type(#member, mode, parent, &sd->member)

-	if (!cpu_entries) {
-		cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
-		if (!cpu_entries)
-			return;
+	SDM(ulong, 0644, min_interval);
+	SDM(ulong, 0644, max_interval);
+	SDM(u64,   0644, max_newidle_lb_cost);
+	SDM(u32,   0644, busy_factor);
+	SDM(u32,   0644, imbalance_pct);
+	SDM(u32,   0644, cache_nice_tries);
+	SDM(str,   0444, name);

-		WARN_ON(sd_ctl_dir[0].child);
-		sd_ctl_dir[0].child = cpu_entries;
-	}
+#undef SDM

-	if (!cpu_idx) {
-		struct ctl_table *e = cpu_entries;
-
-		cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
-		if (!cpu_idx)
-			return;
+	debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
+}

-		/* deal with sparse possible map */
-		for_each_possible_cpu(i) {
-			cpu_idx[i] = e;
-			e++;
-		}
-	}
+void update_sched_domain_debugfs(void)
+{
+	int cpu, i;

 	if (!cpumask_available(sd_sysctl_cpus)) {
 		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
 			return;
-	}
-
-	if (!init_done) {
-		init_done = true;
-		/* init to possible to not have holes in @cpu_entries */
 		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
 	}

-	for_each_cpu(i, sd_sysctl_cpus) {
-		struct ctl_table *e = cpu_idx[i];
+	if (!sd_dentry)
+		sd_dentry = debugfs_create_dir("domains", debugfs_sched);

-		if (e->child)
-			sd_free_ctl_entry(&e->child);
+	for_each_cpu(cpu, sd_sysctl_cpus) {
+		struct sched_domain *sd;
+		struct dentry *d_cpu;
+		char buf[32];

-		if (!e->procname) {
-			snprintf(buf, 32, "cpu%d", i);
-			e->procname = kstrdup(buf, GFP_KERNEL);
+		snprintf(buf, sizeof(buf), "cpu%d", cpu);
+		debugfs_remove(debugfs_lookup(buf, sd_dentry));
+		d_cpu = debugfs_create_dir(buf, sd_dentry);
+
+		i = 0;
+		for_each_domain(cpu, sd) {
+			struct dentry *d_sd;
+
+			snprintf(buf, sizeof(buf), "domain%d", i);
+			d_sd = debugfs_create_dir(buf, d_cpu);
+
+			register_sd(sd, d_sd);
+			i++;
 		}
-		e->mode = 0555;
-		e->child = sd_alloc_ctl_cpu_table(i);
-		__cpumask_clear_cpu(i, sd_sysctl_cpus);
+		__cpumask_clear_cpu(cpu, sd_sysctl_cpus);
 	}
-
-	WARN_ON(sd_sysctl_header);
-	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
 }

 void dirty_sched_domain_sysctl(int cpu)
@@ -416,13 +427,6 @@ void dirty_sched_domain_sysctl(int cpu)
 		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
 }

-/* may be called multiple times per register */
-void unregister_sched_domain_sysctl(void)
-{
-	unregister_sysctl_table(sd_sysctl_header);
-	sd_sysctl_header = NULL;
-}
-#endif /* CONFIG_SYSCTL */
 #endif /* CONFIG_SMP */

 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -470,16 +474,37 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 #endif

 #ifdef CONFIG_CGROUP_SCHED
+static DEFINE_SPINLOCK(sched_debug_lock);
 static char group_path[PATH_MAX];

-static char *task_group_path(struct task_group *tg)
+static void task_group_path(struct task_group *tg, char *path, int plen)
 {
-	if (autogroup_path(tg, group_path, PATH_MAX))
-		return group_path;
+	if (autogroup_path(tg, path, plen))
+		return;

-	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+	cgroup_path(tg->css.cgroup, path, plen);
+}

-	return group_path;
+/*
+ * Only 1 SEQ_printf_task_group_path() caller can use the full length
+ * group_path[] for cgroup path. Other simultaneous callers will have
+ * to use a shorter stack buffer. A "..." suffix is appended at the end
+ * of the stack buffer so that it will show up in case the output length
+ * matches the given buffer size to indicate possible path name truncation.
+ */
+#define SEQ_printf_task_group_path(m, tg, fmt...)			\
+{									\
+	if (spin_trylock(&sched_debug_lock)) {				\
+		task_group_path(tg, group_path, sizeof(group_path));	\
+		SEQ_printf(m, fmt, group_path);				\
+		spin_unlock(&sched_debug_lock);				\
+	} else {							\
+		char buf[128];						\
+		char *bufend = buf + sizeof(buf) - 3;			\
+		task_group_path(tg, buf, bufend - buf);			\
+		strcpy(bufend - 1, "...");				\
+		SEQ_printf(m, fmt, buf);				\
+	}								\
 }
 #endif

@@ -506,7 +531,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
 #ifdef CONFIG_CGROUP_SCHED
-	SEQ_printf(m, " %s", task_group_path(task_group(p)));
+	SEQ_printf_task_group_path(m, task_group(p), " %s")
 #endif

 	SEQ_printf(m, "\n");
@@ -543,7 +568,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)

 #ifdef CONFIG_FAIR_GROUP_SCHED
 	SEQ_printf(m, "\n");
-	SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
+	SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
 #else
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
@@ -614,7 +639,7 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
 	SEQ_printf(m, "\n");
-	SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
+	SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
 #else
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, "rt_rq[%d]:\n", cpu);
@@ -666,7 +691,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 static void print_cpu(struct seq_file *m, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	unsigned long flags;

 #ifdef CONFIG_X86
 	{
@@ -717,13 +741,11 @@ do {									\
 	}
 #undef P

-	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
 	print_dl_stats(m, cpu);
 	print_rq(m, rq, cpu);
-	spin_unlock_irqrestore(&sched_debug_lock, flags);
 	SEQ_printf(m, "\n");
 }

@@ -815,7 +837,7 @@ void sysrq_sched_debug_show(void)
 }

 /*
- * This itererator needs some explanation.
+ * This iterator needs some explanation.
  * It returns 1 for the header position.
  * This means 2 is CPU 0.
  * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
@@ -860,15 +882,6 @@ static const struct seq_operations sched_debug_sops = {
 	.show		= sched_debug_show,
 };

-static int __init init_sched_debug_procfs(void)
-{
-	if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops))
-		return -ENOMEM;
-	return 0;
-}
-
-__initcall(init_sched_debug_procfs);
-
 #define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
 #define __P(F) __PS(#F, F)
 #define   P(F) __PS(#F, p->F)
@@ -1033,3 +1046,13 @@ void proc_sched_set_task(struct task_struct *p)
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 }
+
+void resched_latency_warn(int cpu, u64 latency)
+{
+	static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1);
+
+	WARN(__ratelimit(&latency_check_ratelimit),
+	     "sched: CPU %d need_resched set for > %llu ns (%d ticks) "
+	     "without schedule\n",
+	     cpu, latency, cpu_rq(cpu)->ticks_without_resched);
+}
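
A note on the idiom this patch converts to: a plain integer knob needs only a one-line debugfs_create_u32() call, with no handler, no table entry, and no cleanup, which is what lets all of the ctl_table allocation and freeing above disappear. Only values that must be validated on write (tunable_scaling, preempt) keep a full file_operations built on single_open()/seq_file for the read side. Below is a minimal, self-contained module sketch of that same pattern; the demo_* names and the "demo_sched" directory are hypothetical illustrations, not part of the patch, while the debugfs and seq_file calls are the same APIs the patch uses.

/*
 * Hypothetical demo module mirroring the debugfs pattern adopted above:
 * one raw u32 exposed directly, one value with a validating write handler.
 */
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>

static struct dentry *demo_dir;
static u32 demo_tunable = 42;	/* raw u32: debugfs handles read/write */
static u32 demo_scaling;	/* parsed and checked on write */

static ssize_t demo_scaling_write(struct file *filp, const char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	char buf[16];

	/* Same bounded copy-and-parse shape as sched_scaling_write() above. */
	if (cnt > 15)
		cnt = 15;
	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;
	buf[cnt] = 0;

	if (kstrtouint(buf, 10, &demo_scaling))
		return -EINVAL;

	*ppos += cnt;
	return cnt;
}

static int demo_scaling_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%u\n", demo_scaling);
	return 0;
}

static int demo_scaling_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, demo_scaling_show, NULL);
}

static const struct file_operations demo_scaling_fops = {
	.open		= demo_scaling_open,
	.write		= demo_scaling_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init demo_init(void)
{
	/* Creates /sys/kernel/debug/demo_sched/{tunable,scaling} */
	demo_dir = debugfs_create_dir("demo_sched", NULL);
	debugfs_create_u32("tunable", 0644, demo_dir, &demo_tunable);
	debugfs_create_file("scaling", 0644, demo_dir, NULL, &demo_scaling_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

With the patch applied, the real knobs surface the same way under /sys/kernel/debug/sched/ (features, verbose, latency_ns, min_granularity_ns, ...), with per-domain attributes under /sys/kernel/debug/sched/domains/cpuN/domainM/, replacing the old /proc/sys/kernel/sched_domain hierarchy; the old /proc/sched_debug dump moves to /sys/kernel/debug/sched/debug.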