Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r--   drivers/cpufreq/intel_pstate.c | 208
1 file changed, 199 insertions(+), 9 deletions(-)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 08960a55eb27..ece120da3353 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -221,6 +221,11 @@ struct global_params {
  *			preference/bias
  * @epp_saved:		Saved EPP/EPB during system suspend or CPU offline
  *			operation
+ * @hwp_req_cached:	Cached value of the last HWP Request MSR
+ * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
+ * @last_io_update:	Last time when IO wake flag was set
+ * @sched_flags:	Store scheduler flags for possible cross CPU update
+ * @hwp_boost_min:	Last HWP boosted min performance
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -253,6 +258,11 @@ struct cpudata {
 	s16 epp_policy;
 	s16 epp_default;
 	s16 epp_saved;
+	u64 hwp_req_cached;
+	u64 hwp_cap_cached;
+	u64 last_io_update;
+	unsigned int sched_flags;
+	u32 hwp_boost_min;
 };
 
 static struct cpudata **all_cpu_data;
@@ -284,7 +294,9 @@ struct pstate_funcs {
 
 static struct pstate_funcs pstate_funcs __read_mostly;
 
 static int hwp_active __read_mostly;
+static int hwp_mode_bdw __read_mostly;
 static bool per_cpu_limits __read_mostly;
+static bool hwp_boost __read_mostly;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
@@ -689,6 +701,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
 	u64 cap;
 
 	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+	WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
 	if (global.no_turbo)
 		*current_max = HWP_GUARANTEED_PERF(cap);
 	else
@@ -763,6 +776,7 @@ update_epp:
 		intel_pstate_set_epb(cpu, epp);
 	}
 skip_epp:
+	WRITE_ONCE(cpu_data->hwp_req_cached, value);
 	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
@@ -1020,6 +1034,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 	return count;
 }
 
+static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
+				struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", hwp_boost);
+}
+
+static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
+				       const char *buf, size_t count)
+{
+	unsigned int input;
+	int ret;
+
+	ret = kstrtouint(buf, 10, &input);
+	if (ret)
+		return ret;
+
+	mutex_lock(&intel_pstate_driver_lock);
+	hwp_boost = !!input;
+	intel_pstate_update_policies();
+	mutex_unlock(&intel_pstate_driver_lock);
+
+	return count;
+}
+
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
@@ -1029,6 +1067,7 @@ define_one_global_rw(max_perf_pct);
 define_one_global_rw(min_perf_pct);
 define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
+define_one_global_rw(hwp_dynamic_boost);
 
 static struct attribute *intel_pstate_attributes[] = {
 	&status.attr,
@@ -1069,6 +1108,11 @@ static void __init intel_pstate_sysfs_expose_params(void)
 
 	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
 	WARN_ON(rc);
+	if (hwp_active) {
+		rc = sysfs_create_file(intel_pstate_kobject,
+				       &hwp_dynamic_boost.attr);
+		WARN_ON(rc);
+	}
 }
 /************************** sysfs end ************************/
 
@@ -1370,7 +1414,15 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 	cpu->pstate.scaling = pstate_funcs.get_scaling();
 	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
-	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+
+	if (hwp_active && !hwp_mode_bdw) {
+		unsigned int phy_max, current_max;
+
+		intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
+		cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+	} else {
+		cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	}
 
 	if (pstate_funcs.get_aperf_mperf_shift)
 		cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
@@ -1381,6 +1433,116 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	intel_pstate_set_min_pstate(cpu);
 }
 
+/*
+ * A long hold time will keep the high performance limits in place
+ * for a long time, which negatively impacts perf/watt for some
+ * workloads, like SPECpower. 3ms is based on experiments on some
+ * workloads.
+ */
+static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
+
+static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
+{
+	u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
+	u32 max_limit = (hwp_req & 0xff00) >> 8;
+	u32 min_limit = (hwp_req & 0xff);
+	u32 boost_level1;
+
+	/*
+	 * Cases to consider (User changes via sysfs or boot time):
+	 * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
+	 *	No boost, return.
+	 * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
+	 *     Should result in one level boost only for P0.
+	 * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
+	 *     Should result in two level boost:
+	 *         (min + p1)/2 and P1.
+	 * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
+	 *     Should result in three level boost:
+	 *        (min + p1)/2, P1 and P0.
+	 */
+
+	/* If max and min are equal or already at max, nothing to boost */
+	if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
+		return;
+
+	if (!cpu->hwp_boost_min)
+		cpu->hwp_boost_min = min_limit;
+
+	/* level at the halfway mark between min and guaranteed */
+	boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
+
+	if (cpu->hwp_boost_min < boost_level1)
+		cpu->hwp_boost_min = boost_level1;
+	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
+	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
+		 max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+		cpu->hwp_boost_min = max_limit;
+	else
+		return;
+
+	hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
+	wrmsrl(MSR_HWP_REQUEST, hwp_req);
+	cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
+{
+	if (cpu->hwp_boost_min) {
+		bool expired;
+
+		/* Check if we are idle for hold time to boost down */
+		expired = time_after64(cpu->sample.time, cpu->last_update +
+				       hwp_boost_hold_time_ns);
+		if (expired) {
+			wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
+			cpu->hwp_boost_min = 0;
+		}
+	}
+	cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
+						      u64 time)
+{
+	cpu->sample.time = time;
+
+	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
+		bool do_io = false;
+
+		cpu->sched_flags = 0;
+		/*
+		 * Set iowait_boost flag and update time. Since the IO WAIT
+		 * flag is set all the time, we can't conclude from just one
+		 * occurrence that some IO bound activity is scheduled on
+		 * this CPU. Only if we receive at least two in two
+		 * consecutive ticks do we treat the CPU as a boost candidate.
+		 */
+		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
+			do_io = true;
+
+		cpu->last_io_update = time;
+
+		if (do_io)
+			intel_pstate_hwp_boost_up(cpu);
+
+	} else {
+		intel_pstate_hwp_boost_down(cpu);
+	}
+}
+
+static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
+						u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+
+	cpu->sched_flags |= flags;
+
+	if (smp_processor_id() == cpu->cpu)
+		intel_pstate_update_util_hwp_local(cpu, time);
+}
+
 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
@@ -1641,6 +1803,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
 	{}
 };
 
+static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
+	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
+	{}
+};
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -1671,6 +1839,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
+
+		id = x86_match_cpu(intel_pstate_hwp_boost_ids);
+		if (id)
+			hwp_boost = true;
 	}
 
 	intel_pstate_get_cpu_pstates(cpu);
@@ -1684,7 +1856,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
-	if (hwp_active)
+	if (hwp_active && !hwp_boost)
 		return;
 
 	if (cpu->update_util_set)
@@ -1693,7 +1865,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-				     intel_pstate_update_util);
+				     (hwp_active ?
+				      intel_pstate_update_util_hwp :
+				      intel_pstate_update_util));
 	cpu->update_util_set = true;
 }
 
@@ -1805,8 +1979,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		intel_pstate_set_update_util_hook(policy->cpu);
 	}
 
-	if (hwp_active)
+	if (hwp_active) {
+		/*
+		 * If hwp_boost was active before and has been turned
+		 * off dynamically, the update util hook needs to be
+		 * cleared here.
+		 */
+		if (!hwp_boost)
+			intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_hwp_set(policy->cpu);
+	}
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
@@ -2294,28 +2476,36 @@ static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
 static inline void intel_pstate_request_control_from_smm(void) {}
 #endif /* CONFIG_ACPI */
 
+#define INTEL_PSTATE_HWP_BROADWELL	0x01
+
+#define ICPU_HWP(model, hwp_mode) \
+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode }
+
 static const struct x86_cpu_id hwp_support_ids[] __initconst = {
-	{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
+	ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
+	ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL),
+	ICPU_HWP(X86_MODEL_ANY, 0),
 	{}
 };
 
 static int __init intel_pstate_init(void)
 {
+	const struct x86_cpu_id *id;
 	int rc;
 
 	if (no_load)
 		return -ENODEV;
 
-	if (x86_match_cpu(hwp_support_ids)) {
+	id = x86_match_cpu(hwp_support_ids);
+	if (id) {
 		copy_cpu_funcs(&core_funcs);
 		if (!no_hwp) {
 			hwp_active++;
+			hwp_mode_bdw = id->driver_data;
 			intel_pstate.attr = hwp_cpufreq_attrs;
 			goto hwp_cpu_matched;
 		}
 	} else {
-		const struct x86_cpu_id *id;
-
 		id = x86_match_cpu(intel_pstate_cpu_ids);
 		if (!id)
 			return -ENODEV;
@@ -2339,7 +2529,7 @@ hwp_cpu_matched:
 
 	pr_info("Intel P-state driver initializing\n");
 
-	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
+	all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
 	if (!all_cpu_data)
 		return -ENOMEM;
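The hwp_dynamic_boost attribute added above is created on intel_pstate_kobject, so on a typical system it appears as /sys/devices/system/cpu/intel_pstate/hwp_dynamic_boost. A minimal userspace sketch for toggling it (the path and the small helper below are illustrative, not part of the patch, and need root) could look like this:

/* toggle_hwp_boost.c - illustrative only, not part of the patch above */
#include <stdio.h>
#include <stdlib.h>

/* Assumed location of the new attribute created on intel_pstate_kobject. */
static const char *boost_path =
	"/sys/devices/system/cpu/intel_pstate/hwp_dynamic_boost";

int main(int argc, char **argv)
{
	/* "1" enables dynamic boost, "0" disables it; default here is enable. */
	const char *val = (argc > 1) ? argv[1] : "1";
	FILE *f = fopen(boost_path, "w");

	if (!f) {
		perror(boost_path);
		return EXIT_FAILURE;
	}
	fprintf(f, "%s\n", val);
	if (fclose(f) != 0) {
		perror(boost_path);
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}

A write lands in store_hwp_dynamic_boost(), which normalizes the value with !!input under intel_pstate_driver_lock and calls intel_pstate_update_policies(), so the update-util hook selection shown in intel_pstate_set_update_util_hook() and intel_pstate_set_policy() is re-evaluated for each CPU.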