diff options
Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 395 |
1 files changed, 134 insertions, 261 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2e31d097def6..cd83d477e32d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -34,14 +34,10 @@ #include <asm/cpu_device_id.h> #include <asm/cpufeature.h> -#if IS_ENABLED(CONFIG_ACPI) -#include <acpi/processor.h> -#endif - -#define BYT_RATIOS 0x66a -#define BYT_VIDS 0x66b -#define BYT_TURBO_RATIOS 0x66c -#define BYT_TURBO_VIDS 0x66d +#define ATOM_RATIOS 0x66a +#define ATOM_VIDS 0x66b +#define ATOM_TURBO_RATIOS 0x66c +#define ATOM_TURBO_VIDS 0x66d #define FRAC_BITS 8 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) @@ -70,6 +66,7 @@ static inline int ceiling_fp(int32_t x) struct sample { int32_t core_pct_busy; + int32_t busy_scaled; u64 aperf; u64 mperf; u64 tsc; @@ -116,10 +113,8 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; + u64 prev_cummulative_iowait; struct sample sample; -#if IS_ENABLED(CONFIG_ACPI) - struct acpi_processor_performance acpi_perf_data; -#endif }; static struct cpudata **all_cpu_data; @@ -140,6 +135,7 @@ struct pstate_funcs { int (*get_scaling)(void); void (*set)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); + int32_t (*get_target_pstate)(struct cpudata *); }; struct cpu_defaults { @@ -147,10 +143,12 @@ struct cpu_defaults { struct pstate_funcs funcs; }; +static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); +static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); + static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; -static int no_acpi_perf; struct perf_limits { int no_turbo; @@ -163,8 +161,6 @@ struct perf_limits { int max_sysfs_pct; int min_policy_pct; int min_sysfs_pct; - int max_perf_ctl; - int min_perf_ctl; }; static struct perf_limits performance_limits = { @@ -191,8 +187,6 @@ static struct perf_limits powersave_limits = { .max_sysfs_pct = 100, .min_policy_pct = 0, .min_sysfs_pct = 0, - .max_perf_ctl = 0, - .min_perf_ctl = 0, }; #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE @@ -201,153 +195,6 @@ static struct perf_limits *limits = &performance_limits; static struct perf_limits *limits = &powersave_limits; #endif -#if IS_ENABLED(CONFIG_ACPI) -/* - * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and - * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and - * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state - * ratio, out of it only high 8 bits are used. For example 0x1700 is setting - * target ratio 0x17. The _PSS control value stores in a format which can be - * directly written to PERF_CTL MSR. But in intel_pstate driver this shift - * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). - * This function converts the _PSS control value to intel pstate driver format - * for comparison and assignment. - */ -static int convert_to_native_pstate_format(struct cpudata *cpu, int index) -{ - return cpu->acpi_perf_data.states[index].control >> 8; -} - -static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy) -{ - struct cpudata *cpu; - int ret; - bool turbo_absent = false; - int max_pstate_index; - int min_pss_ctl, max_pss_ctl, turbo_pss_ctl; - int i; - - cpu = all_cpu_data[policy->cpu]; - - pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n", - cpu->pstate.min_pstate, cpu->pstate.max_pstate, - cpu->pstate.turbo_pstate); - - if (!cpu->acpi_perf_data.shared_cpu_map && - zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map, - GFP_KERNEL, cpu_to_node(policy->cpu))) { - return -ENOMEM; - } - - ret = acpi_processor_register_performance(&cpu->acpi_perf_data, - policy->cpu); - if (ret) - return ret; - - /* - * Check if the control value in _PSS is for PERF_CTL MSR, which should - * guarantee that the states returned by it map to the states in our - * list directly. - */ - if (cpu->acpi_perf_data.control_register.space_id != - ACPI_ADR_SPACE_FIXED_HARDWARE) - return -EIO; - - pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu); - for (i = 0; i < cpu->acpi_perf_data.state_count; i++) - pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n", - (i == cpu->acpi_perf_data.state ? '*' : ' '), i, - (u32) cpu->acpi_perf_data.states[i].core_frequency, - (u32) cpu->acpi_perf_data.states[i].power, - (u32) cpu->acpi_perf_data.states[i].control); - - /* - * If there is only one entry _PSS, simply ignore _PSS and continue as - * usual without taking _PSS into account - */ - if (cpu->acpi_perf_data.state_count < 2) - return 0; - - turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); - min_pss_ctl = convert_to_native_pstate_format(cpu, - cpu->acpi_perf_data.state_count - 1); - /* Check if there is a turbo freq in _PSS */ - if (turbo_pss_ctl <= cpu->pstate.max_pstate && - turbo_pss_ctl > cpu->pstate.min_pstate) { - pr_debug("intel_pstate: no turbo range exists in _PSS\n"); - limits->no_turbo = limits->turbo_disabled = 1; - cpu->pstate.turbo_pstate = cpu->pstate.max_pstate; - turbo_absent = true; - } - - /* Check if the max non turbo p state < Intel P state max */ - max_pstate_index = turbo_absent ? 0 : 1; - max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index); - if (max_pss_ctl < cpu->pstate.max_pstate && - max_pss_ctl > cpu->pstate.min_pstate) - cpu->pstate.max_pstate = max_pss_ctl; - - /* check If min perf > Intel P State min */ - if (min_pss_ctl > cpu->pstate.min_pstate && - min_pss_ctl < cpu->pstate.max_pstate) { - cpu->pstate.min_pstate = min_pss_ctl; - policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling; - } - - if (turbo_absent) - policy->cpuinfo.max_freq = cpu->pstate.max_pstate * - cpu->pstate.scaling; - else { - policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * - cpu->pstate.scaling; - /* - * The _PSS table doesn't contain whole turbo frequency range. - * This just contains +1 MHZ above the max non turbo frequency, - * with control value corresponding to max turbo ratio. But - * when cpufreq set policy is called, it will call with this - * max frequency, which will cause a reduced performance as - * this driver uses real max turbo frequency as the max - * frequeny. So correct this frequency in _PSS table to - * correct max turbo frequency based on the turbo ratio. - * Also need to convert to MHz as _PSS freq is in MHz. - */ - cpu->acpi_perf_data.states[0].core_frequency = - turbo_pss_ctl * 100; - } - - pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n", - cpu->pstate.min_pstate, cpu->pstate.max_pstate, - cpu->pstate.turbo_pstate); - pr_debug("intel_pstate: policy max_freq=%d Khz min_freq = %d KHz\n", - policy->cpuinfo.max_freq, policy->cpuinfo.min_freq); - - return 0; -} - -static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) -{ - struct cpudata *cpu; - - if (!no_acpi_perf) - return 0; - - cpu = all_cpu_data[policy->cpu]; - acpi_processor_unregister_performance(policy->cpu); - return 0; -} - -#else -static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy) -{ - return 0; -} - -static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) -{ - return 0; -} -#endif - static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { pid->setpoint = setpoint; @@ -687,31 +534,31 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } -static int byt_get_min_pstate(void) +static int atom_get_min_pstate(void) { u64 value; - rdmsrl(BYT_RATIOS, value); + rdmsrl(ATOM_RATIOS, value); return (value >> 8) & 0x7F; } -static int byt_get_max_pstate(void) +static int atom_get_max_pstate(void) { u64 value; - rdmsrl(BYT_RATIOS, value); + rdmsrl(ATOM_RATIOS, value); return (value >> 16) & 0x7F; } -static int byt_get_turbo_pstate(void) +static int atom_get_turbo_pstate(void) { u64 value; - rdmsrl(BYT_TURBO_RATIOS, value); + rdmsrl(ATOM_TURBO_RATIOS, value); return value & 0x7F; } -static void byt_set_pstate(struct cpudata *cpudata, int pstate) +static void atom_set_pstate(struct cpudata *cpudata, int pstate) { u64 val; int32_t vid_fp; @@ -736,27 +583,42 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); } -#define BYT_BCLK_FREQS 5 -static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800}; - -static int byt_get_scaling(void) +static int silvermont_get_scaling(void) { u64 value; int i; + /* Defined in Table 35-6 from SDM (Sept 2015) */ + static int silvermont_freq_table[] = { + 83300, 100000, 133300, 116700, 80000}; rdmsrl(MSR_FSB_FREQ, value); - i = value & 0x3; + i = value & 0x7; + WARN_ON(i > 4); + + return silvermont_freq_table[i]; +} - BUG_ON(i > BYT_BCLK_FREQS); +static int airmont_get_scaling(void) +{ + u64 value; + int i; + /* Defined in Table 35-10 from SDM (Sept 2015) */ + static int airmont_freq_table[] = { + 83300, 100000, 133300, 116700, 80000, + 93300, 90000, 88900, 87500}; - return byt_freq_table[i] * 100; + rdmsrl(MSR_FSB_FREQ, value); + i = value & 0xF; + WARN_ON(i > 8); + + return airmont_freq_table[i]; } -static void byt_get_vid(struct cpudata *cpudata) +static void atom_get_vid(struct cpudata *cpudata) { u64 value; - rdmsrl(BYT_VIDS, value); + rdmsrl(ATOM_VIDS, value); cpudata->vid.min = int_tofp((value >> 8) & 0x7f); cpudata->vid.max = int_tofp((value >> 16) & 0x7f); cpudata->vid.ratio = div_fp( @@ -764,7 +626,7 @@ static void byt_get_vid(struct cpudata *cpudata) int_tofp(cpudata->pstate.max_pstate - cpudata->pstate.min_pstate)); - rdmsrl(BYT_TURBO_VIDS, value); + rdmsrl(ATOM_TURBO_VIDS, value); cpudata->vid.turbo = value & 0x7f; } @@ -882,10 +744,32 @@ static struct cpu_defaults core_params = { .get_turbo = core_get_turbo_pstate, .get_scaling = core_get_scaling, .set = core_set_pstate, + .get_target_pstate = get_target_pstate_use_performance, + }, +}; + +static struct cpu_defaults silvermont_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 60, + .p_gain_pct = 14, + .d_gain_pct = 0, + .i_gain_pct = 4, + }, + .funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .set = atom_set_pstate, + .get_scaling = silvermont_get_scaling, + .get_vid = atom_get_vid, + .get_target_pstate = get_target_pstate_use_cpu_load, }, }; -static struct cpu_defaults byt_params = { +static struct cpu_defaults airmont_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -895,13 +779,14 @@ static struct cpu_defaults byt_params = { .i_gain_pct = 4, }, .funcs = { - .get_max = byt_get_max_pstate, - .get_max_physical = byt_get_max_pstate, - .get_min = byt_get_min_pstate, - .get_turbo = byt_get_turbo_pstate, - .set = byt_set_pstate, - .get_scaling = byt_get_scaling, - .get_vid = byt_get_vid, + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .set = atom_set_pstate, + .get_scaling = airmont_get_scaling, + .get_vid = atom_get_vid, + .get_target_pstate = get_target_pstate_use_cpu_load, }, }; @@ -921,6 +806,7 @@ static struct cpu_defaults knl_params = { .get_turbo = knl_get_turbo_pstate, .get_scaling = core_get_scaling, .set = core_set_pstate, + .get_target_pstate = get_target_pstate_use_performance, }, }; @@ -938,23 +824,12 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) * policy, or by cpu specific default values determined through * experimentation. */ - if (limits->max_perf_ctl && limits->max_sysfs_pct >= - limits->max_policy_pct) { - *max = limits->max_perf_ctl; - } else { - max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), - limits->max_perf)); - *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, - cpu->pstate.turbo_pstate); - } + max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); + *max = clamp_t(int, max_perf_adj, + cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - if (limits->min_perf_ctl) { - *min = limits->min_perf_ctl; - } else { - min_perf = fp_toint(mul_fp(int_tofp(max_perf), - limits->min_perf)); - *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); - } + min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); + *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force) @@ -1017,12 +892,11 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - if (cpu->prev_mperf == mperf) { + tsc = rdtsc(); + if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) { local_irq_restore(flags); return; } - - tsc = rdtsc(); local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; @@ -1057,7 +931,43 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) mod_timer_pinned(&cpu->timer, jiffies + delay); } -static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) +static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) +{ + struct sample *sample = &cpu->sample; + u64 cummulative_iowait, delta_iowait_us; + u64 delta_iowait_mperf; + u64 mperf, now; + int32_t cpu_load; + + cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now); + + /* + * Convert iowait time into number of IO cycles spent at max_freq. + * IO is considered as busy only for the cpu_load algorithm. For + * performance this is not needed since we always try to reach the + * maximum P-State, so we are already boosting the IOs. + */ + delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait; + delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling * + cpu->pstate.max_pstate, MSEC_PER_SEC); + + mperf = cpu->sample.mperf + delta_iowait_mperf; + cpu->prev_cummulative_iowait = cummulative_iowait; + + + /* + * The load can be estimated as the ratio of the mperf counter + * running at a constant frequency during active periods + * (C0) and the time stamp counter running at the same frequency + * also during C-states. + */ + cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); + cpu->sample.busy_scaled = cpu_load; + + return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load); +} + +static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; s64 duration_us; @@ -1095,30 +1005,24 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) core_busy = mul_fp(core_busy, sample_ratio); } - return core_busy; + cpu->sample.busy_scaled = core_busy; + return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy); } static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) { - int32_t busy_scaled; - struct _pid *pid; - signed int ctl; - int from; + int from, target_pstate; struct sample *sample; from = cpu->pstate.current_pstate; - pid = &cpu->pid; - busy_scaled = intel_pstate_get_scaled_busy(cpu); - - ctl = pid_calc(pid, busy_scaled); + target_pstate = pstate_funcs.get_target_pstate(cpu); - /* Negative values of ctl increase the pstate and vice versa */ - intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true); + intel_pstate_set_pstate(cpu, target_pstate, true); sample = &cpu->sample; trace_pstate_sample(fp_toint(sample->core_pct_busy), - fp_toint(busy_scaled), + fp_toint(sample->busy_scaled), from, cpu->pstate.current_pstate, sample->mperf, @@ -1153,7 +1057,7 @@ static void intel_pstate_timer_func(unsigned long __data) static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x2a, core_params), ICPU(0x2d, core_params), - ICPU(0x37, byt_params), + ICPU(0x37, silvermont_params), ICPU(0x3a, core_params), ICPU(0x3c, core_params), ICPU(0x3d, core_params), @@ -1162,7 +1066,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x45, core_params), ICPU(0x46, core_params), ICPU(0x47, core_params), - ICPU(0x4c, byt_params), + ICPU(0x4c, airmont_params), ICPU(0x4e, core_params), ICPU(0x4f, core_params), ICPU(0x5e, core_params), @@ -1229,12 +1133,6 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) static int intel_pstate_set_policy(struct cpufreq_policy *policy) { -#if IS_ENABLED(CONFIG_ACPI) - struct cpudata *cpu; - int i; -#endif - pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__, - policy->cpuinfo.max_freq, policy->max); if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -1242,6 +1140,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) policy->max >= policy->cpuinfo.max_freq) { pr_debug("intel_pstate: set performance\n"); limits = &performance_limits; + if (hwp_active) + intel_pstate_hwp_set(); return 0; } @@ -1249,7 +1149,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits = &powersave_limits; limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100); - limits->max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq; + limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, + policy->cpuinfo.max_freq); limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100); /* Normalize user input to [min_policy_pct, max_policy_pct] */ @@ -1261,6 +1162,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_sysfs_pct); limits->max_perf_pct = max(limits->min_policy_pct, limits->max_perf_pct); + limits->max_perf = round_up(limits->max_perf, FRAC_BITS); /* Make sure min_perf_pct <= max_perf_pct */ limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); @@ -1270,23 +1172,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), int_tofp(100)); -#if IS_ENABLED(CONFIG_ACPI) - cpu = all_cpu_data[policy->cpu]; - for (i = 0; i < cpu->acpi_perf_data.state_count; i++) { - int control; - - control = convert_to_native_pstate_format(cpu, i); - if (control * cpu->pstate.scaling == policy->max) - limits->max_perf_ctl = control; - if (control * cpu->pstate.scaling == policy->min) - limits->min_perf_ctl = control; - } - - pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n", - policy->cpuinfo.max_freq, policy->max, limits->min_perf_ctl, - limits->max_perf_ctl); -#endif - if (hwp_active) intel_pstate_hwp_set(); @@ -1341,30 +1226,18 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; - if (!no_acpi_perf) - intel_pstate_init_perf_limits(policy); - /* - * If there is no acpi perf data or error, we ignore and use Intel P - * state calculated limits, So this is not fatal error. - */ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); return 0; } -static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) -{ - return intel_pstate_exit_perf_limits(policy); -} - static struct cpufreq_driver intel_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, - .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, .name = "intel_pstate", }; @@ -1403,9 +1276,12 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) pstate_funcs.get_scaling = funcs->get_scaling; pstate_funcs.set = funcs->set; pstate_funcs.get_vid = funcs->get_vid; + pstate_funcs.get_target_pstate = funcs->get_target_pstate; + } #if IS_ENABLED(CONFIG_ACPI) +#include <acpi/processor.h> static bool intel_pstate_no_acpi_pss(void) { @@ -1601,9 +1477,6 @@ static int __init intel_pstate_setup(char *str) force_load = 1; if (!strcmp(str, "hwp_only")) hwp_only = 1; - if (!strcmp(str, "no_acpi")) - no_acpi_perf = 1; - return 0; } early_param("intel_pstate", intel_pstate_setup); |