diff options
Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 521 |
1 files changed, 438 insertions, 83 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4b644526fd59..3a9c4325d6e2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -10,6 +10,8 @@ * of the License. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/kernel_stat.h> #include <linux/module.h> @@ -39,10 +41,17 @@ #define ATOM_TURBO_RATIOS 0x66c #define ATOM_TURBO_VIDS 0x66d +#ifdef CONFIG_ACPI +#include <acpi/processor.h> +#endif + #define FRAC_BITS 8 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) #define fp_toint(X) ((X) >> FRAC_BITS) +#define EXT_BITS 6 +#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS) + static inline int32_t mul_fp(int32_t x, int32_t y) { return ((int64_t)x * (int64_t)y) >> FRAC_BITS; @@ -64,8 +73,37 @@ static inline int ceiling_fp(int32_t x) return ret; } +static inline u64 mul_ext_fp(u64 x, u64 y) +{ + return (x * y) >> EXT_FRAC_BITS; +} + +static inline u64 div_ext_fp(u64 x, u64 y) +{ + return div64_u64(x << EXT_FRAC_BITS, y); +} + +/** + * struct sample - Store performance sample + * @core_avg_perf: Ratio of APERF/MPERF which is the actual average + * performance during last sample period + * @busy_scaled: Scaled busy value which is used to calculate next + * P state. This can be different than core_avg_perf + * to account for cpu idle period + * @aperf: Difference of actual performance frequency clock count + * read from APERF MSR between last and current sample + * @mperf: Difference of maximum performance frequency clock count + * read from MPERF MSR between last and current sample + * @tsc: Difference of time stamp counter between last and + * current sample + * @freq: Effective frequency calculated from APERF/MPERF + * @time: Current time from scheduler + * + * This structure is used in the cpudata structure to store performance sample + * data for choosing next P State. 
+ */ struct sample { - int32_t core_pct_busy; + int32_t core_avg_perf; int32_t busy_scaled; u64 aperf; u64 mperf; @@ -74,6 +112,20 @@ struct sample { u64 time; }; +/** + * struct pstate_data - Store P state data + * @current_pstate: Current requested P state + * @min_pstate: Min P state possible for this platform + * @max_pstate: Max P state possible for this platform + * @max_pstate_physical: This is physical Max P state for a processor + * This can be higher than the max_pstate which can + * be limited by platform thermal design power limits + * @scaling: Scaling factor to convert frequency to cpufreq + * frequency units + * @turbo_pstate: Max Turbo P state possible for this platform + * + * Stores the per cpu model P state limits and current P state. + */ struct pstate_data { int current_pstate; int min_pstate; @@ -83,6 +135,19 @@ struct pstate_data { int turbo_pstate; }; +/** + * struct vid_data - Stores voltage information data + * @min: VID data for this platform corresponding to + * the lowest P state + * @max: VID data corresponding to the highest P State. + * @turbo: VID data for turbo P state + * @ratio: Ratio of (vid max - vid min) / + * (max P state - Min P State) + * + * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling). + * This data is used in Atom platforms, where in addition to target P state, + * the voltage data needs to be specified to select next P State. + */ struct vid_data { int min; int max; @@ -90,6 +155,18 @@ struct vid_data { int32_t ratio; }; +/** + * struct _pid - Stores PID data + * @setpoint: Target set point for busyness or performance + * @integral: Storage for accumulated error values + * @p_gain: PID proportional gain + * @i_gain: PID integral gain + * @d_gain: PID derivative gain + * @deadband: PID deadband + * @last_err: Last error storage for integral part of PID calculation + * + * Stores PID coefficients and last error for PID controller. 
+ */ struct _pid { int setpoint; int32_t integral; @@ -100,10 +177,31 @@ struct _pid { int32_t last_err; }; +/** + * struct cpudata - Per CPU instance data storage + * @cpu: CPU number for this instance data + * @update_util: CPUFreq utility callback information + * @update_util_set: CPUFreq utility callback is set + * @pstate: Stores P state limits for this CPU + * @vid: Stores VID limits for this CPU + * @pid: Stores PID parameters for this CPU + * @last_sample_time: Last Sample time + * @prev_aperf: Last APERF value read from APERF MSR + * @prev_mperf: Last MPERF value read from MPERF MSR + * @prev_tsc: Last timestamp counter (TSC) value + * @prev_cummulative_iowait: IO Wait time difference between last and + * current sample + * @sample: Storage for the last sample data + * @acpi_perf_data: Stores ACPI perf information read from _PSS + * @valid_pss_table: Set to true for valid ACPI _PSS entries found + * + * This structure stores per CPU instance data for all CPUs. + */ struct cpudata { int cpu; struct update_util_data update_util; + bool update_util_set; struct pstate_data pstate; struct vid_data vid; @@ -115,9 +213,26 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; +#ifdef CONFIG_ACPI + struct acpi_processor_performance acpi_perf_data; + bool valid_pss_table; +#endif }; static struct cpudata **all_cpu_data; + +/** + * struct pstate_adjust_policy - Stores static PID configuration data + * @sample_rate_ms: PID calculation sample rate in ms + * @sample_rate_ns: Sample rate calculation in ns + * @deadband: PID deadband + * @setpoint: PID Setpoint + * @p_gain_pct: PID proportional gain + * @i_gain_pct: PID integral gain + * @d_gain_pct: PID derivative gain + * + * Stores per CPU model static PID configuration data. 
+ */ struct pstate_adjust_policy { int sample_rate_ms; s64 sample_rate_ns; @@ -128,6 +243,20 @@ struct pstate_adjust_policy { int i_gain_pct; }; +/** + * struct pstate_funcs - Per CPU model specific callbacks + * @get_max: Callback to get maximum non turbo effective P state + * @get_max_physical: Callback to get maximum non turbo physical P state + * @get_min: Callback to get minimum P state + * @get_turbo: Callback to get turbo P state + * @get_scaling: Callback to get frequency scaling factor + * @get_val: Callback to convert P state to actual MSR write value + * @get_vid: Callback to get VID data for Atom platforms + * @get_target_pstate: Callback to a function to calculate next P state to use + * + * Core and Atom CPU models have different ways to get P State limits. This + * structure is used to store those callbacks. + */ struct pstate_funcs { int (*get_max)(void); int (*get_max_physical)(void); @@ -139,6 +268,11 @@ struct pstate_funcs { int32_t (*get_target_pstate)(struct cpudata *); }; +/** + * struct cpu_defaults - Per CPU model default config data + * @pid_policy: PID config data + * @funcs: Callback function data + */ struct cpu_defaults { struct pstate_adjust_policy pid_policy; struct pstate_funcs funcs; @@ -151,6 +285,37 @@ static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; +#ifdef CONFIG_ACPI +static bool acpi_ppc; +#endif + +/** + * struct perf_limits - Store user and policy limits + * @no_turbo: User requested turbo state from intel_pstate sysfs + * @turbo_disabled: Platform turbo status either from msr + * MSR_IA32_MISC_ENABLE or when maximum available pstate + * matches the maximum turbo pstate + * @max_perf_pct: Effective maximum performance limit in percentage, this + * is minimum of either limits enforced by cpufreq policy + * or limits from user set limits via intel_pstate sysfs + * @min_perf_pct: Effective minimum performance limit in percentage, this + * is maximum of either limits 
enforced by cpufreq policy + * or limits from user set limits via intel_pstate sysfs + * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct + * This value is used to limit max pstate + * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct + * This value is used to limit min pstate + * @max_policy_pct: The maximum performance in percentage enforced by + * cpufreq setpolicy interface + * @max_sysfs_pct: The maximum performance in percentage enforced by + * intel pstate sysfs interface + * @min_policy_pct: The minimum performance in percentage enforced by + * cpufreq setpolicy interface + * @min_sysfs_pct: The minimum performance in percentage enforced by + * intel pstate sysfs interface + * + * Storage for user and policy defined limits. + */ struct perf_limits { int no_turbo; int turbo_disabled; @@ -196,6 +361,124 @@ static struct perf_limits *limits = &performance_limits; static struct perf_limits *limits = &powersave_limits; #endif +#ifdef CONFIG_ACPI + +static bool intel_pstate_get_ppc_enable_status(void) +{ + if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER || + acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER) + return true; + + return acpi_ppc; +} + +/* + * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and + * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and + * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state + * ratio, out of it only high 8 bits are used. For example 0x1700 is setting + * target ratio 0x17. The _PSS control value stores in a format which can be + * directly written to PERF_CTL MSR. But in intel_pstate driver this shift + * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). + * This function converts the _PSS control value to intel pstate driver format + * for comparison and assignment. 
+ */ +static int convert_to_native_pstate_format(struct cpudata *cpu, int index) +{ + return cpu->acpi_perf_data.states[index].control >> 8; +} + +static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int turbo_pss_ctl; + int ret; + int i; + + if (hwp_active) + return; + + if (!intel_pstate_get_ppc_enable_status()) + return; + + cpu = all_cpu_data[policy->cpu]; + + ret = acpi_processor_register_performance(&cpu->acpi_perf_data, + policy->cpu); + if (ret) + return; + + /* + * Check if the control value in _PSS is for PERF_CTL MSR, which should + * guarantee that the states returned by it map to the states in our + * list directly. + */ + if (cpu->acpi_perf_data.control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) + goto err; + + /* + * If there is only one entry _PSS, simply ignore _PSS and continue as + * usual without taking _PSS into account + */ + if (cpu->acpi_perf_data.state_count < 2) + goto err; + + pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu); + for (i = 0; i < cpu->acpi_perf_data.state_count; i++) { + pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n", + (i == cpu->acpi_perf_data.state ? '*' : ' '), i, + (u32) cpu->acpi_perf_data.states[i].core_frequency, + (u32) cpu->acpi_perf_data.states[i].power, + (u32) cpu->acpi_perf_data.states[i].control); + } + + /* + * The _PSS table doesn't contain whole turbo frequency range. + * This just contains +1 MHZ above the max non turbo frequency, + * with control value corresponding to max turbo ratio. But + * when cpufreq set policy is called, it will call with this + * max frequency, which will cause a reduced performance as + * this driver uses real max turbo frequency as the max + * frequency. So correct this frequency in _PSS table to + * correct max turbo frequency based on the turbo ratio. + * Also need to convert to MHz as _PSS freq is in MHz. 
+ */ + turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); + if (turbo_pss_ctl > cpu->pstate.max_pstate) + cpu->acpi_perf_data.states[0].core_frequency = + policy->cpuinfo.max_freq / 1000; + cpu->valid_pss_table = true; + pr_info("_PPC limits will be enforced\n"); + + return; + + err: + cpu->valid_pss_table = false; + acpi_processor_unregister_performance(policy->cpu); +} + +static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + + cpu = all_cpu_data[policy->cpu]; + if (!cpu->valid_pss_table) + return; + + acpi_processor_unregister_performance(policy->cpu); +} + +#else +static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) +{ +} + +static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ +} +#endif + static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { pid->setpoint = int_tofp(setpoint); @@ -206,17 +489,17 @@ static inline void pid_reset(struct _pid *pid, int setpoint, int busy, static inline void pid_p_gain_set(struct _pid *pid, int percent) { - pid->p_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->p_gain = div_fp(percent, 100); } static inline void pid_i_gain_set(struct _pid *pid, int percent) { - pid->i_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->i_gain = div_fp(percent, 100); } static inline void pid_d_gain_set(struct _pid *pid, int percent) { - pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->d_gain = div_fp(percent, 100); } static signed int pid_calc(struct _pid *pid, int32_t busy) @@ -318,6 +601,14 @@ static void intel_pstate_hwp_set(const struct cpumask *cpumask) } } +static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy) +{ + if (hwp_active) + intel_pstate_hwp_set(policy->cpus); + + return 0; +} + static void intel_pstate_hwp_set_online_cpus(void) { get_online_cpus(); @@ -394,7 +685,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, total = 
cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1; - turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total)); + turbo_fp = div_fp(no_turbo, total); turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100))); return sprintf(buf, "%u\n", turbo_pct); } @@ -436,7 +727,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, update_turbo_state(); if (limits->turbo_disabled) { - pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n"); + pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); return -EPERM; } @@ -465,8 +756,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf_pct = max(limits->min_perf_pct, limits->max_perf_pct); - limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), - int_tofp(100)); + limits->max_perf = div_fp(limits->max_perf_pct, 100); if (hwp_active) intel_pstate_hwp_set_online_cpus(); @@ -490,8 +780,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), - int_tofp(100)); + limits->min_perf = div_fp(limits->min_perf_pct, 100); if (hwp_active) intel_pstate_hwp_set_online_cpus(); @@ -678,6 +967,11 @@ static int core_get_max_pstate(void) if (err) goto skip_tar; + /* For level 1 and 2, bits[23:16] contain the ratio */ + if (tdp_ctrl) + tdp_ratio >>= 16; + + tdp_ratio &= 0xff; /* ratios are only 8 bits long */ if (tdp_ratio - 1 == tar) { max_pstate = tar; pr_debug("max_pstate=TAC %x\n", max_pstate); @@ -871,15 +1165,11 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) intel_pstate_set_min_pstate(cpu); } -static inline void intel_pstate_calc_busy(struct cpudata *cpu) +static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu) { struct sample *sample = &cpu->sample; - 
int64_t core_pct; - - core_pct = int_tofp(sample->aperf) * int_tofp(100); - core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); - sample->core_pct_busy = (int32_t)core_pct; + sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf); } static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) @@ -910,13 +1200,26 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; cpu->prev_tsc = tsc; - return true; + /* + * First time this function is invoked in a given cycle, all of the + * previous sample data fields are equal to zero or stale and they must + * be populated with meaningful numbers for things to work, so assume + * that sample.time will always be reset before setting the utilization + * update hook and make the caller skip the sample then. + */ + return !!cpu->last_sample_time; } static inline int32_t get_avg_frequency(struct cpudata *cpu) { - return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf * - cpu->pstate.scaling, cpu->sample.mperf); + return mul_ext_fp(cpu->sample.core_avg_perf, + cpu->pstate.max_pstate_physical * cpu->pstate.scaling); +} + +static inline int32_t get_avg_pstate(struct cpudata *cpu) +{ + return mul_ext_fp(cpu->pstate.max_pstate_physical, + cpu->sample.core_avg_perf); } static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) @@ -951,48 +1254,43 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); cpu->sample.busy_scaled = cpu_load; - return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load); + return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load); } static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) { - int32_t core_busy, max_pstate, current_pstate, sample_ratio; + int32_t perf_scaled, max_pstate, current_pstate, sample_ratio; u64 duration_ns; - intel_pstate_calc_busy(cpu); - /* - * core_busy is 
the ratio of actual performance to max - * max_pstate is the max non turbo pstate available - * current_pstate was the pstate that was requested during - * the last sample period. - * - * We normalize core_busy, which was our actual percent - * performance to what we requested during the last sample - * period. The result will be a percentage of busy at a - * specified pstate. + * perf_scaled is the average performance during the last sampling + * period scaled by the ratio of the maximum P-state to the P-state + * requested last time (in percent). That measures the system's + * response to the previous P-state selection. */ - core_busy = cpu->sample.core_pct_busy; - max_pstate = int_tofp(cpu->pstate.max_pstate_physical); - current_pstate = int_tofp(cpu->pstate.current_pstate); - core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); + max_pstate = cpu->pstate.max_pstate_physical; + current_pstate = cpu->pstate.current_pstate; + perf_scaled = mul_ext_fp(cpu->sample.core_avg_perf, + div_fp(100 * max_pstate, current_pstate)); /* * Since our utilization update callback will not run unless we are * in C0, check if the actual elapsed time is significantly greater (3x) * than our sample interval. If it is, then we were idle for a long - * enough period of time to adjust our busyness. + * enough period of time to adjust our performance metric. 
*/ duration_ns = cpu->sample.time - cpu->last_sample_time; - if ((s64)duration_ns > pid_params.sample_rate_ns * 3 - && cpu->last_sample_time > 0) { - sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), - int_tofp(duration_ns)); - core_busy = mul_fp(core_busy, sample_ratio); + if ((s64)duration_ns > pid_params.sample_rate_ns * 3) { + sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns); + perf_scaled = mul_fp(perf_scaled, sample_ratio); + } else { + sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc); + if (sample_ratio < int_tofp(1)) + perf_scaled = 0; } - cpu->sample.busy_scaled = core_busy; - return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy); + cpu->sample.busy_scaled = perf_scaled; + return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled); } static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) @@ -1022,7 +1320,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) intel_pstate_update_pstate(cpu, target_pstate); sample = &cpu->sample; - trace_pstate_sample(fp_toint(sample->core_pct_busy), + trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf), fp_toint(sample->busy_scaled), from, cpu->pstate.current_pstate, @@ -1041,8 +1339,11 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, if ((s64)delta_ns >= pid_params.sample_rate_ns) { bool sample_taken = intel_pstate_sample(cpu, time); - if (sample_taken && !hwp_active) - intel_pstate_adjust_busy_pstate(cpu); + if (sample_taken) { + intel_pstate_calc_avg_perf(cpu); + if (!hwp_active) + intel_pstate_adjust_busy_pstate(cpu); + } } } @@ -1100,44 +1401,85 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_get_cpu_pstates(cpu); intel_pstate_busy_pid_reset(cpu); - intel_pstate_sample(cpu, 0); - - cpu->update_util.func = intel_pstate_update_util; - cpufreq_set_update_util_data(cpunum, &cpu->update_util); - pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); + 
pr_debug("controlling: cpu %d\n", cpunum); return 0; } static unsigned int intel_pstate_get(unsigned int cpu_num) { - struct sample *sample; - struct cpudata *cpu; + struct cpudata *cpu = all_cpu_data[cpu_num]; - cpu = all_cpu_data[cpu_num]; - if (!cpu) - return 0; - sample = &cpu->sample; - return get_avg_frequency(cpu); + return cpu ? get_avg_frequency(cpu) : 0; +} + +static void intel_pstate_set_update_util_hook(unsigned int cpu_num) +{ + struct cpudata *cpu = all_cpu_data[cpu_num]; + + /* Prevent intel_pstate_update_util() from using stale data. */ + cpu->sample.time = 0; + cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, + intel_pstate_update_util); + cpu->update_util_set = true; +} + +static void intel_pstate_clear_update_util_hook(unsigned int cpu) +{ + struct cpudata *cpu_data = all_cpu_data[cpu]; + + if (!cpu_data->update_util_set) + return; + + cpufreq_remove_update_util_hook(cpu); + cpu_data->update_util_set = false; + synchronize_sched(); +} + +static void intel_pstate_set_performance_limits(struct perf_limits *limits) +{ + limits->no_turbo = 0; + limits->turbo_disabled = 0; + limits->max_perf_pct = 100; + limits->max_perf = int_tofp(1); + limits->min_perf_pct = 100; + limits->min_perf = int_tofp(1); + limits->max_policy_pct = 100; + limits->max_sysfs_pct = 100; + limits->min_policy_pct = 0; + limits->min_sysfs_pct = 0; } static int intel_pstate_set_policy(struct cpufreq_policy *policy) { + struct cpudata *cpu; + if (!policy->cpuinfo.max_freq) return -ENODEV; - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE && - policy->max >= policy->cpuinfo.max_freq) { - pr_debug("intel_pstate: set performance\n"); + intel_pstate_clear_update_util_hook(policy->cpu); + + cpu = all_cpu_data[0]; + if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && + policy->max < policy->cpuinfo.max_freq && + policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { + pr_debug("policy->max > max non turbo frequency\n"); + policy->max = policy->cpuinfo.max_freq; 
+ } + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { limits = &performance_limits; - if (hwp_active) - intel_pstate_hwp_set(policy->cpus); - return 0; + if (policy->max >= policy->cpuinfo.max_freq) { + pr_debug("set performance\n"); + intel_pstate_set_performance_limits(limits); + goto out; + } + } else { + pr_debug("set powersave\n"); + limits = &powersave_limits; } - pr_debug("intel_pstate: set powersave\n"); - limits = &powersave_limits; limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100); limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, @@ -1158,13 +1500,13 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) /* Make sure min_perf_pct <= max_perf_pct */ limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), - int_tofp(100)); - limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), - int_tofp(100)); + limits->min_perf = div_fp(limits->min_perf_pct, 100); + limits->max_perf = div_fp(limits->max_perf_pct, 100); - if (hwp_active) - intel_pstate_hwp_set(policy->cpus); + out: + intel_pstate_set_update_util_hook(policy->cpu); + + intel_pstate_hwp_set_policy(policy); return 0; } @@ -1185,10 +1527,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) int cpu_num = policy->cpu; struct cpudata *cpu = all_cpu_data[cpu_num]; - pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); + pr_debug("CPU %d exiting\n", cpu_num); - cpufreq_set_update_util_data(cpu_num, NULL); - synchronize_sched(); + intel_pstate_clear_update_util_hook(cpu_num); if (hwp_active) return; @@ -1219,18 +1560,28 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + intel_pstate_init_acpi_perf_limits(policy); 
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); return 0; } +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + intel_pstate_exit_perf_limits(policy); + + return 0; +} + static struct cpufreq_driver intel_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, + .resume = intel_pstate_hwp_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, + .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, .name = "intel_pstate", }; @@ -1274,8 +1625,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) } -#if IS_ENABLED(CONFIG_ACPI) -#include <acpi/processor.h> +#ifdef CONFIG_ACPI static bool intel_pstate_no_acpi_pss(void) { @@ -1431,7 +1781,7 @@ hwp_cpu_matched: if (intel_pstate_platform_pwr_mgmt_exists()) return -ENODEV; - pr_info("Intel P-state driver initializing.\n"); + pr_info("Intel P-state driver initializing\n"); all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) @@ -1448,15 +1798,14 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); if (hwp_active) - pr_info("intel_pstate: HWP enabled\n"); + pr_info("HWP enabled\n"); return rc; out: get_online_cpus(); for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) { - cpufreq_set_update_util_data(cpu, NULL); - synchronize_sched(); + intel_pstate_clear_update_util_hook(cpu); kfree(all_cpu_data[cpu]); } } @@ -1475,13 +1824,19 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; if (!strcmp(str, "no_hwp")) { - pr_info("intel_pstate: HWP disabled\n"); + pr_info("HWP disabled\n"); no_hwp = 1; } if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) hwp_only = 1; + +#ifdef CONFIG_ACPI + if (!strcmp(str, "support_acpi_ppc")) + acpi_ppc = true; +#endif + return 0; } early_param("intel_pstate", intel_pstate_setup); |