aboutsummaryrefslogtreecommitdiff
path: root/drivers/cpufreq/intel_pstate.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-26 17:29:07 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-26 17:29:07 -0700
commit6453dbdda30428a3c56568c96fe70ea3612f07e2 (patch)
tree9a3c6087a2832c36e8c49296fb05f95b877e0111 /drivers/cpufreq/intel_pstate.c
parent27b79027bc112a63ad4004eb83c6acacae08a0de (diff)
parentbc841e260c95608921809a2c7481cf6f03bec21a (diff)
Merge tag 'pm-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "Again, the majority of changes go into the cpufreq subsystem, but there are no big features this time. The cpufreq changes that stand out somewhat are the governor interface rework and improvements related to the handling of frequency tables. Apart from those, there are fixes and new device/CPU IDs in drivers, cleanups and an improvement of the new schedutil governor. Next, there are some changes in the hibernation core, including a fix for a nasty problem related to the MONITOR/MWAIT usage by CPU offline during resume from hibernation, a few core improvements related to memory management during resume, a couple of additional debug features and cleanups. Finally, we have some fixes and cleanups in the devfreq subsystem, generic power domains framework improvements related to system suspend/resume, support for some new chips in intel_idle and in the power capping RAPL driver, a new version of the AnalyzeSuspend utility and some assorted fixes and cleanups. Specifics: - Rework the cpufreq governor interface to make it more straightforward and modify the conservative governor to avoid using transition notifications (Rafael Wysocki). - Rework the handling of frequency tables by the cpufreq core to make it more efficient (Viresh Kumar). - Modify the schedutil governor to reduce the number of wakeups it causes to occur in cases when the CPU frequency doesn't need to be changed (Steve Muckle, Viresh Kumar). - Fix some minor issues and clean up code in the cpufreq core and governors (Rafael Wysocki, Viresh Kumar). - Add Intel Broxton support to the intel_pstate driver (Srinivas Pandruvada). - Fix problems related to the config TDP feature and to the validity of the MSR_HWP_INTERRUPT register in intel_pstate (Jan Kiszka, Srinivas Pandruvada). - Make intel_pstate update the cpu_frequency tracepoint even if the frequency doesn't change to avoid confusing powertop (Rafael Wysocki). - Clean up the usage of __init/__initdata in intel_pstate, mark some of its internal variables as __read_mostly and drop an unused structure element from it (Jisheng Zhang, Carsten Emde). - Clean up the usage of some duplicate MSR symbols in intel_pstate and turbostat (Srinivas Pandruvada). - Update/fix the powernv, s3c24xx and mvebu cpufreq drivers (Akshay Adiga, Viresh Kumar, Ben Dooks). - Fix a regression (introduced during the 4.5 cycle) in the pcc-cpufreq driver by reverting the problematic commit (Andreas Herrmann). - Add support for Intel Denverton to intel_idle, clean up Broxton support in it and make it explicitly non-modular (Jacob Pan, Jan Beulich, Paul Gortmaker). - Add support for Denverton and Ivy Bridge server to the Intel RAPL power capping driver and make it more careful about the handing of MSRs that may not be present (Jacob Pan, Xiaolong Wang). - Fix resume from hibernation on x86-64 by making the CPU offline during resume avoid using MONITOR/MWAIT in the "play dead" loop which may lead to an inadvertent "revival" of a "dead" CPU and a page fault leading to a kernel crash from it (Rafael Wysocki). - Make memory management during resume from hibernation more straightforward (Rafael Wysocki). - Add debug features that should help to detect problems related to hibernation and resume from it (Rafael Wysocki, Chen Yu). - Clean up hibernation core somewhat (Rafael Wysocki). - Prevent KASAN from instrumenting the hibernation core which leads to large numbers of false-positives from it (James Morse). - Prevent PM (hibernate and suspend) notifiers from being called during the cleanup phase if they have not been called during the corresponding preparation phase which is possible if one of the other notifiers returns an error at that time (Lianwei Wang). - Improve suspend-related debug printout in the tasks freezer and clean up suspend-related console handling (Roger Lu, Borislav Petkov). - Update the AnalyzeSuspend script in the kernel sources to version 4.2 (Todd Brandt). - Modify the generic power domains framework to make it handle system suspend/resume better (Ulf Hansson). - Make the runtime PM framework avoid resuming devices synchronously when user space changes the runtime PM settings for them and improve its error reporting (Rafael Wysocki, Linus Walleij). - Fix error paths in devfreq drivers (exynos, exynos-ppmu, exynos-bus) and in the core, make some devfreq code explicitly non-modular and change some of it into tristate (Bartlomiej Zolnierkiewicz, Peter Chen, Paul Gortmaker). - Add DT support to the generic PM clocks management code and make it export some more symbols (Jon Hunter, Paul Gortmaker). - Make the PCI PM core code slightly more robust against possible driver errors (Andy Shevchenko). - Make it possible to change DESTDIR and PREFIX in turbostat (Andy Shevchenko)" * tag 'pm-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (89 commits) Revert "cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency" PM / hibernate: Introduce test_resume mode for hibernation cpufreq: export cpufreq_driver_resolve_freq() cpufreq: Disallow ->resolve_freq() for drivers providing ->target_index() PCI / PM: check all fields in pci_set_platform_pm() cpufreq: acpi-cpufreq: use cached frequency mapping when possible cpufreq: schedutil: map raw required frequency to driver frequency cpufreq: add cpufreq_driver_resolve_freq() cpufreq: intel_pstate: Check cpuid for MSR_HWP_INTERRUPT intel_pstate: Update cpu_frequency tracepoint every time cpufreq: intel_pstate: clean remnant struct element PM / tools: scripts: AnalyzeSuspend v4.2 x86 / hibernate: Use hlt_play_dead() when resuming from hibernation cpufreq: powernv: Replacing pstate_id with frequency table index intel_pstate: Fix MSR_CONFIG_TDP_x addressing in core_get_max_pstate() PM / hibernate: Image data protection during restoration PM / hibernate: Add missing braces in __register_nosave_region() PM / hibernate: Clean up comments in snapshot.c PM / hibernate: Clean up function headers in snapshot.c PM / hibernate: Add missing braces in hibernate_setup() ...
Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r--drivers/cpufreq/intel_pstate.c74
1 files changed, 45 insertions, 29 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 28690b284846..9ec033b4f2d9 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -97,7 +97,6 @@ static inline u64 div_ext_fp(u64 x, u64 y)
* read from MPERF MSR between last and current sample
* @tsc: Difference of time stamp counter between last and
* current sample
- * @freq: Effective frequency calculated from APERF/MPERF
* @time: Current time from scheduler
*
* This structure is used in the cpudata structure to store performance sample
@@ -109,7 +108,6 @@ struct sample {
u64 aperf;
u64 mperf;
u64 tsc;
- int freq;
u64 time;
};
@@ -282,9 +280,9 @@ struct cpu_defaults {
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
-static struct pstate_adjust_policy pid_params;
-static struct pstate_funcs pstate_funcs;
-static int hwp_active;
+static struct pstate_adjust_policy pid_params __read_mostly;
+static struct pstate_funcs pstate_funcs __read_mostly;
+static int hwp_active __read_mostly;
#ifdef CONFIG_ACPI
static bool acpi_ppc;
@@ -808,7 +806,8 @@ static void __init intel_pstate_sysfs_expose_params(void)
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
/* First disable HWP notification interrupt as we don't process them */
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+ if (static_cpu_has(X86_FEATURE_HWP_NOTIFY))
+ wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}
@@ -945,7 +944,7 @@ static int core_get_max_pstate(void)
if (err)
goto skip_tar;
- tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
+ tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
err = rdmsrl_safe(tdp_msr, &tdp_ratio);
if (err)
goto skip_tar;
@@ -973,7 +972,7 @@ static int core_get_turbo_pstate(void)
u64 value;
int nont, ret;
- rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
+ rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
nont = core_get_max_pstate();
ret = (value) & 255;
if (ret <= nont)
@@ -1002,7 +1001,7 @@ static int knl_get_turbo_pstate(void)
u64 value;
int nont, ret;
- rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
+ rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
nont = core_get_max_pstate();
ret = (((value) >> 8) & 0xFF);
if (ret <= nont)
@@ -1092,6 +1091,26 @@ static struct cpu_defaults knl_params = {
},
};
+static struct cpu_defaults bxt_params = {
+ .pid_policy = {
+ .sample_rate_ms = 10,
+ .deadband = 0,
+ .setpoint = 60,
+ .p_gain_pct = 14,
+ .d_gain_pct = 0,
+ .i_gain_pct = 4,
+ },
+ .funcs = {
+ .get_max = core_get_max_pstate,
+ .get_max_physical = core_get_max_pstate_physical,
+ .get_min = core_get_min_pstate,
+ .get_turbo = core_get_turbo_pstate,
+ .get_scaling = core_get_scaling,
+ .get_val = core_get_val,
+ .get_target_pstate = get_target_pstate_use_cpu_load,
+ },
+};
+
static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
int max_perf = cpu->pstate.turbo_pstate;
@@ -1114,17 +1133,12 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}
-static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
-{
- trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
- cpu->pstate.current_pstate = pstate;
-}
-
static void intel_pstate_set_min_pstate(struct cpudata *cpu)
{
int pstate = cpu->pstate.min_pstate;
- intel_pstate_record_pstate(cpu, pstate);
+ trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+ cpu->pstate.current_pstate = pstate;
/*
* Generally, there is no guarantee that this code will always run on
* the CPU being updated, so force the register update to run on the
@@ -1284,10 +1298,11 @@ static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
pstate = clamp_t(int, pstate, min_perf, max_perf);
+ trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
if (pstate == cpu->pstate.current_pstate)
return;
- intel_pstate_record_pstate(cpu, pstate);
+ cpu->pstate.current_pstate = pstate;
wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}
@@ -1352,11 +1367,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params),
ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params),
+ ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params),
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
-static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
+static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
{}
};
@@ -1576,12 +1592,12 @@ static struct cpufreq_driver intel_pstate_driver = {
.name = "intel_pstate",
};
-static int __initdata no_load;
-static int __initdata no_hwp;
-static int __initdata hwp_only;
-static unsigned int force_load;
+static int no_load __initdata;
+static int no_hwp __initdata;
+static int hwp_only __initdata;
+static unsigned int force_load __initdata;
-static int intel_pstate_msrs_not_valid(void)
+static int __init intel_pstate_msrs_not_valid(void)
{
if (!pstate_funcs.get_max() ||
!pstate_funcs.get_min() ||
@@ -1591,7 +1607,7 @@ static int intel_pstate_msrs_not_valid(void)
return 0;
}
-static void copy_pid_params(struct pstate_adjust_policy *policy)
+static void __init copy_pid_params(struct pstate_adjust_policy *policy)
{
pid_params.sample_rate_ms = policy->sample_rate_ms;
pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
@@ -1602,7 +1618,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy)
pid_params.setpoint = policy->setpoint;
}
-static void copy_cpu_funcs(struct pstate_funcs *funcs)
+static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
{
pstate_funcs.get_max = funcs->get_max;
pstate_funcs.get_max_physical = funcs->get_max_physical;
@@ -1617,7 +1633,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
#ifdef CONFIG_ACPI
-static bool intel_pstate_no_acpi_pss(void)
+static bool __init intel_pstate_no_acpi_pss(void)
{
int i;
@@ -1646,7 +1662,7 @@ static bool intel_pstate_no_acpi_pss(void)
return true;
}
-static bool intel_pstate_has_acpi_ppc(void)
+static bool __init intel_pstate_has_acpi_ppc(void)
{
int i;
@@ -1674,7 +1690,7 @@ struct hw_vendor_info {
};
/* Hardware vendor-specific info that has its own power management modes */
-static struct hw_vendor_info vendor_info[] = {
+static struct hw_vendor_info vendor_info[] __initdata = {
{1, "HP ", "ProLiant", PSS},
{1, "ORACLE", "X4-2 ", PPC},
{1, "ORACLE", "X4-2L ", PPC},
@@ -1693,7 +1709,7 @@ static struct hw_vendor_info vendor_info[] = {
{0, "", ""},
};
-static bool intel_pstate_platform_pwr_mgmt_exists(void)
+static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
{
struct acpi_table_header hdr;
struct hw_vendor_info *v_info;