diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r-- | arch/x86/kernel/cpu/aperfmperf.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpuid-deps.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/amd.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 32 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/dev-mcelog.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/internal.h | 44 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/core.c | 54 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/internal.h | 28 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/monitor.c | 79 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/rdtgroup.c | 319 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/vmware.c | 2 |
15 files changed, 521 insertions, 91 deletions
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 1f60a2b27936..fdbb5f07448f 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -330,7 +330,16 @@ static void __init bp_init_freq_invariance(void) static void disable_freq_invariance_workfn(struct work_struct *work) { + int cpu; + static_branch_disable(&arch_scale_freq_key); + + /* + * Set arch_freq_scale to a default value on all cpus + * This negates the effect of scaling + */ + for_each_possible_cpu(cpu) + per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE; } static DECLARE_WORK(disable_freq_invariance_work, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d970ddb0cc65..85168740f76a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -86,7 +86,7 @@ void update_spec_ctrl_cond(u64 val) wrmsrl(MSR_IA32_SPEC_CTRL, val); } -u64 spec_ctrl_current(void) +noinstr u64 spec_ctrl_current(void) { return this_cpu_read(x86_spec_ctrl_current); } @@ -1981,6 +1981,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); task_update_spec_tif(task); + if (task == current) + indirect_branch_prediction_barrier(); break; default: return -ERANGE; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5ff73baa5583..6a25e93f2a87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1256,6 +1256,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define MMIO_SBDS BIT(2) /* CPU is affected by RETbleed, speculating where you would not expect it */ #define RETBLEED BIT(3) +/* CPU is affected by SMT (cross-thread) return predictions */ +#define SMT_RSB BIT(4) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1287,8 +1289,8 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED), - VULNBL_HYGON(0x18, RETBLEED), + VULNBL_AMD(0x17, RETBLEED | SMT_RSB), + VULNBL_HYGON(0x18, RETBLEED | SMT_RSB), {} }; @@ -1406,6 +1408,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !(ia32_cap & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) + setup_force_cpu_bug(X86_BUG_SMT_RSB); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index d95221117129..f6748c8bd647 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -68,6 +68,8 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC }, { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_TOTAL }, + { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_LOCAL }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 10fb5b5c9efa..23c5072fbbb7 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -306,6 +306,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu) if ((low & BIT(5)) && !((high >> 5) & 0x3)) high |= BIT(5); + this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8)); + wrmsr(smca_config, low, high); } @@ -736,15 +738,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) if (m.status & MCI_STATUS_ADDRV) { m.addr = addr; - /* - * Extract [55:<lsb>] where lsb is the least significant - * *valid* bit of the address bits. - */ - if (mce_flags.smca) { - u8 lsb = (m.addr >> 56) & 0x3f; - - m.addr &= GENMASK_ULL(55, lsb); - } + smca_extract_err_addr(&m); } if (mce_flags.smca) { diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 2c8ec5c71712..7832a69d170e 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -67,13 +67,7 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); -struct mce_bank { - u64 ctl; /* subevents to enable */ - - __u64 init : 1, /* initialise bank? */ - __reserved_1 : 63; -}; -static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); +DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); #define ATTR_LEN 16 /* One object for each MCE bank, shared by all CPUs */ @@ -579,7 +573,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, mce->severity != MCE_DEFERRED_SEVERITY) return NOTIFY_DONE; - pfn = mce->addr >> PAGE_SHIFT; + pfn = (mce->addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT; if (!memory_failure(pfn, 0)) { set_mce_nospec(pfn); mce->kflags |= MCE_HANDLED_UC; @@ -633,15 +627,7 @@ static noinstr void mce_read_aux(struct mce *m, int i) m->addr <<= shift; } - /* - * Extract [55:<lsb>] where lsb is the least significant - * *valid* bit of the address bits. - */ - if (mce_flags.smca) { - u8 lsb = (m->addr >> 56) & 0x3f; - - m->addr &= GENMASK_ULL(55, lsb); - } + smca_extract_err_addr(m); } if (mce_flags.smca) { @@ -1308,6 +1294,7 @@ static void kill_me_maybe(struct callback_head *cb) { struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); int flags = MF_ACTION_REQUIRED; + unsigned long pfn; int ret; p->mce_count = 0; @@ -1316,9 +1303,10 @@ static void kill_me_maybe(struct callback_head *cb) if (!p->mce_ripv) flags |= MF_MUST_KILL; - ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags); + pfn = (p->mce_addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT; + ret = memory_failure(pfn, flags); if (!ret) { - set_mce_nospec(p->mce_addr >> PAGE_SHIFT); + set_mce_nospec(pfn); sync_core(); return; } @@ -1340,11 +1328,13 @@ static void kill_me_maybe(struct callback_head *cb) static void kill_me_never(struct callback_head *cb) { struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); + unsigned long pfn; p->mce_count = 0; pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr); - if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0)) - set_mce_nospec(p->mce_addr >> PAGE_SHIFT); + pfn = (p->mce_addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT; + if (!memory_failure(pfn, 0)) + set_mce_nospec(pfn); } static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *)) diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c index 100fbeebdc72..a05ac0716ecf 100644 --- a/arch/x86/kernel/cpu/mce/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c @@ -105,8 +105,7 @@ static ssize_t set_trigger(struct device *s, struct device_attribute *attr, { char *p; - strncpy(mce_helper, buf, sizeof(mce_helper)); - mce_helper[sizeof(mce_helper)-1] = 0; + strscpy(mce_helper, buf, sizeof(mce_helper)); p = strchr(mce_helper, '\n'); if (p) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 7e03f5b7f6bd..91a415553c27 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -177,6 +177,24 @@ struct mce_vendor_flags { extern struct mce_vendor_flags mce_flags; +struct mce_bank { + /* subevents to enable */ + u64 ctl; + + /* initialise bank? */ + __u64 init : 1, + + /* + * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: When set, this bit indicates + * the LSB field is found in MCA_STATUS and not in MCA_ADDR. + */ + lsb_in_status : 1, + + __reserved_1 : 62; +}; + +DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); + enum mca_msr { MCA_CTL, MCA_STATUS, @@ -189,8 +207,34 @@ extern bool filter_mce(struct mce *m); #ifdef CONFIG_X86_MCE_AMD extern bool amd_filter_mce(struct mce *m); + +/* + * If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits + * [56:0] of MCA_STATUS, else in bits [55:0] of MCA_ADDR. + */ +static __always_inline void smca_extract_err_addr(struct mce *m) +{ + u8 lsb; + + if (!mce_flags.smca) + return; + + if (this_cpu_ptr(mce_banks_array)[m->bank].lsb_in_status) { + lsb = (m->status >> 24) & 0x3f; + + m->addr &= GENMASK_ULL(56, lsb); + + return; + } + + lsb = (m->addr >> 56) & 0x3f; + + m->addr &= GENMASK_ULL(55, lsb); +} + #else static inline bool amd_filter_mce(struct mce *m) { return false; } +static inline void smca_extract_err_addr(struct mce *m) { } #endif #ifdef CONFIG_X86_ANCIENT_MCE diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index c98e52ff5f20..030d3b409768 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -100,6 +100,18 @@ struct rdt_hw_resource rdt_resources_all[] = { .fflags = RFTYPE_RES_MB, }, }, + [RDT_RESOURCE_SMBA] = + { + .r_resctrl = { + .rid = RDT_RESOURCE_SMBA, + .name = "SMBA", + .cache_level = 3, + .domains = domain_init(RDT_RESOURCE_SMBA), + .parse_ctrlval = parse_bw, + .format_str = "%d=%*u", + .fflags = RFTYPE_RES_MB, + }, + }, }; /* @@ -150,6 +162,13 @@ bool is_mba_sc(struct rdt_resource *r) if (!r) return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; + /* + * The software controller support is only applicable to MBA resource. + * Make sure to check for resource type. + */ + if (r->rid != RDT_RESOURCE_MBA) + return false; + return r->membw.mba_sc; } @@ -213,9 +232,15 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_3_eax eax; union cpuid_0x10_x_edx edx; - u32 ebx, ecx; + u32 ebx, ecx, subleaf; + + /* + * Query CPUID_Fn80000020_EDX_x01 for MBA and + * CPUID_Fn80000020_EDX_x02 for SMBA + */ + subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1; - cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full); + cpuid_count(0x80000020, subleaf, &eax.full, &ebx, &ecx, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; r->default_ctrl = MAX_MBA_BW_AMD; @@ -647,6 +672,8 @@ enum { RDT_FLAG_L2_CAT, RDT_FLAG_L2_CDP, RDT_FLAG_MBA, + RDT_FLAG_SMBA, + RDT_FLAG_BMEC, }; #define RDT_OPT(idx, n, f) \ @@ -670,6 +697,8 @@ static struct rdt_options rdt_options[] __initdata = { RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2), RDT_OPT(RDT_FLAG_L2_CDP, "l2cdp", X86_FEATURE_CDP_L2), RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), + RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), + RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) @@ -699,7 +728,7 @@ static int __init set_rdt_options(char *str) } __setup("rdt", set_rdt_options); -static bool __init rdt_cpu_has(int flag) +bool __init rdt_cpu_has(int flag) { bool ret = boot_cpu_has(flag); struct rdt_options *o; @@ -734,6 +763,19 @@ static __init bool get_mem_config(void) return false; } +static __init bool get_slow_mem_config(void) +{ + struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA]; + + if (!rdt_cpu_has(X86_FEATURE_SMBA)) + return false; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return __rdt_get_mem_config_amd(&hw_res->r_resctrl); + + return false; +} + static __init bool get_rdt_alloc_resources(void) { struct rdt_resource *r; @@ -764,6 +806,9 @@ static __init bool get_rdt_alloc_resources(void) if (get_mem_config()) ret = true; + if (get_slow_mem_config()) + ret = true; + return ret; } @@ -853,6 +898,9 @@ static __init void rdt_init_res_defs_amd(void) } else if (r->rid == RDT_RESOURCE_MBA) { hw_res->msr_base = MSR_IA32_MBA_BW_BASE; hw_res->msr_update = mba_wrmsr_amd; + } else if (r->rid == RDT_RESOURCE_SMBA) { + hw_res->msr_base = MSR_IA32_SMBA_BW_BASE; + hw_res->msr_update = mba_wrmsr_amd; } } } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 1df0e3262bca..eb07d4435391 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -209,7 +209,7 @@ static int parse_line(char *line, struct resctrl_schema *s, unsigned long dom_id; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && - r->rid == RDT_RESOURCE_MBA) { + (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); return -EINVAL; } @@ -310,7 +310,6 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) enum resctrl_conf_type t; cpumask_var_t cpu_mask; struct rdt_domain *d; - int cpu; u32 idx; if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) @@ -341,13 +340,9 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) if (cpumask_empty(cpu_mask)) goto done; - cpu = get_cpu(); - /* Update resource control msr on this CPU if it's in cpu_mask. */ - if (cpumask_test_cpu(cpu, cpu_mask)) - rdt_ctrl_update(&msr_param); - /* Update resource control msr on other CPUs. */ - smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1); - put_cpu(); + + /* Update resource control msr on all the CPUs. */ + on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); done: free_cpumask_var(cpu_mask); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 5ebd28e6aa0c..8edecc5763d8 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -30,6 +30,29 @@ */ #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) +/* Reads to Local DRAM Memory */ +#define READS_TO_LOCAL_MEM BIT(0) + +/* Reads to Remote DRAM Memory */ +#define READS_TO_REMOTE_MEM BIT(1) + +/* Non-Temporal Writes to Local Memory */ +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) + +/* Non-Temporal Writes to Remote Memory */ +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) + +/* Reads to Local Memory the system identifies as "Slow Memory" */ +#define READS_TO_LOCAL_S_MEM BIT(4) + +/* Reads to Remote Memory the system identifies as "Slow Memory" */ +#define READS_TO_REMOTE_S_MEM BIT(5) + +/* Dirty Victims to All Types of Memory */ +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) + +/* Max event bits supported */ +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) struct rdt_fs_context { struct kernfs_fs_context kfc; @@ -52,11 +75,13 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); * struct mon_evt - Entry in the event list of a resource * @evtid: event id * @name: name of the event + * @configurable: true if the event is configurable * @list: entry in &rdt_resource->evt_list */ struct mon_evt { enum resctrl_event_id evtid; char *name; + bool configurable; struct list_head list; }; @@ -409,6 +434,7 @@ enum resctrl_res_level { RDT_RESOURCE_L3, RDT_RESOURCE_L2, RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, /* Must be the last */ RDT_NUM_RESOURCES, @@ -511,6 +537,7 @@ void closid_free(int closid); int alloc_rmid(void); void free_rmid(u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); +bool __init rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, @@ -527,5 +554,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); void __init thread_throttle_mode_init(void); +void __init mbm_config_rftype_init(const char *config); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index efe0c30d3a12..7fe51488e136 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid) return entry; } +static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + + /* + * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured + * with a valid event code for supported resource type and the bits + * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, + * IA32_QM_CTR.data (bits 61:0) reports the monitored data. + * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) + * are error bits. + */ + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; +} + static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) @@ -172,8 +196,29 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); - if (am) + if (am) { memset(am, 0, sizeof(*am)); + + /* Record any initial, non-zero count value. */ + __rmid_read(rmid, eventid, &am->prev_msr); + } +} + +/* + * Assumes that hardware counters are also reset and thus that there is + * no need to record initial non-zero counts. + */ +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d) +{ + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + + if (is_mbm_total_enabled()) + memset(hw_dom->arch_mbm_total, 0, + sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); + + if (is_mbm_local_enabled()) + memset(hw_dom->arch_mbm_local, 0, + sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); } static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) @@ -191,25 +236,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct arch_mbm_state *am; u64 msr_val, chunks; + int ret; if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) return -EINVAL; - /* - * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured - * with a valid event code for supported resource type and the bits - * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, - * IA32_QM_CTR.data (bits 61:0) reports the monitored data. - * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) - * are error bits. - */ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, msr_val); - - if (msr_val & RMID_VAL_ERROR) - return -EIO; - if (msr_val & RMID_VAL_UNAVAIL) - return -EINVAL; + ret = __rmid_read(rmid, eventid, &msr_val); + if (ret) + return ret; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { @@ -746,7 +780,7 @@ static void l3_mon_evt_init(struct rdt_resource *r) list_add_tail(&mbm_local_event.list, &r->evt_list); } -int rdt_get_mon_l3_config(struct rdt_resource *r) +int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); @@ -783,6 +817,17 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) if (ret) return ret; + if (rdt_cpu_has(X86_FEATURE_BMEC)) { + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { + mbm_total_event.configurable = true; + mbm_config_rftype_init("mbm_total_bytes_config"); + } + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { + mbm_local_event.configurable = true; + mbm_config_rftype_init("mbm_local_bytes_config"); + } + } + l3_mon_evt_init(r); r->mon_capable = true; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e5a48f05e787..e2c1599d1b37 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -325,12 +325,7 @@ static void update_cpu_closid_rmid(void *info) static void update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) { - int cpu = get_cpu(); - - if (cpumask_test_cpu(cpu, cpu_mask)) - update_cpu_closid_rmid(r); - smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1); - put_cpu(); + on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1); } static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, @@ -580,8 +575,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk, /* * Ensure the task's closid and rmid are written before determining if * the task is current that will decide if it will be interrupted. + * This pairs with the full barrier between the rq->curr update and + * resctrl_sched_in() during context switch. */ - barrier(); + smp_mb(); /* * By now, the task's closid and rmid are set. If the task is current @@ -1001,8 +998,11 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, struct rdt_resource *r = of->kn->parent->priv; struct mon_evt *mevt; - list_for_each_entry(mevt, &r->evt_list, list) + list_for_each_entry(mevt, &r->evt_list, list) { seq_printf(seq, "%s\n", mevt->name); + if (mevt->configurable) + seq_printf(seq, "%s_config\n", mevt->name); + } return 0; } @@ -1213,7 +1213,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - if (r->rid == RDT_RESOURCE_MBA) + if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) continue; has_cache = true; list_for_each_entry(d, &r->domains, list) { @@ -1402,7 +1402,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, ctrl = resctrl_arch_get_config(r, d, closid, type); - if (r->rid == RDT_RESOURCE_MBA) + if (r->rid == RDT_RESOURCE_MBA || + r->rid == RDT_RESOURCE_SMBA) size = ctrl; else size = rdtgroup_cbm_to_size(r, d, ctrl); @@ -1419,6 +1420,248 @@ out: return ret; } +struct mon_config_info { + u32 evtid; + u32 mon_config; +}; + +#define INVALID_CONFIG_INDEX UINT_MAX + +/** + * mon_event_config_index_get - get the hardware index for the + * configurable event + * @evtid: event id. + * + * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID + * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID + * INVALID_CONFIG_INDEX for invalid evtid + */ +static inline unsigned int mon_event_config_index_get(u32 evtid) +{ + switch (evtid) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return 0; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return 1; + default: + /* Should never reach here */ + return INVALID_CONFIG_INDEX; + } +} + +static void mon_event_config_read(void *info) +{ + struct mon_config_info *mon_info = info; + unsigned int index; + u64 msrval; + + index = mon_event_config_index_get(mon_info->evtid); + if (index == INVALID_CONFIG_INDEX) { + pr_warn_once("Invalid event id %d\n", mon_info->evtid); + return; + } + rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); + + /* Report only the valid event configuration bits */ + mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; +} + +static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info) +{ + smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1); +} + +static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) +{ + struct mon_config_info mon_info = {0}; + struct rdt_domain *dom; + bool sep = false; + + mutex_lock(&rdtgroup_mutex); + + list_for_each_entry(dom, &r->domains, list) { + if (sep) + seq_puts(s, ";"); + + memset(&mon_info, 0, sizeof(struct mon_config_info)); + mon_info.evtid = evtid; + mondata_config_read(dom, &mon_info); + + seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config); + sep = true; + } + seq_puts(s, "\n"); + + mutex_unlock(&rdtgroup_mutex); + + return 0; +} + +static int mbm_total_bytes_config_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); + + return 0; +} + +static int mbm_local_bytes_config_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); + + return 0; +} + +static void mon_event_config_write(void *info) +{ + struct mon_config_info *mon_info = info; + unsigned int index; + + index = mon_event_config_index_get(mon_info->evtid); + if (index == INVALID_CONFIG_INDEX) { + pr_warn_once("Invalid event id %d\n", mon_info->evtid); + return; + } + wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); +} + +static int mbm_config_write_domain(struct rdt_resource *r, + struct rdt_domain *d, u32 evtid, u32 val) +{ + struct mon_config_info mon_info = {0}; + int ret = 0; + + /* mon_config cannot be more than the supported set of events */ + if (val > MAX_EVT_CONFIG_BITS) { + rdt_last_cmd_puts("Invalid event configuration\n"); + return -EINVAL; + } + + /* + * Read the current config value first. If both are the same then + * no need to write it again. + */ + mon_info.evtid = evtid; + mondata_config_read(d, &mon_info); + if (mon_info.mon_config == val) + goto out; + + mon_info.mon_config = val; + + /* + * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the + * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE + * are scoped at the domain level. Writing any of these MSRs + * on one CPU is observed by all the CPUs in the domain. + */ + smp_call_function_any(&d->cpu_mask, mon_event_config_write, + &mon_info, 1); + + /* + * When an Event Configuration is changed, the bandwidth counters + * for all RMIDs and Events will be cleared by the hardware. The + * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for + * every RMID on the next read to any event for every RMID. + * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62) + * cleared while it is tracked by the hardware. Clear the + * mbm_local and mbm_total counts for all the RMIDs. + */ + resctrl_arch_reset_rmid_all(r, d); + +out: + return ret; +} + +static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) +{ + char *dom_str = NULL, *id_str; + unsigned long dom_id, val; + struct rdt_domain *d; + int ret = 0; + +next: + if (!tok || tok[0] == '\0') + return 0; + + /* Start processing the strings for each domain */ + dom_str = strim(strsep(&tok, ";")); + id_str = strsep(&dom_str, "="); + + if (!id_str || kstrtoul(id_str, 10, &dom_id)) { + rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n"); + return -EINVAL; + } + + if (!dom_str || kstrtoul(dom_str, 16, &val)) { + rdt_last_cmd_puts("Non-numeric event configuration value\n"); + return -EINVAL; + } + + list_for_each_entry(d, &r->domains, list) { + if (d->id == dom_id) { + ret = mbm_config_write_domain(r, d, evtid, val); + if (ret) + return -EINVAL; + goto next; + } + } + + return -EINVAL; +} + +static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + struct rdt_resource *r = of->kn->parent->priv; + int ret; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + buf[nbytes - 1] = '\0'; + + ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); + + mutex_unlock(&rdtgroup_mutex); + + return ret ?: nbytes; +} + +static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + struct rdt_resource *r = of->kn->parent->priv; + int ret; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + buf[nbytes - 1] = '\0'; + + ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID); + + mutex_unlock(&rdtgroup_mutex); + + return ret ?: nbytes; +} + /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { { @@ -1518,6 +1761,20 @@ static struct rftype res_common_files[] = { .fflags = RF_MON_INFO | RFTYPE_RES_CACHE, }, { + .name = "mbm_total_bytes_config", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = mbm_total_bytes_config_show, + .write = mbm_total_bytes_config_write, + }, + { + .name = "mbm_local_bytes_config", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = mbm_local_bytes_config_show, + .write = mbm_local_bytes_config_write, + }, + { .name = "cpus", .mode = 0644, .kf_ops = &rdtgroup_kf_single_ops, @@ -1623,6 +1880,15 @@ void __init thread_throttle_mode_init(void) rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB; } +void __init mbm_config_rftype_init(const char *config) +{ + struct rftype *rft; + + rft = rdtgroup_get_rftype_by_name(config); + if (rft) + rft->fflags = RF_MON_INFO | RFTYPE_RES_CACHE; +} + /** * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file * @r: The resource group with which the file is associated. @@ -1864,13 +2130,9 @@ static int set_cache_qos_cfg(int level, bool enable) /* Pick one CPU from each domain instance to update MSR */ cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } - cpu = get_cpu(); - /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ - if (cpumask_test_cpu(cpu, cpu_mask)) - update(&enable); - /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ - smp_call_function_many(cpu_mask, update, &enable, 1); - put_cpu(); + + /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ + on_each_cpu_mask(cpu_mask, update, &enable, 1); free_cpumask_var(cpu_mask); @@ -2347,7 +2609,7 @@ static int reset_all_ctrls(struct rdt_resource *r) struct msr_param msr_param; cpumask_var_t cpu_mask; struct rdt_domain *d; - int i, cpu; + int i; if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; @@ -2368,13 +2630,9 @@ static int reset_all_ctrls(struct rdt_resource *r) for (i = 0; i < hw_res->num_closid; i++) hw_dom->ctrl_val[i] = r->default_ctrl; } - cpu = get_cpu(); - /* Update CBM on this cpu if it's in cpu_mask. */ - if (cpumask_test_cpu(cpu, cpu_mask)) - rdt_ctrl_update(&msr_param); - /* Update CBM on all other cpus in cpu_mask. */ - smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1); - put_cpu(); + + /* Update CBM on all the CPUs in cpu_mask */ + on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); free_cpumask_var(cpu_mask); @@ -2402,6 +2660,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, WRITE_ONCE(t->rmid, to->mon.rmid); /* + * Order the closid/rmid stores above before the loads + * in task_curr(). This pairs with the full barrier + * between the rq->curr update and resctrl_sched_in() + * during context switch. + */ + smp_mb(); + + /* * If the task is on a CPU, set the CPU in the mask. * The detection is inaccurate as tasks might move or * schedule before the smp function call takes place. @@ -2845,7 +3111,8 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - if (r->rid == RDT_RESOURCE_MBA) { + if (r->rid == RDT_RESOURCE_MBA || + r->rid == RDT_RESOURCE_SMBA) { rdtgroup_init_mba(r, rdtgrp->closid); if (is_mba_sc(r)) continue; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index f53944fb8f7f..0dad49a09b7a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -45,6 +45,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, + { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, + { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 02039ec3597d..11f83d07925e 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -143,7 +143,7 @@ static __init int parse_no_stealacc(char *arg) } early_param("no-steal-acc", parse_no_stealacc); -static unsigned long long notrace vmware_sched_clock(void) +static noinstr u64 vmware_sched_clock(void) { unsigned long long ns; |