Diffstat (limited to 'arch/x86/kernel/cpu/resctrl/monitor.c')
 -rw-r--r--  arch/x86/kernel/cpu/resctrl/monitor.c | 232
 1 file changed, 150 insertions(+), 82 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index eaf25a234ff5..efe0c30d3a12 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -16,8 +16,12 @@
  */
 
 #include <linux/module.h>
+#include <linux/sizes.h>
 #include <linux/slab.h>
+
 #include <asm/cpu_device_id.h>
+#include <asm/resctrl.h>
+
 #include "internal.h"
 
 struct rmid_entry {
@@ -37,8 +41,8 @@ static LIST_HEAD(rmid_free_lru);
  * @rmid_limbo_count     count of currently unused but (potentially)
  *     dirty RMIDs.
  *     This counts RMIDs that no one is currently using but that
- *     may have a occupancy value > intel_cqm_threshold. User can change
- *     the threshold occupancy value.
+ *     may have a occupancy value > resctrl_rmid_realloc_threshold. User can
+ *     change the threshold occupancy value.
  */
 static unsigned int rmid_limbo_count;
 
@@ -59,10 +63,15 @@ bool rdt_mon_capable;
 unsigned int rdt_mon_features;
 
 /*
- * This is the threshold cache occupancy at which we will consider an
+ * This is the threshold cache occupancy in bytes at which we will consider an
  * RMID available for re-allocation.
  */
-unsigned int resctrl_cqm_threshold;
+unsigned int resctrl_rmid_realloc_threshold;
+
+/*
+ * This is the maximum value for the reallocation threshold, in bytes.
+ */
+unsigned int resctrl_rmid_realloc_limit;
 
 #define CF(cf)	((unsigned long)(1048576 * (cf) + 0.5))
 
@@ -137,9 +146,54 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid)
 	return entry;
 }
 
-static u64 __rmid_read(u32 rmid, u32 eventid)
+static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
+						 u32 rmid,
+						 enum resctrl_event_id eventid)
 {
-	u64 val;
+	switch (eventid) {
+	case QOS_L3_OCCUP_EVENT_ID:
+		return NULL;
+	case QOS_L3_MBM_TOTAL_EVENT_ID:
+		return &hw_dom->arch_mbm_total[rmid];
+	case QOS_L3_MBM_LOCAL_EVENT_ID:
+		return &hw_dom->arch_mbm_local[rmid];
+	}
+
+	/* Never expect to get here */
+	WARN_ON_ONCE(1);
+
+	return NULL;
+}
+
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+			     u32 rmid, enum resctrl_event_id eventid)
+{
+	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+	struct arch_mbm_state *am;
+
+	am = get_arch_mbm_state(hw_dom, rmid, eventid);
+	if (am)
+		memset(am, 0, sizeof(*am));
+}
+
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
+{
+	u64 shift = 64 - width, chunks;
+
+	chunks = (cur_msr << shift) - (prev_msr << shift);
+	return chunks >> shift;
+}
+
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+			   u32 rmid, enum resctrl_event_id eventid, u64 *val)
+{
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+	struct arch_mbm_state *am;
+	u64 msr_val, chunks;
+
+	if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
+		return -EINVAL;
 
 	/*
 	 * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
@@ -150,16 +204,26 @@ static u64 __rmid_read(u32 rmid, u32 eventid)
 	 * are error bits.
 	 */
 	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
-	rdmsrl(MSR_IA32_QM_CTR, val);
-
-	return val;
-}
+	rdmsrl(MSR_IA32_QM_CTR, msr_val);
+
+	if (msr_val & RMID_VAL_ERROR)
+		return -EIO;
+	if (msr_val & RMID_VAL_UNAVAIL)
+		return -EINVAL;
+
+	am = get_arch_mbm_state(hw_dom, rmid, eventid);
+	if (am) {
+		am->chunks += mbm_overflow_count(am->prev_msr, msr_val,
+						 hw_res->mbm_width);
+		chunks = get_corrected_mbm_count(rmid, am->chunks);
+		am->prev_msr = msr_val;
+	} else {
+		chunks = msr_val;
+	}
 
-static bool rmid_dirty(struct rmid_entry *entry)
-{
-	u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
+	*val = chunks * hw_res->mon_scale;
 
-	return val >= resctrl_cqm_threshold;
+	return 0;
 }
 
 /*
@@ -170,11 +234,11 @@ static bool rmid_dirty(struct rmid_entry *entry)
  */
 void __check_limbo(struct rdt_domain *d, bool force_free)
 {
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 	struct rmid_entry *entry;
-	struct rdt_resource *r;
 	u32 crmid = 1, nrmid;
-
-	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	bool rmid_dirty;
+	u64 val = 0;
 
 	/*
 	 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
@@ -188,7 +252,15 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
 			break;
 
 		entry = __rmid_entry(nrmid);
-		if (force_free || !rmid_dirty(entry)) {
+
+		if (resctrl_arch_rmid_read(r, d, entry->rmid,
+					   QOS_L3_OCCUP_EVENT_ID, &val)) {
+			rmid_dirty = true;
+		} else {
+			rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
+		}
+
+		if (force_free || !rmid_dirty) {
 			clear_bit(entry->rmid, d->rmid_busy_llc);
 			if (!--entry->busy) {
 				rmid_limbo_count--;
@@ -227,19 +299,19 @@ int alloc_rmid(void)
 
 static void add_rmid_to_limbo(struct rmid_entry *entry)
 {
-	struct rdt_resource *r;
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 	struct rdt_domain *d;
-	int cpu;
-	u64 val;
-
-	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	int cpu, err;
+	u64 val = 0;
 
 	entry->busy = 0;
 	cpu = get_cpu();
 	list_for_each_entry(d, &r->domains, list) {
 		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
-			val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
-			if (val <= resctrl_cqm_threshold)
+			err = resctrl_arch_rmid_read(r, d, entry->rmid,
+						     QOS_L3_OCCUP_EVENT_ID,
+						     &val);
+			if (err || val <= resctrl_rmid_realloc_threshold)
 				continue;
 		}
 
@@ -277,24 +349,18 @@ void free_rmid(u32 rmid)
 		list_add_tail(&entry->list, &rmid_free_lru);
 }
 
-static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
+static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 {
-	u64 shift = 64 - width, chunks;
+	struct mbm_state *m;
+	u64 tval = 0;
 
-	chunks = (cur_msr << shift) - (prev_msr << shift);
-	return chunks >> shift;
-}
+	if (rr->first)
+		resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid);
 
-static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
-{
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
-	struct mbm_state *m;
-	u64 chunks, tval;
+	rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval);
+	if (rr->err)
+		return rr->err;
 
-	tval = __rmid_read(rmid, rr->evtid);
-	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
-		return tval;
-	}
 	switch (rr->evtid) {
 	case QOS_L3_OCCUP_EVENT_ID:
 		rr->val += tval;
@@ -308,48 +374,47 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
 	default:
 		/*
 		 * Code would never reach here because an invalid
-		 * event id would fail the __rmid_read.
+		 * event id would fail in resctrl_arch_rmid_read().
 		 */
-		return RMID_VAL_ERROR;
+		return -EINVAL;
 	}
 
 	if (rr->first) {
 		memset(m, 0, sizeof(struct mbm_state));
-		m->prev_bw_msr = m->prev_msr = tval;
 		return 0;
 	}
 
-	chunks = mbm_overflow_count(m->prev_msr, tval, hw_res->mbm_width);
-	m->chunks += chunks;
-	m->prev_msr = tval;
-
-	rr->val += get_corrected_mbm_count(rmid, m->chunks);
+	rr->val += tval;
 
 	return 0;
 }
 
 /*
+ * mbm_bw_count() - Update bw count from values previously read by
+ *		    __mon_event_count().
+ * @rmid:	The rmid used to identify the cached mbm_state.
+ * @rr:		The struct rmid_read populated by __mon_event_count().
+ *
  * Supporting function to calculate the memory bandwidth
- * and delta bandwidth in MBps.
+ * and delta bandwidth in MBps. The chunks value previously read by
+ * __mon_event_count() is compared with the chunks value from the previous
+ * invocation. This must be called once per second to maintain values in MBps.
  */
 static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
 {
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
 	struct mbm_state *m = &rr->d->mbm_local[rmid];
-	u64 tval, cur_bw, chunks;
+	u64 cur_bw, bytes, cur_bytes;
 
-	tval = __rmid_read(rmid, rr->evtid);
-	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-		return;
+	cur_bytes = rr->val;
+	bytes = cur_bytes - m->prev_bw_bytes;
+	m->prev_bw_bytes = cur_bytes;
 
-	chunks = mbm_overflow_count(m->prev_bw_msr, tval, hw_res->mbm_width);
-	cur_bw = (get_corrected_mbm_count(rmid, chunks) * hw_res->mon_scale) >> 20;
+	cur_bw = bytes / SZ_1M;
 
 	if (m->delta_comp)
 		m->delta_bw = abs(cur_bw - m->prev_bw);
 	m->delta_comp = false;
 	m->prev_bw = cur_bw;
-	m->prev_bw_msr = tval;
 }
 
 /*
@@ -361,11 +426,11 @@ void mon_event_count(void *info)
 	struct rdtgroup *rdtgrp, *entry;
 	struct rmid_read *rr = info;
 	struct list_head *head;
-	u64 ret_val;
+	int ret;
 
 	rdtgrp = rr->rgrp;
 
-	ret_val = __mon_event_count(rdtgrp->mon.rmid, rr);
+	ret = __mon_event_count(rdtgrp->mon.rmid, rr);
 
 	/*
 	 * For Ctrl groups read data from child monitor groups and
@@ -377,13 +442,17 @@ void mon_event_count(void *info)
 	if (rdtgrp->type == RDTCTRL_GROUP) {
 		list_for_each_entry(entry, head, mon.crdtgrp_list) {
 			if (__mon_event_count(entry->mon.rmid, rr) == 0)
-				ret_val = 0;
+				ret = 0;
 		}
 	}
 
-	/* Report error if none of rmid_reads are successful */
-	if (ret_val)
-		rr->val = ret_val;
+	/*
+	 * __mon_event_count() calls for newly created monitor groups may
+	 * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
+	 * Discard error if any of the monitor event reads succeeded.
+	 */
+	if (ret == 0)
+		rr->err = 0;
 }
 
 /*
@@ -420,10 +489,8 @@
  */
 static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 {
-	u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
+	u32 closid, rmid, cur_msr_val, new_msr_val;
 	struct mbm_state *pmbm_data, *cmbm_data;
-	struct rdt_hw_resource *hw_r_mba;
-	struct rdt_hw_domain *hw_dom_mba;
 	u32 cur_bw, delta_bw, user_bw;
 	struct rdt_resource *r_mba;
 	struct rdt_domain *dom_mba;
@@ -433,8 +500,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 	if (!is_mbm_local_enabled())
 		return;
 
-	hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
-	r_mba = &hw_r_mba->r_resctrl;
+	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+
 	closid = rgrp->closid;
 	rmid = rgrp->mon.rmid;
 	pmbm_data = &dom_mbm->mbm_local[rmid];
@@ -444,16 +511,13 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 		pr_warn_once("Failure to get domain for MBA update\n");
 		return;
 	}
-	hw_dom_mba = resctrl_to_arch_dom(dom_mba);
 
 	cur_bw = pmbm_data->prev_bw;
-	user_bw = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
+	user_bw = dom_mba->mbps_val[closid];
 	delta_bw = pmbm_data->delta_bw;
-	/*
-	 * resctrl_arch_get_config() chooses the mbps/ctrl value to return
-	 * based on is_mba_sc(). For now, reach into the hw_dom.
-	 */
-	cur_msr_val = hw_dom_mba->ctrl_val[closid];
+
+	/* MBA resource doesn't support CDP */
+	cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
 
 	/*
 	 * For Ctrl groups read data from child monitor groups.
@@ -488,9 +552,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 		return;
 	}
 
-	cur_msr = hw_r_mba->msr_base + closid;
-	wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
-	hw_dom_mba->ctrl_val[closid] = new_msr_val;
+	resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
 
 	/*
 	 * Delta values are updated dynamically package wise for each
@@ -523,10 +585,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
 	 */
 	if (is_mbm_total_enabled()) {
 		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
+		rr.val = 0;
 		__mon_event_count(rmid, &rr);
 	}
 	if (is_mbm_local_enabled()) {
 		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
+		rr.val = 0;
 		__mon_event_count(rmid, &rr);
 
 		/*
@@ -686,9 +750,10 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
 {
 	unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
-	unsigned int cl_size = boot_cpu_data.x86_cache_size;
+	unsigned int threshold;
 	int ret;
 
+	resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
 	hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
 	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
 	hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
@@ -705,10 +770,14 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
 	 *
 	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
 	 */
-	resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid;
+	threshold = resctrl_rmid_realloc_limit / r->num_rmid;
 
-	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
-	resctrl_cqm_threshold /= hw_res->mon_scale;
+	/*
+	 * Because num_rmid may not be a power of two, round the value
+	 * to the nearest multiple of hw_res->mon_scale so it matches a
+	 * value the hardware will measure. mon_scale may not be a power of 2.
+	 */
+	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold);
 
 	ret = dom_data_init(r);
 	if (ret)
@@ -717,7 +786,6 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
 	l3_mon_evt_init(r);
 
 	r->mon_capable = true;
-	r->mon_enabled = true;
 
 	return 0;
 }
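Note on the reworked read path: mbm_overflow_count() moves verbatim, so counter-wrap compensation now happens inside resctrl_arch_rmid_read() rather than in filesystem code. Below is a minimal user-space sketch of that wrap arithmetic; the 24-bit width matches MBM_CNTR_WIDTH_BASE and the sample counter values are invented for illustration.

/* wrap.c - standalone sketch of the mbm_overflow_count() arithmetic. */
#include <stdio.h>
#include <stdint.h>

/* Hardware counters are only 'width' bits wide. Shifting both samples up
 * to the top of the u64 makes the subtraction wrap at the counter width,
 * so the delta stays correct even after the hardware counter overflows. */
static uint64_t mbm_overflow_count(uint64_t prev_msr, uint64_t cur_msr,
				   unsigned int width)
{
	uint64_t shift = 64 - width, chunks;

	chunks = (cur_msr << shift) - (prev_msr << shift);
	return chunks >> shift;
}

int main(void)
{
	/* A 24-bit counter that wrapped: it advanced from 0xfffff0 through
	 * zero to 0x10, i.e. 0x20 chunks, although cur_msr < prev_msr. */
	printf("chunks: %#llx\n",
	       (unsigned long long)mbm_overflow_count(0xfffff0, 0x10, 24));
	return 0;
}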
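mbm_bw_count() likewise stops touching MSRs: it consumes the byte count that __mon_event_count() cached in rr->val, and because the overflow handler invokes it once per second, the byte delta divided by SZ_1M is directly MBps. A simplified user-space sketch follows; it omits the delta_comp gating the kernel applies, and the sampled byte counts are invented.

/* bw.c - simplified sketch of the mbm_bw_count() MBps calculation. */
#include <stdio.h>
#include <stdint.h>

#define SZ_1M (1024 * 1024)

struct mbm_state {
	uint64_t prev_bw_bytes;	/* cumulative byte count at the last poll */
	uint64_t prev_bw;	/* bandwidth over the previous second, MBps */
	uint64_t delta_bw;	/* |difference| between consecutive polls */
};

/* Call once per second with the cumulative byte count for the RMID. */
static void bw_count(struct mbm_state *m, uint64_t cur_bytes)
{
	uint64_t bytes = cur_bytes - m->prev_bw_bytes;
	uint64_t cur_bw = bytes / SZ_1M;

	m->prev_bw_bytes = cur_bytes;
	m->delta_bw = cur_bw > m->prev_bw ? cur_bw - m->prev_bw
					  : m->prev_bw - cur_bw;
	m->prev_bw = cur_bw;
}

int main(void)
{
	struct mbm_state m = { 0 };

	bw_count(&m, 500ULL * SZ_1M);	/* second 1: 500 MBps */
	bw_count(&m, 800ULL * SZ_1M);	/* second 2: 300 MBps */

	/* Prints "bw 300 MBps, delta 200 MBps". */
	printf("bw %llu MBps, delta %llu MBps\n",
	       (unsigned long long)m.prev_bw,
	       (unsigned long long)m.delta_bw);
	return 0;
}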
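Finally, the limbo threshold in rdt_get_mon_l3_config() is now kept in bytes and snapped to hardware granularity through resctrl_arch_round_mon_val() instead of being stored pre-divided by mon_scale. Below is a sketch of the derivation, assuming the x86 rounding truncates to a multiple of the occupancy scale; the cache size, RMID count and 64-byte scale are made-up example values, not read from CPUID.

/* threshold.c - sketch of the resctrl_rmid_realloc_threshold derivation. */
#include <stdio.h>

int main(void)
{
	unsigned int cache_size = 35U * 1024 * 1024;	/* LLC size, bytes */
	unsigned int num_rmid = 96;			/* example RMID count */
	unsigned int mon_scale = 64;			/* bytes per h/w chunk */
	unsigned int threshold, rounded;

	/* Give each RMID an equal share of the cache... */
	threshold = cache_size / num_rmid;

	/* ...then snap it to a multiple of mon_scale so the limbo
	 * comparison uses a value the hardware can actually report. */
	rounded = threshold / mon_scale * mon_scale;

	/* Prints "threshold 382293 -> 382272 bytes". */
	printf("threshold %u -> %u bytes\n", threshold, rounded);
	return 0;
}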