Diffstat (limited to 'arch/x86/kvm/vmx/pmu_intel.c')
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c | 220
1 file changed, 85 insertions, 135 deletions
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 315c7c2ba89b..12ade343a17e 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -20,53 +20,19 @@
 #include "nested.h"
 #include "pmu.h"
 
-#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
-
-enum intel_pmu_architectural_events {
-	/*
-	 * The order of the architectural events matters as support for each
-	 * event is enumerated via CPUID using the index of the event.
-	 */
-	INTEL_ARCH_CPU_CYCLES,
-	INTEL_ARCH_INSTRUCTIONS_RETIRED,
-	INTEL_ARCH_REFERENCE_CYCLES,
-	INTEL_ARCH_LLC_REFERENCES,
-	INTEL_ARCH_LLC_MISSES,
-	INTEL_ARCH_BRANCHES_RETIRED,
-	INTEL_ARCH_BRANCHES_MISPREDICTED,
-
-	NR_REAL_INTEL_ARCH_EVENTS,
-
-	/*
-	 * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
-	 * TSC reference cycles.  The architectural reference cycles event may
-	 * or may not actually use the TSC as the reference, e.g. might use the
-	 * core crystal clock or the bus clock (yeah, "architectural").
-	 */
-	PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
-	NR_INTEL_ARCH_EVENTS,
-};
+/*
+ * Perf's "BASE" is wildly misleading; architectural PMUs use bits 31:16 of ECX
+ * to encode the "type" of counter to read, i.e. this is not a "base".  And to
+ * further confuse things, non-architectural PMUs use bit 31 as a flag for
+ * "fast" reads, whereas the "type" is an explicit value.
+ */
+#define INTEL_RDPMC_GP		0
+#define INTEL_RDPMC_FIXED	INTEL_PMC_FIXED_RDPMC_BASE
 
-static struct {
-	u8 eventsel;
-	u8 unit_mask;
-} const intel_arch_events[] = {
-	[INTEL_ARCH_CPU_CYCLES]			= { 0x3c, 0x00 },
-	[INTEL_ARCH_INSTRUCTIONS_RETIRED]	= { 0xc0, 0x00 },
-	[INTEL_ARCH_REFERENCE_CYCLES]		= { 0x3c, 0x01 },
-	[INTEL_ARCH_LLC_REFERENCES]		= { 0x2e, 0x4f },
-	[INTEL_ARCH_LLC_MISSES]			= { 0x2e, 0x41 },
-	[INTEL_ARCH_BRANCHES_RETIRED]		= { 0xc4, 0x00 },
-	[INTEL_ARCH_BRANCHES_MISPREDICTED]	= { 0xc5, 0x00 },
-	[PSEUDO_ARCH_REFERENCE_CYCLES]		= { 0x00, 0x03 },
-};
+#define INTEL_RDPMC_TYPE_MASK	GENMASK(31, 16)
+#define INTEL_RDPMC_INDEX_MASK	GENMASK(15, 0)
 
-/* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {
-	[0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
-	[1] = INTEL_ARCH_CPU_CYCLES,
-	[2] = PSEUDO_ARCH_REFERENCE_CYCLES,
-};
+#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
 
 static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 {
@@ -84,77 +50,61 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 
 		pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
 
-		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
+		__set_bit(KVM_FIXED_PMC_BASE_IDX + i, pmu->pmc_in_use);
 		kvm_pmu_request_counter_reprogram(pmc);
 	}
 }
 
-static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
-{
-	if (pmc_idx < INTEL_PMC_IDX_FIXED) {
-		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
-				  MSR_P6_EVNTSEL0);
-	} else {
-		u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;
-
-		return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
-	}
-}
-
-static bool intel_hw_event_available(struct kvm_pmc *pmc)
-{
-	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-	u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
-	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-	int i;
-
-	BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
-
-	/*
-	 * Disallow events reported as unavailable in guest CPUID.  Note, this
-	 * doesn't apply to pseudo-architectural events.
-	 */
-	for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
-		if (intel_arch_events[i].eventsel != event_select ||
-		    intel_arch_events[i].unit_mask != unit_mask)
-			continue;
-
-		return pmu->available_event_types & BIT(i);
-	}
-
-	return true;
-}
-
-static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
-{
-	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	bool fixed = idx & (1u << 30);
-
-	idx &= ~(3u << 30);
-
-	return fixed ? idx < pmu->nr_arch_fixed_counters
-		     : idx < pmu->nr_arch_gp_counters;
-}
-
 static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
 					    unsigned int idx, u64 *mask)
 {
+	unsigned int type = idx & INTEL_RDPMC_TYPE_MASK;
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	bool fixed = idx & (1u << 30);
 	struct kvm_pmc *counters;
 	unsigned int num_counters;
+	u64 bitmask;
 
-	idx &= ~(3u << 30);
-	if (fixed) {
+	/*
+	 * The encoding of ECX for RDPMC is different for architectural versus
+	 * non-architectural PMUs (PMUs with version '0').  For architectural
+	 * PMUs, bits 31:16 specify the PMC type and bits 15:0 specify the PMC
+	 * index.  For non-architectural PMUs, bit 31 is a "fast" flag, and
+	 * bits 30:0 specify the PMC index.
+	 *
+	 * Yell and reject attempts to read PMCs for a non-architectural PMU,
+	 * as KVM doesn't support such PMUs.
+	 */
+	if (WARN_ON_ONCE(!pmu->version))
+		return NULL;
+
+	/*
+	 * General Purpose (GP) PMCs are supported on all PMUs, and fixed PMCs
+	 * are supported on all architectural PMUs, i.e. on all virtual PMUs
+	 * supported by KVM.  Note, KVM only emulates fixed PMCs for PMU v2+,
+	 * but the type itself is still valid, i.e. let RDPMC fail due to
+	 * accessing a non-existent counter.  Reject attempts to read all other
+	 * types, which are unknown/unsupported.
+	 */
+	switch (type) {
+	case INTEL_RDPMC_FIXED:
 		counters = pmu->fixed_counters;
 		num_counters = pmu->nr_arch_fixed_counters;
-	} else {
+		bitmask = pmu->counter_bitmask[KVM_PMC_FIXED];
+		break;
+	case INTEL_RDPMC_GP:
 		counters = pmu->gp_counters;
 		num_counters = pmu->nr_arch_gp_counters;
+		bitmask = pmu->counter_bitmask[KVM_PMC_GP];
+		break;
+	default:
+		return NULL;
 	}
+
+	idx &= INTEL_RDPMC_INDEX_MASK;
 	if (idx >= num_counters)
 		return NULL;
-	*mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
+
+	*mask &= bitmask;
 
 	return &counters[array_index_nospec(idx, num_counters)];
 }
@@ -464,20 +414,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 0;
 }
 
-static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
+/*
+ * Map fixed counter events to architectural general purpose event encodings.
+ * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
+ * and so KVM instead programs the architectural event to effectively request
+ * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
+ * instead program the encoding into a general purpose counter, e.g. if a
+ * different perf_event is already utilizing the requested counter, but the end
+ * result is the same (ignoring the fact that using a general purpose counter
+ * will likely exacerbate counter contention).
+ *
+ * Forcibly inlined to allow asserting on @index at build time, and there
+ * should never be more than one user.
+ */
+static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
 {
-	int i;
-
-	BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
+	const enum perf_hw_id fixed_pmc_perf_ids[] = {
+		[0] = PERF_COUNT_HW_INSTRUCTIONS,
+		[1] = PERF_COUNT_HW_CPU_CYCLES,
+		[2] = PERF_COUNT_HW_REF_CPU_CYCLES,
+	};
+	u64 eventsel;
 
-	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
-		int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
-		struct kvm_pmc *pmc = &pmu->fixed_counters[index];
-		u32 event = fixed_pmc_events[index];
+	BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_PMC_MAX_FIXED);
+	BUILD_BUG_ON(index >= KVM_PMC_MAX_FIXED);
 
-		pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
-				 intel_arch_events[event].eventsel;
-	}
+	/*
+	 * Yell if perf reports support for a fixed counter but perf doesn't
+	 * have a known encoding for the associated general purpose event.
+	 */
+	eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
+	WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
+	return eventsel;
 }
 
 static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
@@ -491,19 +459,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	u64 counter_mask;
 	int i;
 
-	pmu->nr_arch_gp_counters = 0;
-	pmu->nr_arch_fixed_counters = 0;
-	pmu->counter_bitmask[KVM_PMC_GP] = 0;
-	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
-	pmu->version = 0;
-	pmu->reserved_bits = 0xffffffff00200000ull;
-	pmu->raw_event_mask = X86_RAW_EVENT_MASK;
-	pmu->global_ctrl_mask = ~0ull;
-	pmu->global_status_mask = ~0ull;
-	pmu->fixed_ctr_ctrl_mask = ~0ull;
-	pmu->pebs_enable_mask = ~0ull;
-	pmu->pebs_data_cfg_mask = ~0ull;
-
 	memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
 
 	/*
@@ -515,8 +470,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 		return;
 
 	entry = kvm_find_cpuid_entry(vcpu, 0xa);
-	if (!entry || !vcpu->kvm->arch.enable_pmu)
+	if (!entry)
 		return;
+
 	eax.full = entry->eax;
 	edx.full = entry->edx;
 
@@ -543,13 +499,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 						  kvm_pmu_cap.bit_width_fixed);
 		pmu->counter_bitmask[KVM_PMC_FIXED] =
 			((u64)1 << edx.split.bit_width_fixed) - 1;
-		setup_fixed_pmc_eventsel(pmu);
 	}
 
 	for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
 		pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
 	counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
-		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
+		(((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
 	pmu->global_ctrl_mask = counter_mask;
 
 	/*
@@ -593,7 +548,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 			pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
 			for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
 				pmu->fixed_ctr_ctrl_mask &=
					~(1ULL << (KVM_FIXED_PMC_BASE_IDX + i * 4));
 			}
 			pmu->pebs_data_cfg_mask = ~0xff00000full;
 		} else {
@@ -619,8 +574,9 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
 		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
 		pmu->fixed_counters[i].vcpu = vcpu;
-		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+		pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX;
 		pmu->fixed_counters[i].current_config = 0;
+		pmu->fixed_counters[i].eventsel = intel_get_fixed_pmc_eventsel(i);
 	}
 
 	lbr_desc->records.nr = 0;
@@ -748,11 +704,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
 	struct kvm_pmc *pmc = NULL;
 	int bit, hw_idx;
 
-	for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
-			 X86_PMC_IDX_MAX) {
-		pmc = intel_pmc_idx_to_pmc(pmu, bit);
-
-		if (!pmc || !pmc_speculative_in_use(pmc) ||
+	kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
+		if (!pmc_speculative_in_use(pmc) ||
 		    !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
 			continue;
 
@@ -767,11 +720,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
 }
 
 struct kvm_pmu_ops intel_pmu_ops __initdata = {
-	.hw_event_available = intel_hw_event_available,
-	.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
 	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
 	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
-	.is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
 	.is_valid_msr = intel_is_valid_msr,
 	.get_msr = intel_pmu_get_msr,
 	.set_msr = intel_pmu_set_msr,
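
The ECX encoding described in the new comment in intel_rdpmc_ecx_to_pmc() is easy to exercise from guest userspace. The standalone sketch below is illustrative only, not part of this patch; the file name rdpmc_demo.c is hypothetical. It assumes an architectural PMU, CR4.PCE set so that RDPMC is legal in ring 3, and that the requested counters exist; otherwise the instruction takes #GP, which matches the failure the new code produces for unknown counter types by returning NULL.

/* rdpmc_demo.c: hypothetical guest userspace demo, not part of this patch. */
#include <stdint.h>
#include <stdio.h>

/* Architectural RDPMC encoding: type in ECX[31:16], index in ECX[15:0]. */
#define INTEL_RDPMC_GP		0u
#define INTEL_RDPMC_FIXED	(1u << 30)	/* INTEL_PMC_FIXED_RDPMC_BASE */

static inline uint64_t rdpmc(uint32_t ecx)
{
	uint32_t lo, hi;

	/* Faults with #GP if the counter doesn't exist or CR4.PCE is clear. */
	asm volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(ecx));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	/* GP counter 0: type 0 in bits 31:16, index 0 in bits 15:0. */
	uint64_t gp0 = rdpmc(INTEL_RDPMC_GP | 0);
	/* Fixed counter 1 (unhalted core cycles): fixed type, index 1. */
	uint64_t fixed1 = rdpmc(INTEL_RDPMC_FIXED | 1);

	printf("GP0 = %llu, FIXED1 = %llu\n",
	       (unsigned long long)gp0, (unsigned long long)fixed1);
	return 0;
}

Note that INTEL_RDPMC_FIXED is an explicit type value occupying bits 31:16, not a flag; with this patch, any other value in those bits falls through to the new default case and is rejected.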
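For reference, the perf-based lookup is expected to yield the same final eventsel encodings that the removed fixed_pmc_events/intel_arch_events tables produced, where eventsel = (unit_mask << 8) | event_select. A minimal sketch of the expected values, derived from the table removed above; the array name is illustrative, not from the patch:

#include <stdint.h>

/*
 * Expected eventsel values for the three fixed counters, i.e. what
 * intel_get_fixed_pmc_eventsel() should return via perf_get_hw_event_config()
 * on Intel.  Derived from the removed intel_arch_events[] entries; the array
 * name here is hypothetical.
 */
static const uint64_t expected_fixed_pmc_eventsel[] = {
	[0] = 0x00c0,	/* PERF_COUNT_HW_INSTRUCTIONS: event 0xc0, umask 0x00 */
	[1] = 0x003c,	/* PERF_COUNT_HW_CPU_CYCLES: event 0x3c, umask 0x00 */
	[2] = 0x0300,	/* PERF_COUNT_HW_REF_CPU_CYCLES: event 0x00, umask 0x03 */
};

Index 2 is the pseudo-architectural TSC reference cycles event (event 0x00, umask 0x03) that the removed enum's comment describes as backing IA32_FIXED_CTR2.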