Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	118
1 file changed, 55 insertions(+), 63 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 219d3fb423a1..a1e35c9f06b9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
-		[ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS,
+		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
@@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids
 	[ C(OP_READ) ] = {
 		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
 		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
@@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids
  [ C(ITLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
-		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
+		[ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = -1,
@@ -1924,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	xl = &excl_cntrs->states[tid];
 
 	xl->sched_started = true;
-	xl->num_alloc_cntrs = 0;
 	/*
 	 * lock shared state until we are done scheduling
 	 * in stop_event_scheduling()
@@ -2001,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * across HT threads
 	 */
 	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+		if (!cpuc->n_excl++)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+	}
 
 	/*
 	 * xl = state of current HT
@@ -2009,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	xl = &excl_cntrs->states[tid];
 	xlo = &excl_cntrs->states[o_tid];
 
-	/*
-	 * do not allow scheduling of more than max_alloc_cntrs
-	 * which is set to half the available generic counters.
-	 * this helps avoid counter starvation of sibling thread
-	 * by ensuring at most half the counters cannot be in
-	 * exclusive mode. There is not designated counters for the
-	 * limits. Any N/2 counters can be used. This helps with
-	 * events with specifix counter constraints
-	 */
-	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
-		return &emptyconstraint;
-
 	cx = c;
 
 	/*
@@ -2107,7 +2098,7 @@ static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			    struct perf_event *event)
 {
-	struct event_constraint *c1 = event->hw.constraint;
+	struct event_constraint *c1 = cpuc->event_constraint[idx];
 	struct event_constraint *c2;
 
 	/*
@@ -2151,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 	xl = &excl_cntrs->states[tid];
 	xlo = &excl_cntrs->states[o_tid];
 
+	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+		if (!--cpuc->n_excl)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+	}
 	/*
 	 * put_constraint may be called from x86_schedule_events()
@@ -2189,8 +2185,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
-	struct event_constraint *c = event->hw.constraint;
-
 	intel_put_shared_regs_event_constraints(cpuc, event);
 
 	/*
@@ -2198,19 +2192,14 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	 * all events are subject to and must call the
 	 * put_excl_constraints() routine
 	 */
-	if (c && cpuc->excl_cntrs)
+	if (cpuc->excl_cntrs)
 		intel_put_excl_constraints(cpuc, event);
-
-	/* cleanup dynamic constraint */
-	if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
-		event->hw.constraint = NULL;
 }
 
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
-				    struct perf_event *event, int cntr)
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct event_constraint *c = event->hw.constraint;
+	struct event_constraint *c = cpuc->event_constraint[idx];
 	struct intel_excl_states *xlo, *xl;
 	int tid = cpuc->excl_thread_id;
 	int o_tid = 1 - tid;
@@ -2533,34 +2522,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
 	return x86_event_sysfs_show(page, config, event);
 }
 
-static __initconst const struct x86_pmu core_pmu = {
-	.name			= "core",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= core_pmu_enable_all,
-	.enable			= core_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_pmu_hw_config,
-	.schedule_events	= x86_schedule_events,
-	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
-	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-	.event_map		= intel_pmu_event_map,
-	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
-	.apic			= 1,
-	/*
-	 * Intel PMCs cannot be accessed sanely above 32 bit width,
-	 * so we install an artificial 1<<31 period regardless of
-	 * the generic event period:
-	 */
-	.max_period		= (1ULL << 31) - 1,
-	.get_event_constraints	= intel_get_event_constraints,
-	.put_event_constraints	= intel_put_event_constraints,
-	.event_constraints	= intel_core_event_constraints,
-	.guest_get_msrs		= core_guest_get_msrs,
-	.format_attrs		= intel_arch_formats_attr,
-	.events_sysfs_show	= intel_event_sysfs_show,
-};
-
 struct intel_shared_regs *allocate_shared_regs(int cpu)
 {
 	struct intel_shared_regs *regs;
@@ -2668,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu)
 		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 
 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-		int h = x86_pmu.num_counters >> 1;
-
 		for_each_cpu(i, topology_thread_cpumask(cpu)) {
 			struct intel_excl_cntrs *c;
@@ -2683,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu)
 		}
 		cpuc->excl_cntrs->core_id = core_id;
 		cpuc->excl_cntrs->refcnt++;
-		/*
-		 * set hard limit to half the number of generic counters
-		 */
-		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
-		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
 	}
 }
@@ -2743,6 +2697,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	NULL,
 };
 
+static __initconst const struct x86_pmu core_pmu = {
+	.name			= "core",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= core_pmu_enable_all,
+	.enable			= core_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.hw_config		= x86_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32-bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL<<31) - 1,
+	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
+	.event_constraints	= intel_core_event_constraints,
+	.guest_get_msrs		= core_guest_get_msrs,
+	.format_attrs		= intel_arch_formats_attr,
+	.events_sysfs_show	= intel_event_sysfs_show,
+
+	/*
+	 * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+	 * together with PMU version 1 and thus be using core_pmu with
+	 * shared_regs. We need following callbacks here to allocate
+	 * it properly.
+	 */
+	.cpu_prepare		= intel_pmu_cpu_prepare,
+	.cpu_starting		= intel_pmu_cpu_starting,
+	.cpu_dying		= intel_pmu_cpu_dying,
+};
+
 static __initconst const struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
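
Reading the scheduling hunks together: the patch drops the old static limit used by the cross-HT exclusion workaround (each sibling capped at half the generic counters via max_alloc_cntrs) and replaces it with dynamic accounting. Each sibling now counts its scheduled exclusive events in cpuc->n_excl and publishes a per-thread has_exclusive flag with WRITE_ONCE() only while that count is non-zero, using the PERF_X86_EVENT_EXCL_ACCT flag to avoid accounting the same event twice. The sketch below is not kernel code: it is a minimal, self-contained userspace illustration of that get/put accounting pattern, with stand-in struct types and a plain volatile store in place of WRITE_ONCE(), and it omits the per-event EXCL_ACCT guard.

#include <stdio.h>

/* Stand-in for intel_excl_cntrs: state shared by the two HT siblings of a core. */
struct excl_state {
	volatile int has_exclusive[2];	/* published flag, one slot per sibling */
};

/* Stand-in for the per-CPU cpu_hw_events scheduling context. */
struct thread_ctx {
	int tid;			/* which HT sibling: 0 or 1 */
	int n_excl;			/* exclusive events currently accounted */
	struct excl_state *shared;
};

/* First exclusive event accepted: publish the flag; later ones only bump the count. */
static void excl_acct_get(struct thread_ctx *t)
{
	if (!t->n_excl++)
		t->shared->has_exclusive[t->tid] = 1;
}

/* Last exclusive event released: clear the flag so the sibling stops paying the cost. */
static void excl_acct_put(struct thread_ctx *t)
{
	if (!--t->n_excl)
		t->shared->has_exclusive[t->tid] = 0;
}

int main(void)
{
	struct excl_state core = { { 0, 0 } };
	struct thread_ctx t0 = { 0, 0, &core };

	excl_acct_get(&t0);	/* 0 -> 1: flag set   */
	excl_acct_get(&t0);	/* 1 -> 2: no change  */
	excl_acct_put(&t0);	/* 2 -> 1: no change  */
	printf("has_exclusive=%d\n", core.has_exclusive[0]);	/* prints 1 */
	excl_acct_put(&t0);	/* 1 -> 0: flag clear */
	printf("has_exclusive=%d\n", core.has_exclusive[0]);	/* prints 0 */
	return 0;
}

The point of the pattern is that the sibling thread only has to test one flag (has_exclusive) during its own scheduling pass, rather than reserving half the counters up front whether or not any exclusive events exist.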