Diffstat (limited to 'arch/arc/kernel/perf_event.c')
-rw-r--r--	arch/arc/kernel/perf_event.c	166
1 file changed, 164 insertions(+), 2 deletions(-)
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 145722f80c9b..adff957962da 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -17,6 +17,168 @@
 /* HW holds 8 symbols + one for null terminator */
 #define ARCPMU_EVENT_NAME_LEN	9
 
+/*
+ * Some ARC pct quirks:
+ *
+ * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+ * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+ *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
+ *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
+ *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
+ *	STALLED_CYCLES_FRONTEND.
+ *
+ *	We could start multiple performance counters and combine everything
+ *	afterwards, but that makes it complicated.
+ *
+ *	Note that I$ cache misses aren't counted by either of the two!
+ */
+
+/*
+ * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
+ * (based on a specific RTL build)
+ * Below is the static map between perf generic/arc specific event_id and
+ * h/w condition names.
+ * At the time of probe, we loop thru each index and find it's name to
+ * complete the mapping of perf event_id to h/w index as latter is needed
+ * to program the counter really
+ */
+static const char * const arc_pmu_ev_hw_map[] = {
+	/* count cycles */
+	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
+	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
+
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
+
+	/* counts condition */
+	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+	/* All jump instructions that are taken */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
+#ifdef CONFIG_ISA_ARCV2
+	[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
+#else
+	[PERF_COUNT_ARC_BPOK]         = "bpok",	  /* NP-NT, PT-T, PNT-NT */
+	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+#endif
+	[PERF_COUNT_ARC_LDC] = "imemrdc",	/* Instr: mem read cached */
+	[PERF_COUNT_ARC_STC] = "imemwrc",	/* Instr: mem write cached */
+
+	[PERF_COUNT_ARC_DCLM] = "dclm",		/* D-cache Load Miss */
+	[PERF_COUNT_ARC_DCSM] = "dcsm",		/* D-cache Store Miss */
+	[PERF_COUNT_ARC_ICM] = "icm",		/* I-cache Miss */
+	[PERF_COUNT_ARC_EDTLB] = "edtlb",	/* D-TLB Miss */
+	[PERF_COUNT_ARC_EITLB] = "eitlb",	/* I-TLB Miss */
+
+	[PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc",	/* Instr: mem read cached */
+	[PERF_COUNT_HW_CACHE_MISSES] = "dclm",		/* D-cache Load Miss */
+};
+
+#define C(_x)			PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED	0xffff
+
+static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_STC,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= PERF_COUNT_HW_INSTRUCTIONS,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
+		},
+			/* DTLB LD/ST Miss not segregated by h/w*/
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+			[C(RESULT_MISS)]	= PERF_COUNT_HW_BRANCH_MISSES,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
 enum arc_pmu_attr_groups {
 	ARCPMU_ATTR_GR_EVENTS,
 	ARCPMU_ATTR_GR_FORMATS,
@@ -328,7 +490,7 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
 	}
 
 	if (!(event->hw.state & PERF_HES_STOPPED)) {
-		/* stop ARC pmu here */
+		/* stop hw counter here */
 		write_aux_reg(ARC_REG_PCT_INDEX, idx);
 
 		/* condition code #0 is always "never" */
@@ -361,7 +523,7 @@ static int arc_pmu_add(struct perf_event *event, int flags)
 {
 	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
+	int idx;
 
 	idx = ffz(pmu_cpu->used_mask[0]);
 	if (idx == arc_pmu->n_counters)
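Note on how arc_pmu_ev_hw_map[] is consumed: as the comment in the hunk says, ARC PCT condition names are fixed but their hardware indexes vary per RTL build, so the name-to-index resolution has to happen once at probe time. The snippet below is only a sketch of that resolution loop and is not part of this patch; arc_pmu_read_cc_name() and ev_hw_idx[] are hypothetical stand-ins for the driver's aux-register reads and its per-event index store.

/*
 * Sketch of probe-time name -> index resolution (illustrative only).
 * arc_pmu_read_cc_name() is a hypothetical helper that fills `name`
 * with the condition name of hardware index j ("crun", "iall", ...).
 */
static int arc_pmu_map_hw_events(int n_conditions, int *ev_hw_idx)
{
	char name[ARCPMU_EVENT_NAME_LEN];
	int i, j;

	/* start with every perf event_id unmapped on this build */
	for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++)
		ev_hw_idx[i] = -1;

	for (j = 0; j < n_conditions; j++) {
		arc_pmu_read_cc_name(j, name);	/* hypothetical aux-reg read */

		/* remember the h/w index for every event_id using this name */
		for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
			if (arc_pmu_ev_hw_map[i] &&
			    !strcmp(arc_pmu_ev_hw_map[i], name))
				ev_hw_idx[i] = j;
		}
	}

	return 0;
}

The counters are then programmed with the resolved hardware index, not with the perf event_id itself.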
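Similarly, arc_pmu_cache_map[] is consulted when userspace requests a generic PERF_TYPE_HW_CACHE event. Perf packs such an event as config = id | (op << 8) | (result << 16), so each byte selects one dimension of the table. The helper below is an illustrative sketch of that decode under the macros defined in the hunk, not the driver's actual function.

/*
 * Illustrative sketch: decode a PERF_TYPE_HW_CACHE config word against
 * arc_pmu_cache_map[] and return the corresponding ARC/perf event_id,
 * or a negative errno if the combination is out of range/unsupported.
 */
static int arc_pmu_cache_event_sketch(u64 config)
{
	unsigned int type   = (config >>  0) & 0xff;	/* C(L1D), C(L1I), ...        */
	unsigned int op     = (config >>  8) & 0xff;	/* C(OP_READ/WRITE/PREFETCH)  */
	unsigned int result = (config >> 16) & 0xff;	/* C(RESULT_ACCESS/MISS)      */
	int ret;

	if (type >= C(MAX) || op >= C(OP_MAX) || result >= C(RESULT_MAX))
		return -EINVAL;

	ret = arc_pmu_cache_map[type][op][result];
	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	/* an event_id that arc_pmu_ev_hw_map[] can translate to a h/w condition */
	return ret;
}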