diff options
Diffstat (limited to 'tools/lib/perf')
-rw-r--r-- | tools/lib/perf/cpumap.c | 125 | ||||
-rw-r--r-- | tools/lib/perf/evlist.c | 25 | ||||
-rw-r--r-- | tools/lib/perf/include/internal/evsel.h | 15 | ||||
-rw-r--r-- | tools/lib/perf/include/perf/cpumap.h | 19 | ||||
-rw-r--r-- | tools/lib/perf/include/perf/event.h | 3 |
5 files changed, 154 insertions, 33 deletions
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 1229b18bcdb1..2a5a29217374 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -99,6 +99,11 @@ static int cmp_cpu(const void *a, const void *b) return cpu_a->cpu - cpu_b->cpu; } +static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +{ + return RC_CHK_ACCESS(cpus)->map[idx]; +} + static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) { size_t payload_size = nr_cpus * sizeof(struct perf_cpu); @@ -111,8 +116,12 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || RC_CHK_ACCESS(cpus)->map[i].cpu != RC_CHK_ACCESS(cpus)->map[i - 1].cpu) - RC_CHK_ACCESS(cpus)->map[j++].cpu = RC_CHK_ACCESS(cpus)->map[i].cpu; + if (i == 0 || + __perf_cpu_map__cpu(cpus, i).cpu != + __perf_cpu_map__cpu(cpus, i - 1).cpu) { + RC_CHK_ACCESS(cpus)->map[j++].cpu = + __perf_cpu_map__cpu(cpus, i).cpu; + } } perf_cpu_map__set_nr(cpus, j); assert(j <= nr_cpus); @@ -269,26 +278,31 @@ out: return cpus; } +static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus) +{ + return RC_CHK_ACCESS(cpus)->nr; +} + struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { struct perf_cpu result = { .cpu = -1 }; - if (cpus && idx < RC_CHK_ACCESS(cpus)->nr) - return RC_CHK_ACCESS(cpus)->map[idx]; + if (cpus && idx < __perf_cpu_map__nr(cpus)) + return __perf_cpu_map__cpu(cpus, idx); return result; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) { - return cpus ? RC_CHK_ACCESS(cpus)->nr : 1; + return cpus ? __perf_cpu_map__nr(cpus) : 1; } bool perf_cpu_map__empty(const struct perf_cpu_map *map) { - return map ? RC_CHK_ACCESS(map)->map[0].cpu == -1 : true; + return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true; } int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) @@ -299,10 +313,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) return -1; low = 0; - high = RC_CHK_ACCESS(cpus)->nr; + high = __perf_cpu_map__nr(cpus); while (low < high) { int idx = (low + high) / 2; - struct perf_cpu cpu_at_idx = RC_CHK_ACCESS(cpus)->map[idx]; + struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx); if (cpu_at_idx.cpu == cpu.cpu) return idx; @@ -321,6 +335,32 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) return perf_cpu_map__idx(cpus, cpu) != -1; } +bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs) +{ + int nr; + + if (lhs == rhs) + return true; + + if (!lhs || !rhs) + return false; + + nr = __perf_cpu_map__nr(lhs); + if (nr != __perf_cpu_map__nr(rhs)) + return false; + + for (int idx = 0; idx < nr; idx++) { + if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu) + return false; + } + return true; +} + +bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map) +{ + return map && __perf_cpu_map__cpu(map, 0).cpu == -1; +} + struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) { struct perf_cpu result = { @@ -328,7 +368,9 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) }; // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return RC_CHK_ACCESS(map)->nr > 0 ? RC_CHK_ACCESS(map)->map[RC_CHK_ACCESS(map)->nr - 1] : result; + return __perf_cpu_map__nr(map) > 0 + ? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1) + : result; } /** Is 'b' a subset of 'a'. */ @@ -336,15 +378,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu { if (a == b || !b) return true; - if (!a || RC_CHK_ACCESS(b)->nr > RC_CHK_ACCESS(a)->nr) + if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a)) return false; - for (int i = 0, j = 0; i < RC_CHK_ACCESS(a)->nr; i++) { - if (RC_CHK_ACCESS(a)->map[i].cpu > RC_CHK_ACCESS(b)->map[j].cpu) + for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) { + if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu) return false; - if (RC_CHK_ACCESS(a)->map[i].cpu == RC_CHK_ACCESS(b)->map[j].cpu) { + if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) { j++; - if (j == RC_CHK_ACCESS(b)->nr) + if (j == __perf_cpu_map__nr(b)) return true; } } @@ -374,27 +416,27 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, return perf_cpu_map__get(other); } - tmp_len = RC_CHK_ACCESS(orig)->nr + RC_CHK_ACCESS(other)->nr; + tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other); tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) return NULL; /* Standard merge algorithm from wikipedia */ i = j = k = 0; - while (i < RC_CHK_ACCESS(orig)->nr && j < RC_CHK_ACCESS(other)->nr) { - if (RC_CHK_ACCESS(orig)->map[i].cpu <= RC_CHK_ACCESS(other)->map[j].cpu) { - if (RC_CHK_ACCESS(orig)->map[i].cpu == RC_CHK_ACCESS(other)->map[j].cpu) + while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) { + if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu) j++; - tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++]; + tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++); } else - tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++]; + tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++); } - while (i < RC_CHK_ACCESS(orig)->nr) - tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++]; + while (i < __perf_cpu_map__nr(orig)) + tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++); - while (j < RC_CHK_ACCESS(other)->nr) - tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++]; + while (j < __perf_cpu_map__nr(other)) + tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++); assert(k <= tmp_len); merged = cpu_map__trim_new(k, tmp_cpus); @@ -402,3 +444,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, perf_cpu_map__put(orig); return merged; } + +struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other) +{ + struct perf_cpu *tmp_cpus; + int tmp_len; + int i, j, k; + struct perf_cpu_map *merged = NULL; + + if (perf_cpu_map__is_subset(other, orig)) + return perf_cpu_map__get(orig); + if (perf_cpu_map__is_subset(orig, other)) + return perf_cpu_map__get(other); + + tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other)); + tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); + if (!tmp_cpus) + return NULL; + + i = j = k = 0; + while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) + i++; + else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu) + j++; + else { + j++; + tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++); + } + } + if (k) + merged = cpu_map__trim_new(k, tmp_cpus); + free(tmp_cpus); + return merged; +} diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 81e8b5fcd8ba..b8b066d0dc5e 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -36,18 +36,33 @@ void perf_evlist__init(struct perf_evlist *evlist) static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, struct perf_evsel *evsel) { - /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, if there's no target cpu list defined. - */ if (evsel->system_wide) { + /* System wide: set the cpu map of the evsel to all online CPUs. */ perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__new(NULL); + } else if (evlist->has_user_cpus && evsel->is_pmu_core) { + /* + * User requested CPUs on a core PMU, ensure the requested CPUs + * are valid by intersecting with those of the PMU. + */ + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus); } else if (!evsel->own_cpus || evlist->has_user_cpus || - (!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) { + (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { + /* + * The PMU didn't specify a default cpu map, this isn't a core + * event and the user requested CPUs or the evlist user + * requested CPUs have the "any CPU" (aka dummy) CPU value. In + * which case use the user requested CPUs rather than the PMU + * ones. + */ perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } else if (evsel->cpus != evsel->own_cpus) { + /* + * No user requested cpu map but the PMU cpu map doesn't match + * the evsel's. Reset it back to the PMU cpu map. + */ perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evsel->own_cpus); } diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index a99a75d9e78f..5cd220a61962 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -41,7 +41,14 @@ struct perf_sample_id { struct perf_evsel { struct list_head node; struct perf_event_attr attr; + /** The commonly used cpu map of CPUs the event should be opened upon, etc. */ struct perf_cpu_map *cpus; + /** + * The cpu map read from the PMU. For core PMUs this is the list of all + * CPUs the event can be opened upon. For other PMUs this is the default + * cpu map for opening the event on, for example, the first CPU on a + * socket for an uncore event. + */ struct perf_cpu_map *own_cpus; struct perf_thread_map *threads; struct xyarray *fd; @@ -55,9 +62,9 @@ struct perf_evsel { int nr_members; /* * system_wide is for events that need to be on every CPU, irrespective - * of user requested CPUs or threads. Map propagation will set cpus to - * this event's own_cpus, whereby they will contribute to evlist - * all_cpus. + * of user requested CPUs or threads. Tha main example of this is the + * dummy event. Map propagation will set cpus for this event to all CPUs + * as software PMU events like dummy, have a CPU map that is empty. */ bool system_wide; /* @@ -65,6 +72,8 @@ struct perf_evsel { * i.e. it cannot be the 'any CPU' value of -1. */ bool requires_cpu; + /** Is the PMU for the event a core one? Effects the handling of own_cpus. */ + bool is_pmu_core; int idx; }; diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 3f43f770cdac..e38d859a384d 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -11,8 +11,16 @@ struct perf_cpu { int cpu; }; +struct perf_cache { + int cache_lvl; + int cache; +}; + struct perf_cpu_map; +/** + * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value. + */ LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); @@ -20,12 +28,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); +LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); +/** + * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value. + */ LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); +LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, + const struct perf_cpu_map *rhs); +/** + * perf_cpu_map__any_cpu - Does the map contain the "any CPU"/dummy -1 value? + */ +LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 51b9338f4c11..ba2dcf64f4e6 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -380,7 +380,8 @@ enum { PERF_STAT_CONFIG_TERM__AGGR_MODE = 0, PERF_STAT_CONFIG_TERM__INTERVAL = 1, PERF_STAT_CONFIG_TERM__SCALE = 2, - PERF_STAT_CONFIG_TERM__MAX = 3, + PERF_STAT_CONFIG_TERM__AGGR_LEVEL = 3, + PERF_STAT_CONFIG_TERM__MAX = 4, }; struct perf_record_stat_config_entry { |