aboutsummaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
authorYicong Yang <yangyicong@hisilicon.com>2024-02-08 10:40:26 +0800
committerNamhyung Kim <namhyung@kernel.org>2024-02-09 14:59:53 -0800
commitcbc917a1b03bce85f385c1e640c9dcb02ffb9ab0 (patch)
tree85bf911a310bfd4c9e3804d1959afc8441ac3a8f /tools/perf/util
parent9a440bb2e2e9a4af3a7857af42a825f61b27a18c (diff)
perf stat: Support per-cluster aggregation
Some platforms have 'cluster' topology and CPUs in the cluster will share resources like L3 Cache Tag (for HiSilicon Kunpeng SoC) or L2 cache (for Intel Jacobsville). Currently parsing and building cluster topology have been supported since [1]. perf stat has already supported aggregation for other topologies like die or socket, etc. It'll be useful to aggregate per-cluster to find problems like L3T bandwidth contention. This patch add support for "--per-cluster" option for per-cluster aggregation. Also update the docs and related test. The output will be like: [root@localhost tmp]# perf stat -a -e LLC-load --per-cluster -- sleep 5 Performance counter stats for 'system wide': S56-D0-CLS158 4 1,321,521,570 LLC-load S56-D0-CLS594 4 794,211,453 LLC-load S56-D0-CLS1030 4 41,623 LLC-load S56-D0-CLS1466 4 41,646 LLC-load S56-D0-CLS1902 4 16,863 LLC-load S56-D0-CLS2338 4 15,721 LLC-load S56-D0-CLS2774 4 22,671 LLC-load [...] On a legacy system without cluster or cluster support, the output will be look like: [root@localhost perf]# perf stat -a -e cycles --per-cluster -- sleep 1 Performance counter stats for 'system wide': S56-D0-CLS0 64 18,011,485 cycles S7182-D0-CLS0 64 16,548,835 cycles Note that this patch doesn't mix the cluster information in the outputs of --per-core to avoid breaking any tools/scripts using it. Note that perf recently supports "--per-cache" aggregation, but it's not the same with the cluster although cluster CPUs may share some cache resources. For example on my machine all clusters within a die share the same L3 cache: $ cat /sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_list 0-31 $ cat /sys/devices/system/cpu/cpu0/topology/cluster_cpus_list 0-3 [1] commit c5e22feffdd7 ("topology: Represent clusters of CPUs within a die") Tested-by: Jie Zhan <zhanjie9@hisilicon.com> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> Cc: james.clark@arm.com Cc: 21cnbao@gmail.com Cc: prime.zeng@hisilicon.com Cc: Jonathan.Cameron@huawei.com Cc: fanghao11@huawei.com Cc: linuxarm@huawei.com Cc: tim.c.chen@intel.com Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Namhyung Kim <namhyung@kernel.org> Link: https://lore.kernel.org/r/20240208024026.2691-1-yangyicong@huawei.com
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/cpumap.c33
-rw-r--r--tools/perf/util/cpumap.h19
-rw-r--r--tools/perf/util/env.h1
-rw-r--r--tools/perf/util/stat-display.c13
-rw-r--r--tools/perf/util/stat.h1
5 files changed, 62 insertions, 5 deletions
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0581ee0fa5f2..356e30c42cd8 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -222,6 +222,8 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
return a->socket - b->socket;
else if (a->die != b->die)
return a->die - b->die;
+ else if (a->cluster != b->cluster)
+ return a->cluster - b->cluster;
else if (a->cache_lvl != b->cache_lvl)
return a->cache_lvl - b->cache_lvl;
else if (a->cache != b->cache)
@@ -309,6 +311,30 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
return id;
}
+int cpu__get_cluster_id(struct perf_cpu cpu)
+{
+ int value, ret = cpu__get_topology_int(cpu.cpu, "cluster_id", &value);
+
+ return ret ?: value;
+}
+
+struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data)
+{
+ int cluster = cpu__get_cluster_id(cpu);
+ struct aggr_cpu_id id;
+
+ /* There is no cluster_id on legacy system. */
+ if (cluster == -1)
+ cluster = 0;
+
+ id = aggr_cpu_id__die(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
+ return id;
+
+ id.cluster = cluster;
+ return id;
+}
+
int cpu__get_core_id(struct perf_cpu cpu)
{
int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
@@ -320,8 +346,8 @@ struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
struct aggr_cpu_id id;
int core = cpu__get_core_id(cpu);
- /* aggr_cpu_id__die returns a struct with socket and die set. */
- id = aggr_cpu_id__die(cpu, data);
+ /* aggr_cpu_id__die returns a struct with socket die, and cluster set. */
+ id = aggr_cpu_id__cluster(cpu, data);
if (aggr_cpu_id__is_empty(&id))
return id;
@@ -683,6 +709,7 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b
a->node == b->node &&
a->socket == b->socket &&
a->die == b->die &&
+ a->cluster == b->cluster &&
a->cache_lvl == b->cache_lvl &&
a->cache == b->cache &&
a->core == b->core &&
@@ -695,6 +722,7 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
a->node == -1 &&
a->socket == -1 &&
a->die == -1 &&
+ a->cluster == -1 &&
a->cache_lvl == -1 &&
a->cache == -1 &&
a->core == -1 &&
@@ -708,6 +736,7 @@ struct aggr_cpu_id aggr_cpu_id__empty(void)
.node = -1,
.socket = -1,
.die = -1,
+ .cluster = -1,
.cache_lvl = -1,
.cache = -1,
.core = -1,
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 9df2aeb34d3d..26cf76c693f5 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -20,6 +20,8 @@ struct aggr_cpu_id {
int socket;
/** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
int die;
+ /** The cluster id as read from /sys/devices/system/cpu/cpuX/topology/cluster_id */
+ int cluster;
/** The cache level as read from /sys/devices/system/cpu/cpuX/cache/indexY/level */
int cache_lvl;
/**
@@ -87,6 +89,11 @@ int cpu__get_socket_id(struct perf_cpu cpu);
*/
int cpu__get_die_id(struct perf_cpu cpu);
/**
+ * cpu__get_cluster_id - Returns the cluster id as read from
+ * /sys/devices/system/cpu/cpuX/topology/cluster_id for the given CPU
+ */
+int cpu__get_cluster_id(struct perf_cpu cpu);
+/**
* cpu__get_core_id - Returns the core id as read from
* /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU.
*/
@@ -127,9 +134,15 @@ struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data);
*/
struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data);
/**
- * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket
- * populated with the core, die and socket for cpu. The function signature is
- * compatible with aggr_cpu_id_get_t.
+ * aggr_cpu_id__cluster - Create an aggr_cpu_id with cluster, die and socket
+ * populated with the cluster, die and socket for cpu. The function signature
+ * is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the core, cluster, die and
+ * socket populated with the core, die and socket for cpu. The function
+ * signature is compatible with aggr_cpu_id_get_t.
*/
struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data);
/**
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 7c527e65c186..2a2c37cc40b7 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -12,6 +12,7 @@ struct perf_cpu_map;
struct cpu_topology_map {
int socket_id;
int die_id;
+ int cluster_id;
int core_id;
};
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 8c61f8627ebc..4dfe7d9517a9 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -201,6 +201,9 @@ static void print_aggr_id_std(struct perf_stat_config *config,
snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d",
id.socket, id.die, id.cache_lvl, id.cache);
break;
+ case AGGR_CLUSTER:
+ snprintf(buf, sizeof(buf), "S%d-D%d-CLS%d", id.socket, id.die, id.cluster);
+ break;
case AGGR_DIE:
snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die);
break;
@@ -251,6 +254,10 @@ static void print_aggr_id_csv(struct perf_stat_config *config,
fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s",
id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep);
break;
+ case AGGR_CLUSTER:
+ fprintf(config->output, "S%d-D%d-CLS%d%s%d%s",
+ id.socket, id.die, id.cluster, sep, aggr_nr, sep);
+ break;
case AGGR_DIE:
fprintf(output, "S%d-D%d%s%d%s",
id.socket, id.die, sep, aggr_nr, sep);
@@ -300,6 +307,10 @@ static void print_aggr_id_json(struct perf_stat_config *config,
fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
break;
+ case AGGR_CLUSTER:
+ fprintf(output, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d, ",
+ id.socket, id.die, id.cluster, aggr_nr);
+ break;
case AGGR_DIE:
fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, aggr_nr);
@@ -1248,6 +1259,7 @@ static void print_header_interval_std(struct perf_stat_config *config,
case AGGR_NODE:
case AGGR_SOCKET:
case AGGR_DIE:
+ case AGGR_CLUSTER:
case AGGR_CACHE:
case AGGR_CORE:
fprintf(output, "#%*s %-*s cpus",
@@ -1550,6 +1562,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
switch (config->aggr_mode) {
case AGGR_CORE:
case AGGR_CACHE:
+ case AGGR_CLUSTER:
case AGGR_DIE:
case AGGR_SOCKET:
case AGGR_NODE:
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 4357ba114822..d6e5c8787ba2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -48,6 +48,7 @@ enum aggr_mode {
AGGR_GLOBAL,
AGGR_SOCKET,
AGGR_DIE,
+ AGGR_CLUSTER,
AGGR_CACHE,
AGGR_CORE,
AGGR_THREAD,