From d4ec840baecbed280c7305f9103a10641d4d3799 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:46 +0100 Subject: cacheinfo: Use of_cpu_device_node_get instead cpu_dev->of_node The of_cpu_device_node_get takes care of fetching the CPU'd device node either from cached cpu_dev->of_node if cpu_dev is initialised or uses of_get_cpu_node to parse and fetch node if cpu_dev isn't available yet. Just use of_cpu_device_node_get instead of getting the cpu device first and then using cpu_dev->of_node for two reasons: 1. There is no other use of cpu_dev and can be simplified 2. It enabled the use detect_cache_attributes and hence cache_setup_of_node much earlier before the CPUs are registered as devices. Link: https://lore.kernel.org/r/20220704101605.1318280-3-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/cacheinfo.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index dad296229161..b0bde272e2ae 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -157,7 +157,6 @@ static int cache_setup_of_node(unsigned int cpu) { struct device_node *np; struct cacheinfo *this_leaf; - struct device *cpu_dev = get_cpu_device(cpu); struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); unsigned int index = 0; @@ -166,11 +165,7 @@ static int cache_setup_of_node(unsigned int cpu) return 0; } - if (!cpu_dev) { - pr_err("No cpu device for CPU %d\n", cpu); - return -ENODEV; - } - np = cpu_dev->of_node; + np = of_cpu_device_node_get(cpu); if (!np) { pr_err("Failed to find cpu%d device node\n", cpu); return -ENOENT; -- cgit From b14e8d21f726f4ffeaf8833783eda68a1c152b15 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:47 +0100 Subject: cacheinfo: Add helper to access any cache index for a given CPU The cacheinfo for a given CPU at a given index is used at quite a few places by fetching the base point for index 0 using the helper per_cpu_cacheinfo(cpu) and offsetting it by the required index. Instead, add another helper to fetch the required pointer directly and use it to simplify and improve readability. Link: https://lore.kernel.org/r/20220704101605.1318280-4-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/cacheinfo.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index b0bde272e2ae..e13ef41763e4 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -25,6 +25,8 @@ static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo); #define ci_cacheinfo(cpu) (&per_cpu(ci_cpu_cacheinfo, cpu)) #define cache_leaves(cpu) (ci_cacheinfo(cpu)->num_leaves) #define per_cpu_cacheinfo(cpu) (ci_cacheinfo(cpu)->info_list) +#define per_cpu_cacheinfo_idx(cpu, idx) \ + (per_cpu_cacheinfo(cpu) + (idx)) struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) { @@ -172,7 +174,7 @@ static int cache_setup_of_node(unsigned int cpu) } while (index < cache_leaves(cpu)) { - this_leaf = this_cpu_ci->info_list + index; + this_leaf = per_cpu_cacheinfo_idx(cpu, index); if (this_leaf->level != 1) np = of_find_next_cache_node(np); else @@ -231,7 +233,7 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) for (index = 0; index < cache_leaves(cpu); index++) { unsigned int i; - this_leaf = this_cpu_ci->info_list + index; + this_leaf = per_cpu_cacheinfo_idx(cpu, index); /* skip if shared_cpu_map is already populated */ if (!cpumask_empty(&this_leaf->shared_cpu_map)) continue; @@ -242,7 +244,7 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) if (i == cpu || !sib_cpu_ci->info_list) continue;/* skip if itself or no cacheinfo */ - sib_leaf = sib_cpu_ci->info_list + index; + sib_leaf = per_cpu_cacheinfo_idx(i, index); if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_set_cpu(i, &this_leaf->shared_cpu_map); @@ -258,12 +260,11 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) static void cache_shared_cpu_map_remove(unsigned int cpu) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; unsigned int sibling, index; for (index = 0; index < cache_leaves(cpu); index++) { - this_leaf = this_cpu_ci->info_list + index; + this_leaf = per_cpu_cacheinfo_idx(cpu, index); for_each_cpu(sibling, &this_leaf->shared_cpu_map) { struct cpu_cacheinfo *sib_cpu_ci; @@ -274,7 +275,7 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) if (!sib_cpu_ci->info_list) continue; - sib_leaf = sib_cpu_ci->info_list + index; + sib_leaf = per_cpu_cacheinfo_idx(sibling, index); cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); } @@ -609,7 +610,6 @@ static int cache_add_dev(unsigned int cpu) int rc; struct device *ci_dev, *parent; struct cacheinfo *this_leaf; - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); const struct attribute_group **cache_groups; rc = cpu_cache_sysfs_init(cpu); @@ -618,7 +618,7 @@ static int cache_add_dev(unsigned int cpu) parent = per_cpu_cache_dev(cpu); for (i = 0; i < cache_leaves(cpu); i++) { - this_leaf = this_cpu_ci->info_list + i; + this_leaf = per_cpu_cacheinfo_idx(cpu, i); if (this_leaf->disable_sysfs) continue; if (this_leaf->type == CACHE_TYPE_NOCACHE) -- cgit From 9447eb0f1575572218267180b4edff937b3aec57 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:48 +0100 Subject: cacheinfo: Move cache_leaves_are_shared out of CONFIG_OF cache_leaves_are_shared is already used even with ACPI and PPTT. It checks if the cache leaves are the shared based on fw_token pointer. However it is defined conditionally only if CONFIG_OF is enabled which is wrong. Move the function cache_leaves_are_shared out of CONFIG_OF and keep it generic. It also handles the case where both OF and ACPI is not defined. Link: https://lore.kernel.org/r/20220704101605.1318280-5-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/cacheinfo.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index e13ef41763e4..2cea9201f31c 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -33,13 +33,21 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) return ci_cacheinfo(cpu); } -#ifdef CONFIG_OF static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { + /* + * For non DT/ACPI systems, assume unique level 1 caches, + * system-wide shared caches for all other levels. This will be used + * only if arch specific code has not populated shared_cpu_map + */ + if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI))) + return !(this_leaf->level == 1); + return sib_leaf->fw_token == this_leaf->fw_token; } +#ifdef CONFIG_OF /* OF properties to query for a given cache type */ struct cache_type_info { const char *size_prop; @@ -193,16 +201,6 @@ static int cache_setup_of_node(unsigned int cpu) } #else static inline int cache_setup_of_node(unsigned int cpu) { return 0; } -static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, - struct cacheinfo *sib_leaf) -{ - /* - * For non-DT/ACPI systems, assume unique level 1 caches, system-wide - * shared caches for all other levels. This will be used only if - * arch specific code has not populated shared_cpu_map - */ - return !(this_leaf->level == 1); -} #endif int __weak cache_setup_acpi(unsigned int cpu) -- cgit From cc1cfc47ea47187a21ec1f079b3c53264157fe15 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:49 +0100 Subject: cacheinfo: Add support to check if last level cache(LLC) is valid or shared It is useful to have helper to check if the given two CPUs share last level cache. We can do that check by comparing fw_token or by comparing the cache ID. Currently we check just for fw_token as the cache ID is optional. This helper can be used to build the llc_sibling during arch specific topology parsing and feeding information to the sched_domains. This also helps to get rid of llc_id in the CPU topology as it is sort of duplicate information. Also add helper to check if the llc information in cacheinfo is valid or not. Link: https://lore.kernel.org/r/20220704101605.1318280-6-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/cacheinfo.c | 26 ++++++++++++++++++++++++++ include/linux/cacheinfo.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 2cea9201f31c..fdc1baa342f1 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -47,6 +47,32 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, return sib_leaf->fw_token == this_leaf->fw_token; } +bool last_level_cache_is_valid(unsigned int cpu) +{ + struct cacheinfo *llc; + + if (!cache_leaves(cpu)) + return false; + + llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); + + return !!llc->fw_token; +} + +bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y) +{ + struct cacheinfo *llc_x, *llc_y; + + if (!last_level_cache_is_valid(cpu_x) || + !last_level_cache_is_valid(cpu_y)) + return false; + + llc_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1); + llc_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1); + + return cache_leaves_are_shared(llc_x, llc_y); +} + #ifdef CONFIG_OF /* OF properties to query for a given cache type */ struct cache_type_info { diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 4ff37cb763ae..7e429bc5c1a4 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -82,6 +82,8 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); +bool last_level_cache_is_valid(unsigned int cpu); +bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); #ifndef CONFIG_ACPI_PPTT /* * acpi_find_last_cache_level is only called on ACPI enabled -- cgit From 36bbc5b4ffab33ccac0f4db27f619a6ba7a4fd32 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:50 +0100 Subject: cacheinfo: Allow early detection and population of cache attributes Some architecture/platforms may need to setup cache properties very early in the boot along with other cpu topologies so that all these information can be used to build sched_domains which is used by the scheduler. Allow detect_cache_attributes to be called quite early during the boot. Link: https://lore.kernel.org/r/20220704101605.1318280-7-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/cacheinfo.c | 55 +++++++++++++++++++++++++++++++---------------- include/linux/cacheinfo.h | 1 + 2 files changed, 38 insertions(+), 18 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index fdc1baa342f1..4d21a1022fa9 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -193,14 +193,8 @@ static int cache_setup_of_node(unsigned int cpu) { struct device_node *np; struct cacheinfo *this_leaf; - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); unsigned int index = 0; - /* skip if fw_token is already populated */ - if (this_cpu_ci->info_list->fw_token) { - return 0; - } - np = of_cpu_device_node_get(cpu); if (!np) { pr_err("Failed to find cpu%d device node\n", cpu); @@ -236,6 +230,18 @@ int __weak cache_setup_acpi(unsigned int cpu) unsigned int coherency_max_size; +static int cache_setup_properties(unsigned int cpu) +{ + int ret = 0; + + if (of_have_populated_dt()) + ret = cache_setup_of_node(cpu); + else if (!acpi_disabled) + ret = cache_setup_acpi(cpu); + + return ret; +} + static int cache_shared_cpu_map_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -246,21 +252,21 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) if (this_cpu_ci->cpu_map_populated) return 0; - if (of_have_populated_dt()) - ret = cache_setup_of_node(cpu); - else if (!acpi_disabled) - ret = cache_setup_acpi(cpu); - - if (ret) - return ret; + /* + * skip setting up cache properties if LLC is valid, just need + * to update the shared cpu_map if the cache attributes were + * populated early before all the cpus are brought online + */ + if (!last_level_cache_is_valid(cpu)) { + ret = cache_setup_properties(cpu); + if (ret) + return ret; + } for (index = 0; index < cache_leaves(cpu); index++) { unsigned int i; this_leaf = per_cpu_cacheinfo_idx(cpu, index); - /* skip if shared_cpu_map is already populated */ - if (!cpumask_empty(&this_leaf->shared_cpu_map)) - continue; cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); for_each_online_cpu(i) { @@ -330,17 +336,28 @@ int __weak populate_cache_leaves(unsigned int cpu) return -ENOENT; } -static int detect_cache_attributes(unsigned int cpu) +int detect_cache_attributes(unsigned int cpu) { int ret; + /* Since early detection of the cacheinfo is allowed via this + * function and this also gets called as CPU hotplug callbacks via + * cacheinfo_cpu_online, the initialisation can be skipped and only + * CPU maps can be updated as the CPU online status would be update + * if called via cacheinfo_cpu_online path. + */ + if (per_cpu_cacheinfo(cpu)) + goto update_cpu_map; + if (init_cache_level(cpu) || !cache_leaves(cpu)) return -ENOENT; per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_KERNEL); - if (per_cpu_cacheinfo(cpu) == NULL) + if (per_cpu_cacheinfo(cpu) == NULL) { + cache_leaves(cpu) = 0; return -ENOMEM; + } /* * populate_cache_leaves() may completely setup the cache leaves and @@ -349,6 +366,8 @@ static int detect_cache_attributes(unsigned int cpu) ret = populate_cache_leaves(cpu); if (ret) goto free_ci; + +update_cpu_map: /* * For systems using DT for cache hierarchy, fw_token * and shared_cpu_map will be set up here only if they are diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 7e429bc5c1a4..00b7a6ae8617 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -84,6 +84,7 @@ int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); bool last_level_cache_is_valid(unsigned int cpu); bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); +int detect_cache_attributes(unsigned int cpu); #ifndef CONFIG_ACPI_PPTT /* * acpi_find_last_cache_level is only called on ACPI enabled -- cgit From f16d1becf96f0a95dc9e1a5a7f97feeec2b149d5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:51 +0100 Subject: cacheinfo: Use cache identifiers to check if the caches are shared if available The cache identifiers is an optional property on most of the platforms. The presence of one must be indicated by the CACHE_ID valid bit in the attributes. We can use the cache identifiers provided by the firmware to check if any two cpus share the same cache instead of relying on the fw_token generated and set in the OS. Link: https://lore.kernel.org/r/20220704101605.1318280-8-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Signed-off-by: Sudeep Holla --- drivers/base/cacheinfo.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 4d21a1022fa9..e331b399adeb 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -44,6 +44,10 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI))) return !(this_leaf->level == 1); + if ((sib_leaf->attributes & CACHE_ID) && + (this_leaf->attributes & CACHE_ID)) + return sib_leaf->id == this_leaf->id; + return sib_leaf->fw_token == this_leaf->fw_token; } @@ -56,7 +60,8 @@ bool last_level_cache_is_valid(unsigned int cpu) llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); - return !!llc->fw_token; + return (llc->attributes & CACHE_ID) || !!llc->fw_token; + } bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y) -- cgit From 521103134a0d07774c8b17f25ff0ef70cbd56c9d Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:52 +0100 Subject: cacheinfo: Align checks in cache_shared_cpu_map_{setup,remove} for readability The checks to skip the CPU itself or no cacheinfo case are implemented bit differently though the effect is exactly same. Just align the implementation in both cache_shared_cpu_map_{setup,remove} just for improved readability. No functional change. Link: https://lore.kernel.org/r/20220704101605.1318280-9-sudeep.holla@arm.com Tested-by: Conor Dooley Signed-off-by: Sudeep Holla --- drivers/base/cacheinfo.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index e331b399adeb..65d566ff24c4 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -279,6 +279,7 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) if (i == cpu || !sib_cpu_ci->info_list) continue;/* skip if itself or no cacheinfo */ + sib_leaf = per_cpu_cacheinfo_idx(i, index); if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); @@ -301,14 +302,11 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) for (index = 0; index < cache_leaves(cpu); index++) { this_leaf = per_cpu_cacheinfo_idx(cpu, index); for_each_cpu(sibling, &this_leaf->shared_cpu_map) { - struct cpu_cacheinfo *sib_cpu_ci; - - if (sibling == cpu) /* skip itself */ - continue; + struct cpu_cacheinfo *sib_cpu_ci = + get_cpu_cacheinfo(sibling); - sib_cpu_ci = get_cpu_cacheinfo(sibling); - if (!sib_cpu_ci->info_list) - continue; + if (sibling == cpu || !sib_cpu_ci->info_list) + continue;/* skip if itself or no cacheinfo */ sib_leaf = per_cpu_cacheinfo_idx(sibling, index); cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); -- cgit From 38db9b95464f82fed28794afe0214d9439d86f7c Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:53 +0100 Subject: arch_topology: Add support to parse and detect cache attributes Currently ACPI populates just the minimum information about the last level cache from PPTT in order to feed the same to build sched_domains. Similar support for DT platforms is not present. In order to enable the same, the entire cache hierarchy information can be built as part of CPU topoplogy parsing both on ACPI and DT platforms. Note that this change builds the cacheinfo early even on ACPI systems, but the current mechanism of building llc_sibling mask remains unchanged. Link: https://lore.kernel.org/r/20220704101605.1318280-10-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 579c851a2bd7..e2f7d9ea558e 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -780,15 +781,28 @@ __weak int __init parse_acpi_topology(void) #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) void __init init_cpu_topology(void) { + int ret, cpu; + reset_cpu_topology(); + ret = parse_acpi_topology(); + if (!ret) + ret = of_have_populated_dt() && parse_dt_topology(); - /* - * Discard anything that was parsed if we hit an error so we - * don't use partial information. - */ - if (parse_acpi_topology()) - reset_cpu_topology(); - else if (of_have_populated_dt() && parse_dt_topology()) + if (ret) { + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ reset_cpu_topology(); + return; + } + + for_each_possible_cpu(cpu) { + ret = detect_cache_attributes(cpu); + if (ret) { + pr_info("Early cacheinfo failed, ret = %d\n", ret); + break; + } + } } #endif -- cgit From f027db2f9a09e76858d06828b9ff817272d64ccc Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:54 +0100 Subject: arch_topology: Use the last level cache information from the cacheinfo The cacheinfo is now initialised early along with the CPU topology initialisation. Instead of relying on the LLC ID information parsed separately only with ACPI PPTT elsewhere, migrate to use the similar information from the cacheinfo. This is generic for both DT and ACPI systems. The ACPI LLC ID information parsed separately can now be removed from arch specific code. Link: https://lore.kernel.org/r/20220704101605.1318280-11-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index e2f7d9ea558e..4f936c984fb6 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -668,7 +668,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu) /* not numa in package, lets use the package siblings */ core_mask = &cpu_topology[cpu].core_sibling; } - if (cpu_topology[cpu].llc_id != -1) { + + if (last_level_cache_is_valid(cpu)) { if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) core_mask = &cpu_topology[cpu].llc_sibling; } @@ -699,7 +700,7 @@ void update_siblings_masks(unsigned int cpuid) for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) { + if (last_level_cache_is_shared(cpu, cpuid)) { cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); } -- cgit From 5b8dc787ce4a45c87254d1b0b22f161347ab7f81 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:56 +0100 Subject: arch_topology: Drop LLC identifier stash from the CPU topology Since the cacheinfo LLC information is used directly in arch_topology, there is no need to parse and store the LLC ID information only for ACPI systems in the CPU topology. Remove the redundant LLC ID from the generic CPU arch_topology information. Link: https://lore.kernel.org/r/20220704101605.1318280-13-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 1 - include/linux/arch_topology.h | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 4f936c984fb6..8206990c679f 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -752,7 +752,6 @@ void __init reset_cpu_topology(void) cpu_topo->core_id = -1; cpu_topo->cluster_id = -1; cpu_topo->package_id = -1; - cpu_topo->llc_id = -1; clear_cpu_topology(cpu); } diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 58cbe18d825c..a07b510e7dc5 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -68,7 +68,6 @@ struct cpu_topology { int core_id; int cluster_id; int package_id; - int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; -- cgit From 3f8283296b16c4e43fd79a5ac364ae8171fe1567 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:57 +0100 Subject: arch_topology: Set thread sibling cpumask only within the cluster Currently the cluster identifier is not set on the DT based platforms. The reset or default value is -1 for all the CPUs. Once we assign the cluster identifier values correctly that may result in getting the thread siblings wrong as the core identifiers can be same for 2 different CPUs belonging to 2 different cluster. So, in order to get the thread sibling cpumasks correct, we need to update them only if the cores they belong are in the same cluster within the socket. Let us skip updation of the thread sibling cpumaks if the cluster identifier doesn't match. This change won't affect even if the cluster identifiers are not set currently but will avoid any breakage once we set the same correctly. Link: https://lore.kernel.org/r/20220704101605.1318280-14-sudeep.holla@arm.com Tested-by: Gavin Shan Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 8206990c679f..6ab173caf1dc 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -708,15 +708,17 @@ void update_siblings_masks(unsigned int cpuid) if (cpuid_topo->package_id != cpu_topo->package_id) continue; - if (cpuid_topo->cluster_id == cpu_topo->cluster_id && - cpuid_topo->cluster_id != -1) { + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + continue; + + if (cpuid_topo->cluster_id != -1) { cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling); cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling); } - cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); - if (cpuid_topo->core_id != cpu_topo->core_id) continue; -- cgit From 9eb5e54f876dda1aae0aef10bdd61da4331509ba Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:58 +0100 Subject: arch_topology: Check for non-negative value rather than -1 for IDs validity Instead of just comparing the cpu topology IDs with -1 to check their validity, improve that by checking for a valid non-negative value. Link: https://lore.kernel.org/r/20220704101605.1318280-15-sudeep.holla@arm.com Suggested-by: Andy Shevchenko Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 6ab173caf1dc..c0b0ee64a79d 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -642,7 +642,7 @@ static int __init parse_dt_topology(void) * only mark cores described in the DT as possible. */ for_each_possible_cpu(cpu) - if (cpu_topology[cpu].package_id == -1) + if (cpu_topology[cpu].package_id < 0) ret = -EINVAL; out_map: @@ -714,7 +714,7 @@ void update_siblings_masks(unsigned int cpuid) if (cpuid_topo->cluster_id != cpu_topo->cluster_id) continue; - if (cpuid_topo->cluster_id != -1) { + if (cpuid_topo->cluster_id >= 0) { cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling); cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling); } -- cgit From 5a01bb8efb5177236498fc57b147cabd2b792613 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:59 +0100 Subject: arch_topology: Avoid parsing through all the CPUs once a outlier CPU is found There is no point in looping through all the CPU's physical package identifier to check if it is valid or not once a CPU which is outside the topology(i.e. outlier CPU) is found. Let us just break out of the loop early in such case. Link: https://lore.kernel.org/r/20220704101605.1318280-16-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index c0b0ee64a79d..8f6a964d2512 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -642,8 +642,10 @@ static int __init parse_dt_topology(void) * only mark cores described in the DT as possible. */ for_each_possible_cpu(cpu) - if (cpu_topology[cpu].package_id < 0) + if (cpu_topology[cpu].package_id < 0) { ret = -EINVAL; + break; + } out_map: of_node_put(map); -- cgit From 26a2b73a7b15a51ec1409648c9b43882f66fbacf Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:16:00 +0100 Subject: arch_topology: Don't set cluster identifier as physical package identifier Currently as we parse the CPU topology from /cpu-map node from the device tree, we assign generated cluster count as the physical package identifier for each CPU which is wrong. The device tree bindings for CPU topology supports sockets to infer the socket or physical package identifier for a given CPU. Since it is fairly new and not supported on most of the old and existing systems, we can assume all such systems have single socket/physical package. Fix the physical package identifier to 0 by removing the assignment of cluster identifier to the same. Link: https://lore.kernel.org/r/20220704101605.1318280-17-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Ionela Voinescu Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 8f6a964d2512..e384afb6cac7 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -549,7 +549,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth) bool leaf = true; bool has_cores = false; struct device_node *c; - static int package_id __initdata; int core_id = 0; int i, ret; @@ -588,7 +587,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, package_id, core_id++); + ret = parse_core(c, 0, core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -605,9 +604,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth) if (leaf && !has_cores) pr_warn("%pOF: empty cluster\n", cluster); - if (leaf) - package_id++; - return 0; } -- cgit From bfcc4397435dc0407099b9a805391abc05c2313b Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Mon, 4 Jul 2022 11:16:01 +0100 Subject: arch_topology: Limit span of cpu_clustergroup_mask() Currently the cluster identifier is not set on DT based platforms. The reset or default value is -1 for all the CPUs. Once we assign the cluster identifier values correctly, the cluster_sibling mask will be populated and returned by cpu_clustergroup_mask() to contribute in the creation of the CLS scheduling domain level, if SCHED_CLUSTER is enabled. To avoid topologies that will result in questionable or incorrect scheduling domains, impose restrictions regarding the span of clusters, as presented to scheduling domains building code: cluster_sibling should not span more or the same CPUs as cpu_coregroup_mask(). This is needed in order to obtain a strict separation between the MC and CLS levels, and maintain the same domains for existing platforms in the presence of CONFIG_SCHED_CLUSTER, where the new cluster information is redundant and irrelevant for the scheduler. While previously the scheduling domain builder code would have removed MC as redundant and kept CLS if SCHED_CLUSTER was enabled and the cpu_coregroup_mask() and cpu_clustergroup_mask() spanned the same CPUs, now CLS will be removed and MC kept. Link: https://lore.kernel.org/r/20220704101605.1318280-18-sudeep.holla@arm.com Cc: Darren Hart Tested-by: Conor Dooley Acked-by: Vincent Guittot Signed-off-by: Ionela Voinescu Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index e384afb6cac7..591c1f8e15e2 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -686,6 +686,14 @@ const struct cpumask *cpu_coregroup_mask(int cpu) const struct cpumask *cpu_clustergroup_mask(int cpu) { + /* + * Forbid cpu_clustergroup_mask() to span more or the same CPUs as + * cpu_coregroup_mask(). + */ + if (cpumask_subset(cpu_coregroup_mask(cpu), + &cpu_topology[cpu].cluster_sibling)) + return get_cpu_mask(cpu); + return &cpu_topology[cpu].cluster_sibling; } -- cgit From 556c9678a7d4456a677588ce308200a673b7eb1f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:16:02 +0100 Subject: arch_topology: Set cluster identifier in each core/thread from /cpu-map Let us set the cluster identifier as parsed from the device tree cluster nodes within /cpu-map. We don't support nesting of clusters yet as there are no real hardware to support clusters of clusters. Link: https://lore.kernel.org/r/20220704101605.1318280-19-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Ionela Voinescu Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 591c1f8e15e2..217a91fc1f59 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -497,7 +497,7 @@ static int __init get_cpu_for_node(struct device_node *node) } static int __init parse_core(struct device_node *core, int package_id, - int core_id) + int cluster_id, int core_id) { char name[20]; bool leaf = true; @@ -513,6 +513,7 @@ static int __init parse_core(struct device_node *core, int package_id, cpu = get_cpu_for_node(t); if (cpu >= 0) { cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].cluster_id = cluster_id; cpu_topology[cpu].core_id = core_id; cpu_topology[cpu].thread_id = i; } else if (cpu != -ENODEV) { @@ -534,6 +535,7 @@ static int __init parse_core(struct device_node *core, int package_id, } cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].cluster_id = cluster_id; cpu_topology[cpu].core_id = core_id; } else if (leaf && cpu != -ENODEV) { pr_err("%pOF: Can't get CPU for leaf core\n", core); @@ -543,7 +545,8 @@ static int __init parse_core(struct device_node *core, int package_id, return 0; } -static int __init parse_cluster(struct device_node *cluster, int depth) +static int __init +parse_cluster(struct device_node *cluster, int cluster_id, int depth) { char name[20]; bool leaf = true; @@ -563,7 +566,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) c = of_get_child_by_name(cluster, name); if (c) { leaf = false; - ret = parse_cluster(c, depth + 1); + ret = parse_cluster(c, i, depth + 1); of_node_put(c); if (ret != 0) return ret; @@ -587,7 +590,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, 0, core_id++); + ret = parse_core(c, 0, cluster_id, core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -627,7 +630,7 @@ static int __init parse_dt_topology(void) if (!map) goto out; - ret = parse_cluster(map, 0); + ret = parse_cluster(map, -1, 0); if (ret != 0) goto out_map; -- cgit From dea8c0b40fb500be29f4649cf01202e42a8a54f8 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:16:03 +0100 Subject: arch_topology: Add support for parsing sockets in /cpu-map Finally let us add support for socket nodes in /cpu-map in the device tree. Since this may not be present in all the old platforms and even most of the existing platforms, we need to assume absence of the socket node indicates that it is a single socket system and handle appropriately. Also it is likely that most single socket systems skip to as the node since it is optional. Link: https://lore.kernel.org/r/20220704101605.1318280-20-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Ionela Voinescu Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 217a91fc1f59..8719c4458df9 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -545,8 +545,8 @@ static int __init parse_core(struct device_node *core, int package_id, return 0; } -static int __init -parse_cluster(struct device_node *cluster, int cluster_id, int depth) +static int __init parse_cluster(struct device_node *cluster, int package_id, + int cluster_id, int depth) { char name[20]; bool leaf = true; @@ -566,7 +566,7 @@ parse_cluster(struct device_node *cluster, int cluster_id, int depth) c = of_get_child_by_name(cluster, name); if (c) { leaf = false; - ret = parse_cluster(c, i, depth + 1); + ret = parse_cluster(c, package_id, i, depth + 1); of_node_put(c); if (ret != 0) return ret; @@ -590,7 +590,8 @@ parse_cluster(struct device_node *cluster, int cluster_id, int depth) } if (leaf) { - ret = parse_core(c, 0, cluster_id, core_id++); + ret = parse_core(c, package_id, cluster_id, + core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -610,6 +611,32 @@ parse_cluster(struct device_node *cluster, int cluster_id, int depth) return 0; } +static int __init parse_socket(struct device_node *socket) +{ + char name[20]; + struct device_node *c; + bool has_socket = false; + int package_id = 0, ret; + + do { + snprintf(name, sizeof(name), "socket%d", package_id); + c = of_get_child_by_name(socket, name); + if (c) { + has_socket = true; + ret = parse_cluster(c, package_id, -1, 0); + of_node_put(c); + if (ret != 0) + return ret; + } + package_id++; + } while (c); + + if (!has_socket) + ret = parse_cluster(socket, 0, -1, 0); + + return ret; +} + static int __init parse_dt_topology(void) { struct device_node *cn, *map; @@ -630,7 +657,7 @@ static int __init parse_dt_topology(void) if (!map) goto out; - ret = parse_cluster(map, -1, 0); + ret = parse_socket(map); if (ret != 0) goto out_map; -- cgit From 00e66e37af0090f9ed95ca4bc3d8f5c6171daaf0 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:16:04 +0100 Subject: arch_topology: Warn that topology for nested clusters is not supported We don't support the topology for clusters of CPU clusters while the DT and ACPI bindings theoritcally support the same. Just warn about the same so that it is clear to the users of arch_topology that the nested clusters are not yet supported. Link: https://lore.kernel.org/r/20220704101605.1318280-21-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Signed-off-by: Sudeep Holla --- drivers/base/arch_topology.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 8719c4458df9..441e14ac33a4 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -567,6 +567,8 @@ static int __init parse_cluster(struct device_node *cluster, int package_id, if (c) { leaf = false; ret = parse_cluster(c, package_id, i, depth + 1); + if (depth > 0) + pr_warn("Topology for clusters of clusters not yet supported\n"); of_node_put(c); if (ret != 0) return ret; -- cgit