aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched/topology.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r--kernel/sched/topology.c160
1 files changed, 102 insertions, 58 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index dd7770226086..09d35044bd88 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -211,6 +211,15 @@ unsigned int sysctl_sched_energy_aware = 1;
DEFINE_MUTEX(sched_energy_mutex);
bool sched_energy_update;
+void rebuild_sched_domains_energy(void)
+{
+ mutex_lock(&sched_energy_mutex);
+ sched_energy_update = true;
+ rebuild_sched_domains();
+ sched_energy_update = false;
+ mutex_unlock(&sched_energy_mutex);
+}
+
#ifdef CONFIG_PROC_SYSCTL
int sched_energy_aware_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -223,13 +232,8 @@ int sched_energy_aware_handler(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write) {
state = static_branch_unlikely(&sched_energy_present);
- if (state != sysctl_sched_energy_aware) {
- mutex_lock(&sched_energy_mutex);
- sched_energy_update = 1;
- rebuild_sched_domains();
- sched_energy_update = 0;
- mutex_unlock(&sched_energy_mutex);
- }
+ if (state != sysctl_sched_energy_aware)
+ rebuild_sched_domains_energy();
}
return ret;
@@ -324,6 +328,7 @@ static void sched_energy_set(bool has_eas)
* 3. no SMT is detected.
* 4. the EM complexity is low enough to keep scheduling overheads low;
* 5. schedutil is driving the frequency of all CPUs of the rd;
+ * 6. frequency invariance support is present;
*
* The complexity of the Energy Model is defined as:
*
@@ -372,6 +377,14 @@ static bool build_perf_domains(const struct cpumask *cpu_map)
goto free;
}
+ if (!arch_scale_freq_invariant()) {
+ if (sched_debug()) {
+ pr_warn("rd %*pbl: Disabling EAS: frequency-invariant load tracking not yet supported",
+ cpumask_pr_args(cpu_map));
+ }
+ goto free;
+ }
+
for_each_cpu(i, cpu_map) {
/* Skip already covered CPUs. */
if (find_pd(pd, i))
@@ -516,6 +529,7 @@ static int init_rootdomain(struct root_domain *rd)
init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
#endif
+ rd->visit_gen = 0;
init_dl_bw(&rd->dl_bw);
if (cpudl_init(&rd->cpudl) != 0)
goto free_rto_mask;
@@ -674,6 +688,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct sched_domain *tmp;
+ int numa_distance = 0;
/* Remove the sched domains which do not contribute to scheduling. */
for (tmp = sd; tmp; ) {
@@ -705,6 +720,38 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
sd->child = NULL;
}
+ for (tmp = sd; tmp; tmp = tmp->parent)
+ numa_distance += !!(tmp->flags & SD_NUMA);
+
+ /*
+ * FIXME: Diameter >=3 is misrepresented.
+ *
+ * Smallest diameter=3 topology is:
+ *
+ * node 0 1 2 3
+ * 0: 10 20 30 40
+ * 1: 20 10 20 30
+ * 2: 30 20 10 20
+ * 3: 40 30 20 10
+ *
+ * 0 --- 1 --- 2 --- 3
+ *
+ * NUMA-3 0-3 N/A N/A 0-3
+ * groups: {0-2},{1-3} {1-3},{0-2}
+ *
+ * NUMA-2 0-2 0-3 0-3 1-3
+ * groups: {0-1},{1-3} {0-2},{2-3} {1-3},{0-1} {2-3},{0-2}
+ *
+ * NUMA-1 0-1 0-2 1-3 2-3
+ * groups: {0},{1} {1},{2},{0} {2},{3},{1} {3},{2}
+ *
+ * NUMA-0 0 1 2 3
+ *
+ * The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the
+ * group span isn't a subset of the domain span.
+ */
+ WARN_ONCE(numa_distance > 2, "Shortest NUMA path spans too many nodes\n");
+
sched_domain_debug(sd, cpu);
rq_attach_root(rq, rd);
@@ -1549,66 +1596,58 @@ static void init_numa_topology_type(void)
}
}
+
+#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS)
+
void sched_init_numa(void)
{
- int next_distance, curr_distance = node_distance(0, 0);
struct sched_domain_topology_level *tl;
- int level = 0;
- int i, j, k;
-
- sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
- if (!sched_domains_numa_distance)
- return;
-
- /* Includes NUMA identity node at level 0. */
- sched_domains_numa_distance[level++] = curr_distance;
- sched_domains_numa_levels = level;
+ unsigned long *distance_map;
+ int nr_levels = 0;
+ int i, j;
/*
* O(nr_nodes^2) deduplicating selection sort -- in order to find the
* unique distances in the node_distance() table.
- *
- * Assumes node_distance(0,j) includes all distances in
- * node_distance(i,j) in order to avoid cubic time.
*/
- next_distance = curr_distance;
+ distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL);
+ if (!distance_map)
+ return;
+
+ bitmap_zero(distance_map, NR_DISTANCE_VALUES);
for (i = 0; i < nr_node_ids; i++) {
for (j = 0; j < nr_node_ids; j++) {
- for (k = 0; k < nr_node_ids; k++) {
- int distance = node_distance(i, k);
-
- if (distance > curr_distance &&
- (distance < next_distance ||
- next_distance == curr_distance))
- next_distance = distance;
+ int distance = node_distance(i, j);
- /*
- * While not a strong assumption it would be nice to know
- * about cases where if node A is connected to B, B is not
- * equally connected to A.
- */
- if (sched_debug() && node_distance(k, i) != distance)
- sched_numa_warn("Node-distance not symmetric");
-
- if (sched_debug() && i && !find_numa_distance(distance))
- sched_numa_warn("Node-0 not representative");
+ if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) {
+ sched_numa_warn("Invalid distance value range");
+ return;
}
- if (next_distance != curr_distance) {
- sched_domains_numa_distance[level++] = next_distance;
- sched_domains_numa_levels = level;
- curr_distance = next_distance;
- } else break;
+
+ bitmap_set(distance_map, distance, 1);
}
+ }
+ /*
+ * We can now figure out how many unique distance values there are and
+ * allocate memory accordingly.
+ */
+ nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES);
- /*
- * In case of sched_debug() we verify the above assumption.
- */
- if (!sched_debug())
- break;
+ sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL);
+ if (!sched_domains_numa_distance) {
+ bitmap_free(distance_map);
+ return;
}
+ for (i = 0, j = 0; i < nr_levels; i++, j++) {
+ j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j);
+ sched_domains_numa_distance[i] = j;
+ }
+
+ bitmap_free(distance_map);
+
/*
- * 'level' contains the number of unique distances
+ * 'nr_levels' contains the number of unique distances
*
* The sched_domains_numa_distance[] array includes the actual distance
* numbers.
@@ -1617,15 +1656,15 @@ void sched_init_numa(void)
/*
* Here, we should temporarily reset sched_domains_numa_levels to 0.
* If it fails to allocate memory for array sched_domains_numa_masks[][],
- * the array will contain less then 'level' members. This could be
+ * the array will contain less then 'nr_levels' members. This could be
* dangerous when we use it to iterate array sched_domains_numa_masks[][]
* in other functions.
*
- * We reset it to 'level' at the end of this function.
+ * We reset it to 'nr_levels' at the end of this function.
*/
sched_domains_numa_levels = 0;
- sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
+ sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL);
if (!sched_domains_numa_masks)
return;
@@ -1633,7 +1672,7 @@ void sched_init_numa(void)
* Now for each level, construct a mask per node which contains all
* CPUs of nodes that are that many hops away from us.
*/
- for (i = 0; i < level; i++) {
+ for (i = 0; i < nr_levels; i++) {
sched_domains_numa_masks[i] =
kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
if (!sched_domains_numa_masks[i])
@@ -1641,12 +1680,17 @@ void sched_init_numa(void)
for (j = 0; j < nr_node_ids; j++) {
struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ int k;
+
if (!mask)
return;
sched_domains_numa_masks[i][j] = mask;
for_each_node(k) {
+ if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
+ sched_numa_warn("Node-distance not symmetric");
+
if (node_distance(j, k) > sched_domains_numa_distance[i])
continue;
@@ -1658,7 +1702,7 @@ void sched_init_numa(void)
/* Compute default topology size */
for (i = 0; sched_domain_topology[i].mask; i++);
- tl = kzalloc((i + level + 1) *
+ tl = kzalloc((i + nr_levels + 1) *
sizeof(struct sched_domain_topology_level), GFP_KERNEL);
if (!tl)
return;
@@ -1681,7 +1725,7 @@ void sched_init_numa(void)
/*
* .. and append 'j' levels of NUMA goodness.
*/
- for (j = 1; j < level; i++, j++) {
+ for (j = 1; j < nr_levels; i++, j++) {
tl[i] = (struct sched_domain_topology_level){
.mask = sd_numa_mask,
.sd_flags = cpu_numa_flags,
@@ -1693,8 +1737,8 @@ void sched_init_numa(void)
sched_domain_topology = tl;
- sched_domains_numa_levels = level;
- sched_max_numa_distance = sched_domains_numa_distance[level - 1];
+ sched_domains_numa_levels = nr_levels;
+ sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
init_numa_topology_type();
}