author     Jens Axboe <[email protected]>    2018-12-18 08:29:53 -0700
committer  Jens Axboe <[email protected]>    2018-12-18 08:29:53 -0700
commit     4b9254328254bed12a4ac449cdff2c332e630837 (patch)
tree       90ef63c168b0e63e6f07f8736f18faa8a544406f /kernel
parent     1a9430db2835c0c00acc87d915b573496998c1bf (diff)
parent     cd19181bf9ad4b7f40f2a4e0355d052109c76529 (diff)
Merge branch 'for-4.21/block' into for-4.21/aio
* for-4.21/block: (351 commits)
  blk-mq: enable IO poll if .nr_queues of type poll > 0
  blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
  blk-mq: skip zero-queue maps in blk_mq_map_swqueue
  block: fix blk-iolatency accounting underflow
  blk-mq: fix dispatch from sw queue
  block: mq-deadline: Fix write completion handling
  nvme-pci: don't share queue maps
  blk-mq: only dispatch to non-defauly queue maps if they have queues
  blk-mq: export hctx->type in debugfs instead of sysfs
  blk-mq: fix allocation for queue mapping table
  blk-wbt: export internal state via debugfs
  blk-mq-debugfs: support rq_qos
  block: update sysfs documentation
  block: loop: check error using IS_ERR instead of IS_ERR_OR_NULL in loop_add()
  aoe: add __exit annotation
  block: clear REQ_HIPRI if polling is not supported
  blk-mq: replace and kill blk_mq_request_issue_directly
  blk-mq: issue directly with bypass 'false' in blk_mq_sched_insert_requests
  blk-mq: refactor the code of issue request directly
  block: remove the bio_integrity_advance export
  ...
Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/cgroup/cgroup.c   |  48
 -rw-r--r--  kernel/irq/affinity.c    | 148
 -rw-r--r--  kernel/trace/blktrace.c  |   4
 3 files changed, 141 insertions(+), 59 deletions(-)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 6aaf5dd5383b..8b79318810ad 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
}
/**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
+ * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest (%NULL returns @cgrp->self)
*
@@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
* enabled. If @ss is associated with the hierarchy @cgrp is on, this
* function is guaranteed to return non-NULL css.
*/
-static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
- struct cgroup_subsys *ss)
+static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
{
lockdep_assert_held(&cgroup_mutex);
@@ -524,6 +524,35 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
}
/**
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get the effective css of @cgrp for @ss. The effective css is
+ * defined as the matching css of the nearest ancestor including self which
+ * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
+ * the root css is returned, so this function always returns a valid css.
+ *
+ * The returned css is not guaranteed to be online, and therefore it is the
+ * caller's responsibility to tryget a reference for it.
+ */
+struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
+{
+ struct cgroup_subsys_state *css;
+
+ do {
+ css = cgroup_css(cgrp, ss);
+
+ if (css)
+ return css;
+ cgrp = cgroup_parent(cgrp);
+ } while (cgrp);
+
+ return init_css_set.subsys[ss->id];
+}
+
+/**
* cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest
@@ -605,10 +634,11 @@ EXPORT_SYMBOL_GPL(of_css);
*
* Should be called under cgroup_[tree_]mutex.
*/
-#define for_each_e_css(css, ssid, cgrp) \
- for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
- if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
- ; \
+#define for_each_e_css(css, ssid, cgrp) \
+ for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
+ if (!((css) = cgroup_e_css_by_mask(cgrp, \
+ cgroup_subsys[(ssid)]))) \
+ ; \
else
/**
@@ -1007,7 +1037,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
* @ss is in this hierarchy, so we want the
* effective css from @cgrp.
*/
- template[i] = cgroup_e_css(cgrp, ss);
+ template[i] = cgroup_e_css_by_mask(cgrp, ss);
} else {
/*
* @ss is not in this hierarchy, so we don't want
@@ -3024,7 +3054,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
return ret;
/*
- * At this point, cgroup_e_css() results reflect the new csses
+ * At this point, cgroup_e_css_by_mask() results reflect the new csses
* making the following cgroup_update_dfl_csses() properly update
* css associations of all tasks in the subtree.
*/
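
The cgroup.c hunks above split the old lookup in two: cgroup_e_css_by_mask() keeps the subtree-mask based lookup that must run under cgroup_mutex, while the new cgroup_e_css() simply walks up the ancestor chain and falls back to the root css, so it never returns NULL. A minimal user-space sketch of that parent-walk-with-fallback pattern (not kernel code; node, find_state and root_default are hypothetical stand-ins for the cgroup structures):

/*
 * Minimal user-space sketch (not kernel code) of the parent-walk pattern
 * the new cgroup_e_css() uses: walk up the ancestor chain until a node
 * provides the requested state, and fall back to a root default so the
 * lookup never returns NULL.
 */
#include <stdio.h>

struct node {
	struct node *parent;
	const char *state;		/* NULL means "not enabled here" */
};

static const char *root_default = "root-css";

/* Effective state: nearest ancestor, including self, that has one. */
static const char *find_state(struct node *n)
{
	do {
		if (n->state)
			return n->state;
		n = n->parent;
	} while (n);

	return root_default;		/* mirrors init_css_set.subsys[ss->id] */
}

int main(void)
{
	struct node root = { .parent = NULL,  .state = "root-css" };
	struct node mid  = { .parent = &root, .state = NULL };
	struct node leaf = { .parent = &mid,  .state = NULL };

	printf("leaf effective state: %s\n", find_state(&leaf));
	return 0;
}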
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f4f29b9d90ee..08c904eb7279 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -94,15 +94,15 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
return nodes;
}
-static int irq_build_affinity_masks(const struct irq_affinity *affd,
- int startvec, int numvecs,
+static int __irq_build_affinity_masks(const struct irq_affinity *affd,
+ int startvec, int numvecs, int firstvec,
cpumask_var_t *node_to_cpumask,
const struct cpumask *cpu_mask,
struct cpumask *nmsk,
struct cpumask *masks)
{
int n, nodes, cpus_per_vec, extra_vecs, done = 0;
- int last_affv = affd->pre_vectors + numvecs;
+ int last_affv = firstvec + numvecs;
int curvec = startvec;
nodemask_t nodemsk = NODE_MASK_NONE;
@@ -117,12 +117,11 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
*/
if (numvecs <= nodes) {
for_each_node_mask(n, nodemsk) {
- cpumask_copy(masks + curvec, node_to_cpumask[n]);
- if (++done == numvecs)
- break;
+ cpumask_or(masks + curvec, masks + curvec, node_to_cpumask[n]);
if (++curvec == last_affv)
- curvec = affd->pre_vectors;
+ curvec = firstvec;
}
+ done = numvecs;
goto out;
}
@@ -130,7 +129,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
int ncpus, v, vecs_to_assign, vecs_per_node;
/* Spread the vectors per node */
- vecs_per_node = (numvecs - (curvec - affd->pre_vectors)) / nodes;
+ vecs_per_node = (numvecs - (curvec - firstvec)) / nodes;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
@@ -158,7 +157,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
if (done >= numvecs)
break;
if (curvec >= last_affv)
- curvec = affd->pre_vectors;
+ curvec = firstvec;
--nodes;
}
@@ -166,6 +165,62 @@ out:
return done;
}
+/*
+ * build affinity in two stages:
+ * 1) spread present CPUs on these vectors
+ * 2) spread other possible CPUs on these vectors
+ */
+static int irq_build_affinity_masks(const struct irq_affinity *affd,
+ int startvec, int numvecs, int firstvec,
+ cpumask_var_t *node_to_cpumask,
+ struct cpumask *masks)
+{
+ int curvec = startvec, nr_present, nr_others;
+ int ret = -ENOMEM;
+ cpumask_var_t nmsk, npresmsk;
+
+ if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+ return ret;
+
+ if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
+ goto fail;
+
+ ret = 0;
+ /* Stabilize the cpumasks */
+ get_online_cpus();
+ build_node_to_cpumask(node_to_cpumask);
+
+ /* Spread on present CPUs starting from affd->pre_vectors */
+ nr_present = __irq_build_affinity_masks(affd, curvec, numvecs,
+ firstvec, node_to_cpumask,
+ cpu_present_mask, nmsk, masks);
+
+ /*
+ * Spread on non present CPUs starting from the next vector to be
+ * handled. If the spreading of present CPUs already exhausted the
+ * vector space, assign the non present CPUs to the already spread
+ * out vectors.
+ */
+ if (nr_present >= numvecs)
+ curvec = firstvec;
+ else
+ curvec = firstvec + nr_present;
+ cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
+ nr_others = __irq_build_affinity_masks(affd, curvec, numvecs,
+ firstvec, node_to_cpumask,
+ npresmsk, nmsk, masks);
+ put_online_cpus();
+
+ if (nr_present < numvecs)
+ WARN_ON(nr_present + nr_others < numvecs);
+
+ free_cpumask_var(npresmsk);
+
+ fail:
+ free_cpumask_var(nmsk);
+ return ret;
+}
+
/**
* irq_create_affinity_masks - Create affinity masks for multiqueue spreading
* @nvecs: The total number of vectors
@@ -178,8 +233,9 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
{
int affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
int curvec, usedvecs;
- cpumask_var_t nmsk, npresmsk, *node_to_cpumask;
+ cpumask_var_t *node_to_cpumask;
struct cpumask *masks = NULL;
+ int i, nr_sets;
/*
* If there aren't any vectors left after applying the pre/post
@@ -188,15 +244,9 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
if (nvecs == affd->pre_vectors + affd->post_vectors)
return NULL;
- if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
- return NULL;
-
- if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
- goto outcpumsk;
-
node_to_cpumask = alloc_node_to_cpumask();
if (!node_to_cpumask)
- goto outnpresmsk;
+ return NULL;
masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
if (!masks)
@@ -206,30 +256,28 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
for (curvec = 0; curvec < affd->pre_vectors; curvec++)
cpumask_copy(masks + curvec, irq_default_affinity);
- /* Stabilize the cpumasks */
- get_online_cpus();
- build_node_to_cpumask(node_to_cpumask);
-
- /* Spread on present CPUs starting from affd->pre_vectors */
- usedvecs = irq_build_affinity_masks(affd, curvec, affvecs,
- node_to_cpumask, cpu_present_mask,
- nmsk, masks);
-
/*
- * Spread on non present CPUs starting from the next vector to be
- * handled. If the spreading of present CPUs already exhausted the
- * vector space, assign the non present CPUs to the already spread
- * out vectors.
+ * Spread on present CPUs starting from affd->pre_vectors. If we
+ * have multiple sets, build each set's affinity mask separately.
*/
- if (usedvecs >= affvecs)
- curvec = affd->pre_vectors;
- else
- curvec = affd->pre_vectors + usedvecs;
- cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
- usedvecs += irq_build_affinity_masks(affd, curvec, affvecs,
- node_to_cpumask, npresmsk,
- nmsk, masks);
- put_online_cpus();
+ nr_sets = affd->nr_sets;
+ if (!nr_sets)
+ nr_sets = 1;
+
+ for (i = 0, usedvecs = 0; i < nr_sets; i++) {
+ int this_vecs = affd->sets ? affd->sets[i] : affvecs;
+ int ret;
+
+ ret = irq_build_affinity_masks(affd, curvec, this_vecs,
+ curvec, node_to_cpumask, masks);
+ if (ret) {
+ kfree(masks);
+ masks = NULL;
+ goto outnodemsk;
+ }
+ curvec += this_vecs;
+ usedvecs += this_vecs;
+ }
/* Fill out vectors at the end that don't need affinity */
if (usedvecs >= affvecs)
@@ -241,10 +289,6 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
outnodemsk:
free_node_to_cpumask(node_to_cpumask);
-outnpresmsk:
- free_cpumask_var(npresmsk);
-outcpumsk:
- free_cpumask_var(nmsk);
return masks;
}
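
irq_create_affinity_masks() now iterates over affd->nr_sets (treating an unset value as a single set) and spreads each set with irq_build_affinity_masks(), which itself works in two stages: first over present CPUs, then over possible-but-not-present CPUs, wrapping back to the first vector once every vector has been used. A toy user-space sketch of that two-stage round-robin, assuming plain integer arrays in place of cpumasks (spread_stage, present, possible and vec_of_cpu are made up for illustration):

/*
 * Toy model (not kernel code) of the two-stage spread done by
 * __irq_build_affinity_masks(): stage 1 distributes present CPUs over the
 * vectors, stage 2 distributes possible-but-not-present CPUs, wrapping
 * back to the first vector once all vectors have been used.
 */
#include <stdio.h>

#define NVECS	4
#define NCPUS	8

static void spread_stage(const int *cpus, int ncpus, int *vec_of_cpu,
			 int *curvec)
{
	for (int i = 0; i < ncpus; i++) {
		vec_of_cpu[cpus[i]] = *curvec;
		if (++(*curvec) == NVECS)	/* wrap, like curvec = firstvec */
			*curvec = 0;
	}
}

int main(void)
{
	/* CPUs 0-5 are "present", CPUs 6-7 are only "possible". */
	int present[]  = { 0, 1, 2, 3, 4, 5 };
	int possible[] = { 6, 7 };
	int vec_of_cpu[NCPUS];
	int curvec = 0;

	spread_stage(present, 6, vec_of_cpu, &curvec);		/* stage 1 */
	spread_stage(possible, 2, vec_of_cpu, &curvec);		/* stage 2 */

	for (int cpu = 0; cpu < NCPUS; cpu++)
		printf("cpu %d -> vector %d\n", cpu, vec_of_cpu[cpu]);
	return 0;
}

The wrap back to vector 0 models the curvec = firstvec reset that the new firstvec argument carries through the patch.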
@@ -258,13 +302,21 @@ int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity
{
int resv = affd->pre_vectors + affd->post_vectors;
int vecs = maxvec - resv;
- int ret;
+ int set_vecs;
if (resv > minvec)
return 0;
- get_online_cpus();
- ret = min_t(int, cpumask_weight(cpu_possible_mask), vecs) + resv;
- put_online_cpus();
- return ret;
+ if (affd->nr_sets) {
+ int i;
+
+ for (i = 0, set_vecs = 0; i < affd->nr_sets; i++)
+ set_vecs += affd->sets[i];
+ } else {
+ get_online_cpus();
+ set_vecs = cpumask_weight(cpu_possible_mask);
+ put_online_cpus();
+ }
+
+ return resv + min(set_vecs, vecs);
}
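
The irq_calc_affinity_vectors() change follows the same theme: when interrupt sets are defined, the spreadable vector count is the sum of affd->sets[], otherwise it remains the possible-CPU count, and the result is resv + min(set_vecs, vecs). A small user-space sketch of just that arithmetic (affd_like, calc_vectors and nr_possible_cpus are hypothetical; the real code reads cpu_possible_mask under get_online_cpus()):

/*
 * User-space sketch of the new vector-count calculation: reserved (pre +
 * post) vectors always count, and the spreadable portion is capped either
 * by the summed set sizes or by the possible-CPU count.
 */
#include <stdio.h>

struct affd_like {
	int pre_vectors, post_vectors;
	int nr_sets;
	const int *sets;
	int nr_possible_cpus;	/* stands in for cpumask_weight(cpu_possible_mask) */
};

static int min_int(int a, int b) { return a < b ? a : b; }

static int calc_vectors(int minvec, int maxvec, const struct affd_like *affd)
{
	int resv = affd->pre_vectors + affd->post_vectors;
	int vecs = maxvec - resv;
	int set_vecs = 0;

	if (resv > minvec)
		return 0;

	if (affd->nr_sets) {
		for (int i = 0; i < affd->nr_sets; i++)
			set_vecs += affd->sets[i];
	} else {
		set_vecs = affd->nr_possible_cpus;
	}

	return resv + min_int(set_vecs, vecs);
}

int main(void)
{
	static const int sets[] = { 3, 2 };	/* e.g. default + poll queues */
	struct affd_like affd = {
		.pre_vectors = 1, .post_vectors = 0,
		.nr_sets = 2, .sets = sets,
		.nr_possible_cpus = 16,
	};

	/* 3 + 2 spreadable vectors plus 1 reserved -> prints 6. */
	printf("vectors = %d\n", calc_vectors(2, 32, &affd));
	return 0;
}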
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2868d85f1fb1..fac0ddf8a8e2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return NULL;
- if (!bio->bi_css)
+ if (!bio->bi_blkg)
return NULL;
- return cgroup_get_kernfs_id(bio->bi_css->cgroup);
+ return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
}
#else
static union kernfs_node_id *