diff options
| author | Jens Axboe <[email protected]> | 2018-12-18 08:29:53 -0700 |
|---|---|---|
| committer | Jens Axboe <[email protected]> | 2018-12-18 08:29:53 -0700 |
| commit | 4b9254328254bed12a4ac449cdff2c332e630837 (patch) | |
| tree | 90ef63c168b0e63e6f07f8736f18faa8a544406f /kernel | |
| parent | 1a9430db2835c0c00acc87d915b573496998c1bf (diff) | |
| parent | cd19181bf9ad4b7f40f2a4e0355d052109c76529 (diff) | |
Merge branch 'for-4.21/block' into for-4.21/aio
* for-4.21/block: (351 commits)
blk-mq: enable IO poll if .nr_queues of type poll > 0
blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
blk-mq: skip zero-queue maps in blk_mq_map_swqueue
block: fix blk-iolatency accounting underflow
blk-mq: fix dispatch from sw queue
block: mq-deadline: Fix write completion handling
nvme-pci: don't share queue maps
blk-mq: only dispatch to non-defauly queue maps if they have queues
blk-mq: export hctx->type in debugfs instead of sysfs
blk-mq: fix allocation for queue mapping table
blk-wbt: export internal state via debugfs
blk-mq-debugfs: support rq_qos
block: update sysfs documentation
block: loop: check error using IS_ERR instead of IS_ERR_OR_NULL in loop_add()
aoe: add __exit annotation
block: clear REQ_HIPRI if polling is not supported
blk-mq: replace and kill blk_mq_request_issue_directly
blk-mq: issue directly with bypass 'false' in blk_mq_sched_insert_requests
blk-mq: refactor the code of issue request directly
block: remove the bio_integrity_advance export
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup/cgroup.c | 48 | ||||
| -rw-r--r-- | kernel/irq/affinity.c | 148 | ||||
| -rw-r--r-- | kernel/trace/blktrace.c | 4 |
3 files changed, 141 insertions, 59 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6aaf5dd5383b..8b79318810ad 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, } /** - * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem + * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss * @cgrp: the cgroup of interest * @ss: the subsystem of interest (%NULL returns @cgrp->self) * @@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, * enabled. If @ss is associated with the hierarchy @cgrp is on, this * function is guaranteed to return non-NULL css. */ -static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, - struct cgroup_subsys *ss) +static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, + struct cgroup_subsys *ss) { lockdep_assert_held(&cgroup_mutex); @@ -524,6 +524,35 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, } /** + * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem + * @cgrp: the cgroup of interest + * @ss: the subsystem of interest + * + * Find and get the effective css of @cgrp for @ss. The effective css is + * defined as the matching css of the nearest ancestor including self which + * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, + * the root css is returned, so this function always returns a valid css. + * + * The returned css is not guaranteed to be online, and therefore it is the + * callers responsiblity to tryget a reference for it. + */ +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) +{ + struct cgroup_subsys_state *css; + + do { + css = cgroup_css(cgrp, ss); + + if (css) + return css; + cgrp = cgroup_parent(cgrp); + } while (cgrp); + + return init_css_set.subsys[ss->id]; +} + +/** * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest * @ss: the subsystem of interest @@ -605,10 +634,11 @@ EXPORT_SYMBOL_GPL(of_css); * * Should be called under cgroup_[tree_]mutex. */ -#define for_each_e_css(css, ssid, cgrp) \ - for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ - if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \ - ; \ +#define for_each_e_css(css, ssid, cgrp) \ + for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ + if (!((css) = cgroup_e_css_by_mask(cgrp, \ + cgroup_subsys[(ssid)]))) \ + ; \ else /** @@ -1007,7 +1037,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, * @ss is in this hierarchy, so we want the * effective css from @cgrp. */ - template[i] = cgroup_e_css(cgrp, ss); + template[i] = cgroup_e_css_by_mask(cgrp, ss); } else { /* * @ss is not in this hierarchy, so we don't want @@ -3024,7 +3054,7 @@ static int cgroup_apply_control(struct cgroup *cgrp) return ret; /* - * At this point, cgroup_e_css() results reflect the new csses + * At this point, cgroup_e_css_by_mask() results reflect the new csses * making the following cgroup_update_dfl_csses() properly update * css associations of all tasks in the subtree. */ diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index f4f29b9d90ee..08c904eb7279 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -94,15 +94,15 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, return nodes; } -static int irq_build_affinity_masks(const struct irq_affinity *affd, - int startvec, int numvecs, +static int __irq_build_affinity_masks(const struct irq_affinity *affd, + int startvec, int numvecs, int firstvec, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct cpumask *masks) { int n, nodes, cpus_per_vec, extra_vecs, done = 0; - int last_affv = affd->pre_vectors + numvecs; + int last_affv = firstvec + numvecs; int curvec = startvec; nodemask_t nodemsk = NODE_MASK_NONE; @@ -117,12 +117,11 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, */ if (numvecs <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_copy(masks + curvec, node_to_cpumask[n]); - if (++done == numvecs) - break; + cpumask_or(masks + curvec, masks + curvec, node_to_cpumask[n]); if (++curvec == last_affv) - curvec = affd->pre_vectors; + curvec = firstvec; } + done = numvecs; goto out; } @@ -130,7 +129,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, int ncpus, v, vecs_to_assign, vecs_per_node; /* Spread the vectors per node */ - vecs_per_node = (numvecs - (curvec - affd->pre_vectors)) / nodes; + vecs_per_node = (numvecs - (curvec - firstvec)) / nodes; /* Get the cpus on this node which are in the mask */ cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); @@ -158,7 +157,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, if (done >= numvecs) break; if (curvec >= last_affv) - curvec = affd->pre_vectors; + curvec = firstvec; --nodes; } @@ -166,6 +165,62 @@ out: return done; } +/* + * build affinity in two stages: + * 1) spread present CPU on these vectors + * 2) spread other possible CPUs on these vectors + */ +static int irq_build_affinity_masks(const struct irq_affinity *affd, + int startvec, int numvecs, int firstvec, + cpumask_var_t *node_to_cpumask, + struct cpumask *masks) +{ + int curvec = startvec, nr_present, nr_others; + int ret = -ENOMEM; + cpumask_var_t nmsk, npresmsk; + + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) + return ret; + + if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) + goto fail; + + ret = 0; + /* Stabilize the cpumasks */ + get_online_cpus(); + build_node_to_cpumask(node_to_cpumask); + + /* Spread on present CPUs starting from affd->pre_vectors */ + nr_present = __irq_build_affinity_masks(affd, curvec, numvecs, + firstvec, node_to_cpumask, + cpu_present_mask, nmsk, masks); + + /* + * Spread on non present CPUs starting from the next vector to be + * handled. If the spreading of present CPUs already exhausted the + * vector space, assign the non present CPUs to the already spread + * out vectors. + */ + if (nr_present >= numvecs) + curvec = firstvec; + else + curvec = firstvec + nr_present; + cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); + nr_others = __irq_build_affinity_masks(affd, curvec, numvecs, + firstvec, node_to_cpumask, + npresmsk, nmsk, masks); + put_online_cpus(); + + if (nr_present < numvecs) + WARN_ON(nr_present + nr_others < numvecs); + + free_cpumask_var(npresmsk); + + fail: + free_cpumask_var(nmsk); + return ret; +} + /** * irq_create_affinity_masks - Create affinity masks for multiqueue spreading * @nvecs: The total number of vectors @@ -178,8 +233,9 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) { int affvecs = nvecs - affd->pre_vectors - affd->post_vectors; int curvec, usedvecs; - cpumask_var_t nmsk, npresmsk, *node_to_cpumask; + cpumask_var_t *node_to_cpumask; struct cpumask *masks = NULL; + int i, nr_sets; /* * If there aren't any vectors left after applying the pre/post @@ -188,15 +244,9 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) if (nvecs == affd->pre_vectors + affd->post_vectors) return NULL; - if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) - return NULL; - - if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) - goto outcpumsk; - node_to_cpumask = alloc_node_to_cpumask(); if (!node_to_cpumask) - goto outnpresmsk; + return NULL; masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); if (!masks) @@ -206,30 +256,28 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) for (curvec = 0; curvec < affd->pre_vectors; curvec++) cpumask_copy(masks + curvec, irq_default_affinity); - /* Stabilize the cpumasks */ - get_online_cpus(); - build_node_to_cpumask(node_to_cpumask); - - /* Spread on present CPUs starting from affd->pre_vectors */ - usedvecs = irq_build_affinity_masks(affd, curvec, affvecs, - node_to_cpumask, cpu_present_mask, - nmsk, masks); - /* - * Spread on non present CPUs starting from the next vector to be - * handled. If the spreading of present CPUs already exhausted the - * vector space, assign the non present CPUs to the already spread - * out vectors. + * Spread on present CPUs starting from affd->pre_vectors. If we + * have multiple sets, build each sets affinity mask separately. */ - if (usedvecs >= affvecs) - curvec = affd->pre_vectors; - else - curvec = affd->pre_vectors + usedvecs; - cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); - usedvecs += irq_build_affinity_masks(affd, curvec, affvecs, - node_to_cpumask, npresmsk, - nmsk, masks); - put_online_cpus(); + nr_sets = affd->nr_sets; + if (!nr_sets) + nr_sets = 1; + + for (i = 0, usedvecs = 0; i < nr_sets; i++) { + int this_vecs = affd->sets ? affd->sets[i] : affvecs; + int ret; + + ret = irq_build_affinity_masks(affd, curvec, this_vecs, + curvec, node_to_cpumask, masks); + if (ret) { + kfree(masks); + masks = NULL; + goto outnodemsk; + } + curvec += this_vecs; + usedvecs += this_vecs; + } /* Fill out vectors at the end that don't need affinity */ if (usedvecs >= affvecs) @@ -241,10 +289,6 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) outnodemsk: free_node_to_cpumask(node_to_cpumask); -outnpresmsk: - free_cpumask_var(npresmsk); -outcpumsk: - free_cpumask_var(nmsk); return masks; } @@ -258,13 +302,21 @@ int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity { int resv = affd->pre_vectors + affd->post_vectors; int vecs = maxvec - resv; - int ret; + int set_vecs; if (resv > minvec) return 0; - get_online_cpus(); - ret = min_t(int, cpumask_weight(cpu_possible_mask), vecs) + resv; - put_online_cpus(); - return ret; + if (affd->nr_sets) { + int i; + + for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) + set_vecs += affd->sets[i]; + } else { + get_online_cpus(); + set_vecs = cpumask_weight(cpu_possible_mask); + put_online_cpus(); + } + + return resv + min(set_vecs, vecs); } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2868d85f1fb1..fac0ddf8a8e2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return NULL; - if (!bio->bi_css) + if (!bio->bi_blkg) return NULL; - return cgroup_get_kernfs_id(bio->bi_css->cgroup); + return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); } #else static union kernfs_node_id * |