54 files changed, 544 insertions, 357 deletions
diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst index 4d151fbe2058..f9bf18ea6509 100644 --- a/Documentation/block/inline-encryption.rst +++ b/Documentation/block/inline-encryption.rst @@ -142,7 +142,7 @@ Therefore, we also introduce *blk-crypto-fallback*, which is an implementation of inline encryption using the kernel crypto API. blk-crypto-fallback is built into the block layer, so it works on any block device without any special setup. Essentially, when a bio with an encryption context is submitted to a -request_queue that doesn't support that encryption context, the block layer will +block_device that doesn't support that encryption context, the block layer will handle en/decryption of the bio using blk-crypto-fallback. For encryption, the data cannot be encrypted in-place, as callers usually rely @@ -187,7 +187,7 @@ API presented to users of the block layer ``blk_crypto_config_supported()`` allows users to check ahead of time whether inline encryption with particular crypto settings will work on a particular -request_queue -- either via hardware or via blk-crypto-fallback. This function +block_device -- either via hardware or via blk-crypto-fallback. This function takes in a ``struct blk_crypto_config`` which is like blk_crypto_key, but omits the actual bytes of the key and instead just contains the algorithm, data unit size, etc. This function can be useful if blk-crypto-fallback is disabled. @@ -195,7 +195,7 @@ size, etc. This function can be useful if blk-crypto-fallback is disabled. ``blk_crypto_init_key()`` allows users to initialize a blk_crypto_key. Users must call ``blk_crypto_start_using_key()`` before actually starting to use -a blk_crypto_key on a request_queue (even if ``blk_crypto_config_supported()`` +a blk_crypto_key on a block_device (even if ``blk_crypto_config_supported()`` was called earlier). This is needed to initialize blk-crypto-fallback if it will be needed. This must not be called from the data path, as this may have to allocate resources, which may deadlock in that case. @@ -207,7 +207,7 @@ for en/decryption. Users don't need to worry about freeing the bio_crypt_ctx later, as that happens automatically when the bio is freed or reset. Finally, when done using inline encryption with a blk_crypto_key on a -request_queue, users must call ``blk_crypto_evict_key()``. This ensures that +block_device, users must call ``blk_crypto_evict_key()``. This ensures that the key is evicted from all keyslots it may be programmed into and unlinked from any kernel data structures it may be linked into. @@ -221,9 +221,9 @@ as follows: 5. ``blk_crypto_evict_key()`` (after all I/O has completed) 6. Zeroize the blk_crypto_key (this has no dedicated function) -If a blk_crypto_key is being used on multiple request_queues, then +If a blk_crypto_key is being used on multiple block_devices, then ``blk_crypto_config_supported()`` (if used), ``blk_crypto_start_using_key()``, -and ``blk_crypto_evict_key()`` must be called on each request_queue. +and ``blk_crypto_evict_key()`` must be called on each block_device. 
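For orientation, the six-step sequence above maps onto the new block_device-based API roughly as in the following minimal sketch (the AES-256-XTS mode, 4096-byte data unit size, 8-byte DUN width, and the function name are illustrative assumptions, not part of the series):

/*
 * Sketch of the documented key lifecycle on a block_device. The mode,
 * data unit size, and DUN width below are illustrative assumptions.
 */
static int blk_crypto_usage_sketch(struct block_device *bdev,
				   const u8 raw_key[64])
{
	struct blk_crypto_key key;
	u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = {};
	struct bio *bio;
	int err;

	/* 1-2: initialize the key and start using it on this block_device */
	err = blk_crypto_init_key(&key, raw_key,
				  BLK_ENCRYPTION_MODE_AES_256_XTS,
				  8, 4096);
	if (err)
		return err;
	err = blk_crypto_start_using_key(bdev, &key);
	if (err)
		goto out_zeroize;

	/* 3-4: attach the encryption context, then fill and submit the bio */
	bio = bio_alloc(bdev, 1, REQ_OP_WRITE, GFP_NOIO);
	bio_crypt_set_ctx(bio, &key, dun, GFP_NOIO);
	/* ... bio_add_page(), submit_bio_wait(), bio_put() ... */

	/* 5: evict the key once all I/O using it has completed */
	err = blk_crypto_evict_key(bdev, &key);
out_zeroize:
	/* 6: zeroize the key material; there is no dedicated helper */
	memzero_explicit(&key, sizeof(key));
	return err;
}

As the paragraph above notes, blk_crypto_start_using_key() and blk_crypto_evict_key() would have to be repeated for each block_device if the key is shared across several of them.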
API presented to device drivers =============================== diff --git a/block/bio.c b/block/bio.c index aa1de6a367f9..ab59a491a883 100644 --- a/block/bio.c +++ b/block/bio.c @@ -25,9 +25,15 @@ #include "blk-rq-qos.h" #include "blk-cgroup.h" +#define ALLOC_CACHE_THRESHOLD 16 +#define ALLOC_CACHE_SLACK 64 +#define ALLOC_CACHE_MAX 256 + struct bio_alloc_cache { struct bio *free_list; + struct bio *free_list_irq; unsigned int nr; + unsigned int nr_irq; }; static struct biovec_slab { @@ -408,6 +414,22 @@ static void punt_bios_to_rescuer(struct bio_set *bs) queue_work(bs->rescue_workqueue, &bs->rescue_work); } +static void bio_alloc_irq_cache_splice(struct bio_alloc_cache *cache) +{ + unsigned long flags; + + /* cache->free_list must be empty */ + if (WARN_ON_ONCE(cache->free_list)) + return; + + local_irq_save(flags); + cache->free_list = cache->free_list_irq; + cache->free_list_irq = NULL; + cache->nr += cache->nr_irq; + cache->nr_irq = 0; + local_irq_restore(flags); +} + static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp, struct bio_set *bs) @@ -417,8 +439,12 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, cache = per_cpu_ptr(bs->cache, get_cpu()); if (!cache->free_list) { - put_cpu(); - return NULL; + if (READ_ONCE(cache->nr_irq) >= ALLOC_CACHE_THRESHOLD) + bio_alloc_irq_cache_splice(cache); + if (!cache->free_list) { + put_cpu(); + return NULL; + } } bio = cache->free_list; cache->free_list = bio->bi_next; @@ -462,9 +488,6 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad * for per bio allocations. * - * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process - * context, not hard/soft IRQ. - * * Returns: Pointer to new bio on success, NULL on failure. 
*/ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, @@ -526,6 +549,8 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, } if (unlikely(!p)) return NULL; + if (!mempool_is_saturated(&bs->bio_pool)) + opf &= ~REQ_ALLOC_CACHE; bio = p + bs->front_pad; if (nr_vecs > BIO_INLINE_VECS) { @@ -676,11 +701,8 @@ void guard_bio_eod(struct bio *bio) bio_truncate(bio, maxsector << 9); } -#define ALLOC_CACHE_MAX 512 -#define ALLOC_CACHE_SLACK 64 - -static void bio_alloc_cache_prune(struct bio_alloc_cache *cache, - unsigned int nr) +static int __bio_alloc_cache_prune(struct bio_alloc_cache *cache, + unsigned int nr) { unsigned int i = 0; struct bio *bio; @@ -692,6 +714,17 @@ static void bio_alloc_cache_prune(struct bio_alloc_cache *cache, if (++i == nr) break; } + return i; +} + +static void bio_alloc_cache_prune(struct bio_alloc_cache *cache, + unsigned int nr) +{ + nr -= __bio_alloc_cache_prune(cache, nr); + if (!READ_ONCE(cache->free_list)) { + bio_alloc_irq_cache_splice(cache); + __bio_alloc_cache_prune(cache, nr); + } } static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node) @@ -725,6 +758,35 @@ static void bio_alloc_cache_destroy(struct bio_set *bs) bs->cache = NULL; } +static inline void bio_put_percpu_cache(struct bio *bio) +{ + struct bio_alloc_cache *cache; + + cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu()); + if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX) { + put_cpu(); + bio_free(bio); + return; + } + + bio_uninit(bio); + + if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) { + bio->bi_next = cache->free_list; + cache->free_list = bio; + cache->nr++; + } else { + unsigned long flags; + + local_irq_save(flags); + bio->bi_next = cache->free_list_irq; + cache->free_list_irq = bio; + cache->nr_irq++; + local_irq_restore(flags); + } + put_cpu(); +} + /** * bio_put - release a reference to a bio * @bio: bio to release reference to @@ -740,20 +802,10 @@ void bio_put(struct bio *bio) if (!atomic_dec_and_test(&bio->__bi_cnt)) return; } - - if ((bio->bi_opf & REQ_ALLOC_CACHE) && !WARN_ON_ONCE(in_interrupt())) { - struct bio_alloc_cache *cache; - - bio_uninit(bio); - cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu()); - bio->bi_next = cache->free_list; - cache->free_list = bio; - if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK) - bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK); - put_cpu(); - } else { + if (bio->bi_opf & REQ_ALLOC_CACHE) + bio_put_percpu_cache(bio); + else bio_free(bio); - } } EXPORT_SYMBOL(bio_put); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 6a5c849ee061..57941d2a8ba3 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -59,6 +59,37 @@ static struct workqueue_struct *blkcg_punt_bio_wq; #define BLKG_DESTROY_BATCH_SIZE 64 +/* + * Lockless lists for tracking IO stats update + * + * New IO stats are stored in the percpu iostat_cpu within blkcg_gq (blkg). + * There are multiple blkg's (one for each block device) attached to each + * blkcg. The rstat code keeps track of which cpu has IO stats updated, + * but it doesn't know which blkg has the updated stats. If there are many + * block devices in a system, the cost of iterating all the blkg's to flush + * out the IO stats can be high. To reduce such overhead, a set of percpu + * lockless lists (lhead) per blkcg are used to track the set of recently + * updated iostat_cpu's since the last flush. 
An iostat_cpu will be put + * onto the lockless list on the update side [blk_cgroup_bio_start()] if + * not there yet and then removed when being flushed [blkcg_rstat_flush()]. + * References to blkg are gotten and then put back in the process to + * protect against blkg removal. + * + * Return: 0 if successful or -ENOMEM if allocation fails. + */ +static int init_blkcg_llists(struct blkcg *blkcg) +{ + int cpu; + + blkcg->lhead = alloc_percpu_gfp(struct llist_head, GFP_KERNEL); + if (!blkcg->lhead) + return -ENOMEM; + + for_each_possible_cpu(cpu) + init_llist_head(per_cpu_ptr(blkcg->lhead, cpu)); + return 0; +} + /** * blkcg_css - find the current css * @@ -236,8 +267,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, blkg->blkcg = blkcg; u64_stats_init(&blkg->iostat.sync); - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync); + per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg; + } for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -827,7 +860,9 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) { struct blkcg *blkcg = css_to_blkcg(css); - struct blkcg_gq *blkg; + struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); + struct llist_node *lnode; + struct blkg_iostat_set *bisc, *next_bisc; /* Root-level stats are sourced from system-wide IO stats */ if (!cgroup_parent(css->cgroup)) @@ -835,12 +870,21 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) rcu_read_lock(); - hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { + lnode = llist_del_all(lhead); + if (!lnode) + goto out; + + /* + * Iterate only the iostat_cpu's queued in the lockless list. + */ + llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) { + struct blkcg_gq *blkg = bisc->blkg; struct blkcg_gq *parent = blkg->parent; - struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu); struct blkg_iostat cur; unsigned int seq; + WRITE_ONCE(bisc->lqueued, false); + /* fetch the current per-cpu values */ do { seq = u64_stats_fetch_begin(&bisc->sync); @@ -853,8 +897,10 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) if (parent && parent->parent) blkcg_iostat_update(parent, &blkg->iostat.cur, &blkg->iostat.last); + percpu_ref_put(&blkg->refcnt); } +out: rcu_read_unlock(); } @@ -1038,10 +1084,12 @@ struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css) */ static void blkcg_destroy_blkgs(struct blkcg *blkcg) { + int cpu; + might_sleep(); + css_get(&blkcg->css); spin_lock_irq(&blkcg->lock); - while (!hlist_empty(&blkcg->blkg_list)) { struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, struct blkcg_gq, blkcg_node); @@ -1064,6 +1112,17 @@ static void blkcg_destroy_blkgs(struct blkcg *blkcg) } spin_unlock_irq(&blkcg->lock); + + /* + * Flush all the non-empty percpu lockless lists. 
+ */ + for_each_possible_cpu(cpu) { + struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); + + if (!llist_empty(lhead)) + cgroup_rstat_css_cpu_flush(&blkcg->css, cpu); + } + css_put(&blkcg->css); } /** @@ -1132,6 +1191,7 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) mutex_unlock(&blkcg_pol_mutex); + free_percpu(blkcg->lhead); kfree(blkcg); } @@ -1139,7 +1199,6 @@ static struct cgroup_subsys_state * blkcg_css_alloc(struct cgroup_subsys_state *parent_css) { struct blkcg *blkcg; - struct cgroup_subsys_state *ret; int i; mutex_lock(&blkcg_pol_mutex); @@ -1148,12 +1207,13 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) blkcg = &blkcg_root; } else { blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); - if (!blkcg) { - ret = ERR_PTR(-ENOMEM); + if (!blkcg) goto unlock; - } } + if (init_blkcg_llists(blkcg)) + goto free_blkcg; + for (i = 0; i < BLKCG_MAX_POLS ; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy_data *cpd; @@ -1168,10 +1228,9 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) continue; cpd = pol->cpd_alloc_fn(GFP_KERNEL); - if (!cpd) { - ret = ERR_PTR(-ENOMEM); + if (!cpd) goto free_pd_blkcg; - } + blkcg->cpd[i] = cpd; cpd->blkcg = blkcg; cpd->plid = i; @@ -1195,12 +1254,13 @@ free_pd_blkcg: for (i--; i >= 0; i--) if (blkcg->cpd[i]) blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]); - + free_percpu(blkcg->lhead); +free_blkcg: if (blkcg != &blkcg_root) kfree(blkcg); unlock: mutex_unlock(&blkcg_pol_mutex); - return ret; + return ERR_PTR(-ENOMEM); } static int blkcg_css_online(struct cgroup_subsys_state *css) @@ -1943,6 +2003,7 @@ static int blk_cgroup_io_type(struct bio *bio) void blk_cgroup_bio_start(struct bio *bio) { + struct blkcg *blkcg = bio->bi_blkg->blkcg; int rwd = blk_cgroup_io_type(bio), cpu; struct blkg_iostat_set *bis; unsigned long flags; @@ -1961,9 +2022,21 @@ void blk_cgroup_bio_start(struct bio *bio) } bis->cur.ios[rwd]++; + /* + * If the iostat_cpu isn't in a lockless list, put it into the + * list to indicate that a stat update is pending. + */ + if (!READ_ONCE(bis->lqueued)) { + struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); + + llist_add(&bis->lnode, lhead); + WRITE_ONCE(bis->lqueued, true); + percpu_ref_get(&bis->blkg->refcnt); + } + u64_stats_update_end_irqrestore(&bis->sync, flags); if (cgroup_subsys_on_dfl(io_cgrp_subsys)) - cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu); + cgroup_rstat_updated(blkcg->css.cgroup, cpu); put_cpu(); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index aa2b286bc825..1e94e404eaa8 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -18,6 +18,7 @@ #include <linux/cgroup.h> #include <linux/kthread.h> #include <linux/blk-mq.h> +#include <linux/llist.h> struct blkcg_gq; struct blkg_policy_data; @@ -43,6 +44,9 @@ struct blkg_iostat { struct blkg_iostat_set { struct u64_stats_sync sync; + struct blkcg_gq *blkg; + struct llist_node lnode; + int lqueued; /* queued in llist */ struct blkg_iostat cur; struct blkg_iostat last; }; @@ -97,6 +101,12 @@ struct blkcg { struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; struct list_head all_blkcgs_node; + + /* + * List of updated percpu blkg_iostat_set's since the last flush. 
+ */ + struct llist_head __percpu *lhead; + #ifdef CONFIG_BLK_CGROUP_FC_APPID char fc_app_id[FC_APPID_LEN]; #endif diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index e6818ffaddbf..d31fa80454e4 100644 --- a/block/blk-crypto-internal.h +++ b/block/blk-crypto-internal.h @@ -65,6 +65,18 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq) return rq->crypt_ctx; } +blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + struct blk_crypto_keyslot **slot_ptr); + +void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot); + +int __blk_crypto_evict_key(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key); + +bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, + const struct blk_crypto_config *cfg); + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline int blk_crypto_sysfs_register(struct request_queue *q) diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c index 96c511967386..0307fb0d95d3 100644 --- a/block/blk-crypto-profile.c +++ b/block/blk-crypto-profile.c @@ -32,6 +32,7 @@ #include <linux/wait.h> #include <linux/blkdev.h> #include <linux/blk-integrity.h> +#include "blk-crypto-internal.h" struct blk_crypto_keyslot { atomic_t slot_refs; diff --git a/block/blk-crypto.c b/block/blk-crypto.c index a496aaef85ba..6a461f4d676a 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -267,7 +267,6 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr) { struct bio *bio = *bio_ptr; const struct blk_crypto_key *bc_key = bio->bi_crypt_context->bc_key; - struct blk_crypto_profile *profile; /* Error if bio has no data. */ if (WARN_ON_ONCE(!bio_has_data(bio))) { @@ -284,10 +283,9 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr) * Success if device supports the encryption context, or if we succeeded * in falling back to the crypto API. */ - profile = bdev_get_queue(bio->bi_bdev)->crypto_profile; - if (__blk_crypto_cfg_supported(profile, &bc_key->crypto_cfg)) + if (blk_crypto_config_supported_natively(bio->bi_bdev, + &bc_key->crypto_cfg)) return true; - if (blk_crypto_fallback_bio_prep(bio_ptr)) return true; fail: @@ -352,22 +350,29 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, return 0; } +bool blk_crypto_config_supported_natively(struct block_device *bdev, + const struct blk_crypto_config *cfg) +{ + return __blk_crypto_cfg_supported(bdev_get_queue(bdev)->crypto_profile, + cfg); +} + /* * Check if bios with @cfg can be en/decrypted by blk-crypto (i.e. either the - * request queue it's submitted to supports inline crypto, or the + * block_device it's submitted to supports inline crypto, or the * blk-crypto-fallback is enabled and supports the cfg). 
*/ -bool blk_crypto_config_supported(struct request_queue *q, +bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg) { return IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) || - __blk_crypto_cfg_supported(q->crypto_profile, cfg); + blk_crypto_config_supported_natively(bdev, cfg); } /** * blk_crypto_start_using_key() - Start using a blk_crypto_key on a device + * @bdev: block device to operate on * @key: A key to use on the device - * @q: the request queue for the device * * Upper layers must call this function to ensure that either the hardware * supports the key's crypto settings, or the crypto API fallback has transforms * @@ -379,10 +384,10 @@ bool blk_crypto_config_supported(struct request_queue *q, * blk-crypto-fallback is either disabled or the needed algorithm * is disabled in the crypto API; or another -errno code. */ -int blk_crypto_start_using_key(const struct blk_crypto_key *key, - struct request_queue *q) +int blk_crypto_start_using_key(struct block_device *bdev, + const struct blk_crypto_key *key) { - if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg)) + if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg)) return 0; return blk_crypto_fallback_start_using_mode(key->crypto_cfg.crypto_mode); } @@ -390,7 +395,7 @@ /** * blk_crypto_evict_key() - Evict a key from any inline encryption hardware * it may have been programmed into - * @q: The request queue who's associated inline encryption hardware this key + * @bdev: The block_device whose associated inline encryption hardware this key * might have been programmed into * @key: The key to evict * @@ -400,14 +405,16 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key, * * Return: 0 on success or if the key wasn't in any keyslot; -errno on error. */ -int blk_crypto_evict_key(struct request_queue *q, +int blk_crypto_evict_key(struct block_device *bdev, const struct blk_crypto_key *key) { - if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg)) + struct request_queue *q = bdev_get_queue(bdev); + + if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg)) return __blk_crypto_evict_key(q->crypto_profile, key); /* - * If the request_queue didn't support the key, then blk-crypto-fallback + * If the block_device didn't support the key, then blk-crypto-fallback * may have been used, so try to evict the key from blk-crypto-fallback. 
*/ return blk_crypto_fallback_evict_key(key); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 93997d297d42..4515288fbe35 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -185,7 +185,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct blk_mq_ctx *ctx; - int i, ret; + int i, j, ret; if (!hctx->nr_ctx) return 0; @@ -197,9 +197,16 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) hctx_for_each_ctx(hctx, ctx, i) { ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu); if (ret) - break; + goto out; } + return 0; +out: + hctx_for_each_ctx(hctx, ctx, j) { + if (j < i) + kobject_del(&ctx->kobj); + } + kobject_del(&hctx->kobj); return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index ee16b4c34c6a..4e6b3ccd4989 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4381,7 +4381,7 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, struct blk_mq_tags **new_tags; if (set->nr_hw_queues >= new_nr_hw_queues) - return 0; + goto done; new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); @@ -4393,8 +4393,8 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, sizeof(*set->tags)); kfree(set->tags); set->tags = new_tags; +done: set->nr_hw_queues = new_nr_hw_queues; - return 0; } @@ -4630,9 +4630,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head, INIT_LIST_HEAD(&qe->node); qe->q = q; + qe->type = q->elevator->type; /* keep a reference to the elevator module as we'll switch back */ __elevator_get(qe->type); - qe->type = q->elevator->type; list_add(&qe->node, head); elevator_disable(q); mutex_unlock(&q->sysfs_lock); diff --git a/block/elevator.c b/block/elevator.c index a5bdc3b1e7e5..599413620558 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -91,12 +91,11 @@ static inline bool elv_support_features(struct request_queue *q, } /** - * elevator_match - Test an elevator name and features + * elevator_match - Check whether @e's name or alias matches @name * @e: Scheduler to test * @name: Elevator name to test * - * Return true if the elevator @e name matches @name and if @e provides all - * the features specified by @required_features. + * Return true if the elevator @e's name or alias matches @name. */ static bool elevator_match(const struct elevator_type *e, const char *name) { @@ -650,10 +649,10 @@ void elevator_init_mq(struct request_queue *q) } /* - * switch to new_e io scheduler. be careful not to introduce deadlocks - - * we don't free the old io scheduler, before we have allocated what we - * need for the new one. this way we have a chance of going back to the old - * one, if the new one fails init for some reason. + * Switch to new_e io scheduler. + * + * If switching fails, we are most likely running out of memory and not able + * to restore the old io scheduler, so we leave the io scheduler set to none. 
*/ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { @@ -683,6 +682,12 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) out_unfreeze: blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); + + if (ret) { + pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n", + new_e->elevator_name); + } + return ret; } @@ -716,9 +721,6 @@ static int elevator_change(struct request_queue *q, const char *elevator_name) if (!blk_queue_registered(q)) return -ENOENT; - /* - * Special case for mq, turn off scheduling - */ if (!strncmp(elevator_name, "none", 4)) { if (q->elevator) elevator_disable(q); diff --git a/block/fops.c b/block/fops.c index b90742595317..50d245e8c913 100644 --- a/block/fops.c +++ b/block/fops.c @@ -405,12 +405,6 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, return ret; } -static int blkdev_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - return generic_writepages(mapping, wbc); -} - const struct address_space_operations def_blk_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, @@ -419,7 +413,6 @@ const struct address_space_operations def_blk_aops = { .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, - .writepages = blkdev_writepages, .direct_IO = blkdev_direct_IO, .migrate_folio = buffer_migrate_folio_norefs, .is_dirty_writeback = buffer_check_dirty_writeback, diff --git a/block/genhd.c b/block/genhd.c index 09cde914e054..075d8da284f5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -478,10 +478,6 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, goto out_put_holder_dir; } - ret = bd_register_pending_holders(disk); - if (ret < 0) - goto out_put_slave_dir; - ret = blk_register_queue(disk); if (ret) goto out_put_slave_dir; @@ -528,6 +524,7 @@ out_unregister_queue: blk_unregister_queue(disk); out_put_slave_dir: kobject_put(disk->slave_dir); + disk->slave_dir = NULL; out_put_holder_dir: kobject_put(disk->part0->bd_holder_dir); out_del_integrity: @@ -629,6 +626,7 @@ void del_gendisk(struct gendisk *disk) kobject_put(disk->part0->bd_holder_dir); kobject_put(disk->slave_dir); + disk->slave_dir = NULL; part_stat_set_all(disk->part0, 0); disk->part0->bd_stamp = 0; diff --git a/block/holder.c b/block/holder.c index 5283bc804cc1..37d18c13d958 100644 --- a/block/holder.c +++ b/block/holder.c @@ -4,7 +4,7 @@ struct bd_holder_disk { struct list_head list; - struct block_device *bdev; + struct kobject *holder_dir; int refcnt; }; @@ -14,7 +14,7 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, struct bd_holder_disk *holder; list_for_each_entry(holder, &disk->slave_bdevs, list) - if (holder->bdev == bdev) + if (holder->holder_dir == bdev->bd_holder_dir) return holder; return NULL; } @@ -29,19 +29,6 @@ static void del_symlink(struct kobject *from, struct kobject *to) sysfs_remove_link(from, kobject_name(to)); } -static int __link_disk_holder(struct block_device *bdev, struct gendisk *disk) -{ - int ret; - - ret = add_symlink(disk->slave_dir, bdev_kobj(bdev)); - if (ret) - return ret; - ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); - if (ret) - del_symlink(disk->slave_dir, bdev_kobj(bdev)); - return ret; -} - /** * bd_link_disk_holder - create symlinks between holding disk and slave bdev * @bdev: the claimed slave bdev @@ -75,12 +62,30 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) 
struct bd_holder_disk *holder; int ret = 0; - mutex_lock(&disk->open_mutex); + if (WARN_ON_ONCE(!disk->slave_dir)) + return -EINVAL; + + if (bdev->bd_disk == disk) + return -EINVAL; + + /* + * del_gendisk drops the initial reference to bd_holder_dir, so we + * need to keep our own here to allow for cleanup past that point. + */ + mutex_lock(&bdev->bd_disk->open_mutex); + if (!disk_live(bdev->bd_disk)) { + mutex_unlock(&bdev->bd_disk->open_mutex); + return -ENODEV; + } + kobject_get(bdev->bd_holder_dir); + mutex_unlock(&bdev->bd_disk->open_mutex); + mutex_lock(&disk->open_mutex); WARN_ON_ONCE(!bdev->bd_holder); holder = bd_find_holder_disk(bdev, disk); if (holder) { + kobject_put(bdev->bd_holder_dir); holder->refcnt++; goto out_unlock; } @@ -92,36 +97,32 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) } INIT_LIST_HEAD(&holder->list); - holder->bdev = bdev; holder->refcnt = 1; - if (disk->slave_dir) { - ret = __link_disk_holder(bdev, disk); - if (ret) { - kfree(holder); - goto out_unlock; - } - } + holder->holder_dir = bdev->bd_holder_dir; + ret = add_symlink(disk->slave_dir, bdev_kobj(bdev)); + if (ret) + goto out_free_holder; + ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); + if (ret) + goto out_del_symlink; list_add(&holder->list, &disk->slave_bdevs); - /* - * del_gendisk drops the initial reference to bd_holder_dir, so we need - * to keep our own here to allow for cleanup past that point. - */ - kobject_get(bdev->bd_holder_dir); + mutex_unlock(&disk->open_mutex); + return 0; + +out_del_symlink: + del_symlink(disk->slave_dir, bdev_kobj(bdev)); +out_free_holder: + kfree(holder); out_unlock: mutex_unlock(&disk->open_mutex); + if (ret) + kobject_put(bdev->bd_holder_dir); return ret; } EXPORT_SYMBOL_GPL(bd_link_disk_holder); -static void __unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - del_symlink(disk->slave_dir, bdev_kobj(bdev)); - del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); -} - /** * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder() * @bdev: the claimed slave bdev @@ -136,36 +137,18 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { struct bd_holder_disk *holder; + if (WARN_ON_ONCE(!disk->slave_dir)) + return; + mutex_lock(&disk->open_mutex); holder = bd_find_holder_disk(bdev, disk); if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { - if (disk->slave_dir) - __unlink_disk_holder(bdev, disk); - kobject_put(bdev->bd_holder_dir); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); + del_symlink(holder->holder_dir, &disk_to_dev(disk)->kobj); + kobject_put(holder->holder_dir); list_del_init(&holder->list); kfree(holder); } mutex_unlock(&disk->open_mutex); } EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); - -int bd_register_pending_holders(struct gendisk *disk) -{ - struct bd_holder_disk *holder; - int ret; - - mutex_lock(&disk->open_mutex); - list_for_each_entry(holder, &disk->slave_bdevs, list) { - ret = __link_disk_holder(holder->bdev, disk); - if (ret) - goto out_undo; - } - mutex_unlock(&disk->open_mutex); - return 0; - -out_undo: - list_for_each_entry_continue_reverse(holder, &disk->slave_bdevs, list) - __unlink_disk_holder(holder->bdev, disk); - mutex_unlock(&disk->open_mutex); - return ret; -} diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 5639921dfa92..f10c2a0d18d4 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -131,6 +131,20 @@ static u8 dd_rq_ioclass(struct request *rq) } /* + * get the request before `rq' in 
sector-sorted order + */ +static inline struct request * +deadline_earlier_request(struct request *rq) +{ + struct rb_node *node = rb_prev(&rq->rb_node); + + if (node) + return rb_entry_rq(node); + + return NULL; +} + +/* * get the request after `rq' in sector-sorted order */ static inline struct request * @@ -278,6 +292,39 @@ static inline int deadline_check_fifo(struct dd_per_prio *per_prio, } /* + * Check if rq has a sequential request preceding it. + */ +static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq) +{ + struct request *prev = deadline_earlier_request(rq); + + if (!prev) + return false; + + return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq); +} + +/* + * Skip all write requests that are sequential from @rq, even if we cross + * a zone boundary. + */ +static struct request *deadline_skip_seq_writes(struct deadline_data *dd, + struct request *rq) +{ + sector_t pos = blk_rq_pos(rq); + sector_t skipped_sectors = 0; + + while (rq) { + if (blk_rq_pos(rq) != pos + skipped_sectors) + break; + skipped_sectors += blk_rq_sectors(rq); + rq = deadline_latter_request(rq); + } + + return rq; +} + +/* * For the specified data direction, return the next request to * dispatch using arrival ordered lists. */ @@ -297,11 +344,16 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio, /* * Look for a write request that can be dispatched, that is one with - * an unlocked target zone. + * an unlocked target zone. For some HDDs, breaking a sequential + * write stream can lead to lower throughput, so make sure to preserve + * sequential write streams, even if that stream crosses into the next + * zones and these zones are unlocked. */ spin_lock_irqsave(&dd->zone_lock, flags); list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) { - if (blk_req_can_dispatch_to_zone(rq)) + if (blk_req_can_dispatch_to_zone(rq) && + (blk_queue_nonrot(rq->q) || + !deadline_is_seq_write(dd, rq))) goto out; } rq = NULL; @@ -331,13 +383,19 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio, /* * Look for a write request that can be dispatched, that is one with - * an unlocked target zone. + * an unlocked target zone. For some HDDs, breaking a sequential + * write stream can lead to lower throughput, so make sure to preserve + * sequential write streams, even if that stream crosses into the next + * zones and these zones are unlocked. */ spin_lock_irqsave(&dd->zone_lock, flags); while (rq) { if (blk_req_can_dispatch_to_zone(rq)) break; - rq = deadline_latter_request(rq); + if (blk_queue_nonrot(rq->q)) + rq = deadline_latter_request(rq); + else + rq = deadline_skip_seq_writes(dd, rq); } spin_unlock_irqrestore(&dd->zone_lock, flags); @@ -789,6 +847,18 @@ static void dd_prepare_request(struct request *rq) rq->elv.priv[0] = NULL; } +static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx) +{ + struct deadline_data *dd = hctx->queue->elevator->elevator_data; + enum dd_prio p; + + for (p = 0; p <= DD_PRIO_MAX; p++) + if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE])) + return true; + + return false; +} + /* * Callback from inside blk_mq_free_request(). 
* @@ -828,9 +898,10 @@ static void dd_finish_request(struct request *rq) spin_lock_irqsave(&dd->zone_lock, flags); blk_req_zone_write_unlock(rq); - if (!list_empty(&per_prio->fifo_list[DD_WRITE])) - blk_mq_sched_mark_restart_hctx(rq->mq_hctx); spin_unlock_irqrestore(&dd->zone_lock, flags); + + if (dd_has_write_work(rq->mq_hctx)) + blk_mq_sched_mark_restart_hctx(rq->mq_hctx); } } diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index cbacddc55a1d..6fb4e38fca88 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0-only # # DRBD device driver configuration # diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 8bd534697d1b..c93e462130ff 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0-only drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e27478ae579c..5db147f3c02d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_actlog.c diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 7d9db33363de..b90a5c1003fc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_bitmap.c diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index b3b9cd5628fd..a72c096aa5b1 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) "drbd debugfs: " fmt #include <linux/kernel.h> #include <linux/module.h> diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h index 58e31cef0844..ee3d66eb40c6 100644 --- a/drivers/block/drbd/drbd_debugfs.h +++ b/drivers/block/drbd/drbd_debugfs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/debugfs.h> diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 913cf4c55cba..e29bd10ac52f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* drbd_int.h diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index f07b4378388b..5024ffd6143d 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only #include <asm/bug.h> #include <linux/rbtree_augmented.h> #include "drbd_interval.h" diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index b8c2dee5edc8..366489b72fe9 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __DRBD_INTERVAL_H 
#define __DRBD_INTERVAL_H diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f3e4db16fd07..e02db1dccab1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd.c @@ -2217,7 +2217,8 @@ void drbd_destroy_device(struct kref *kref) kref_put(&peer_device->connection->kref, drbd_destroy_connection); kfree(peer_device); } - memset(device, 0xfd, sizeof(*device)); + if (device->submit.wq) + destroy_workqueue(device->submit.wq); kfree(device); kref_put(&resource->kref, drbd_destroy_resource); } @@ -2309,7 +2310,6 @@ void drbd_destroy_resource(struct kref *kref) idr_destroy(&resource->devices); free_cpumask_var(resource->cpu_mask); kfree(resource->name); - memset(resource, 0xf2, sizeof(*resource)); kfree(resource); } @@ -2650,7 +2650,6 @@ void drbd_destroy_connection(struct kref *kref) drbd_free_socket(&connection->data); kfree(connection->int_dig_in); kfree(connection->int_dig_vv); - memset(connection, 0xfc, sizeof(*connection)); kfree(connection); kref_put(&resource->kref, drbd_destroy_resource); } @@ -2774,7 +2773,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig err = add_disk(disk); if (err) - goto out_idr_remove_from_resource; + goto out_destroy_workqueue; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2788,6 +2787,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; +out_destroy_workqueue: + destroy_workqueue(device->submit.wq); out_idr_remove_from_resource: for_each_connection(connection, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 63f589926d85..cb55b28fba78 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_nl.c diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c index 6a09b0b98018..df0d241d3f6a 100644 --- a/drivers/block/drbd/drbd_nla.c +++ b/drivers/block/drbd/drbd_nla.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <net/netlink.h> #include <linux/drbd_genl_api.h> diff --git a/drivers/block/drbd/drbd_nla.h b/drivers/block/drbd/drbd_nla.h index f5eaffb6474e..d3555df0d353 100644 --- a/drivers/block/drbd/drbd_nla.h +++ b/drivers/block/drbd/drbd_nla.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __DRBD_NLA_H #define __DRBD_NLA_H diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 3c0193de2498..2227fb0db1ce 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_proc.c diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index a882b65ab5d2..56bbca9d7700 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __DRBD_PROTOCOL_H #define __DRBD_PROTOCOL_H diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c index 93d6df4dc5a4..e045fb55f3bf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_receiver.c diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 7f9bcc82fc9c..ced15557197a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_req.c diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 6237fa1dcb0e..b4017b5c3fbc 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* drbd_req.h diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 3f7bf9f2d874..75d13ea0024f 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_state.c diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index f87371e55e68..cbaeb8018dbf 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef DRBD_STATE_H #define DRBD_STATE_H diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index d5b0479bc9a6..9d78d8e3912e 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef DRBD_STATE_CHANGE_H #define DRBD_STATE_CHANGE_H diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index fc01307607ea..0a06f744b096 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd.h diff --git a/drivers/block/drbd/drbd_strings.h b/drivers/block/drbd/drbd_strings.h index 87b94a27358a..0201f6590f6a 100644 --- a/drivers/block/drbd/drbd_strings.h +++ b/drivers/block/drbd/drbd_strings.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __DRBD_STRINGS_H #define __DRBD_STRINGS_H diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h index 01e3babc5277..1ee81e3c2152 100644 --- a/drivers/block/drbd/drbd_vli.h +++ b/drivers/block/drbd/drbd_vli.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- linux-c -*- drbd_receiver.c diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c69beefc9d5c..3df033bfccf8 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_worker.c diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 078da18bb86d..8541d5688f3a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1215,7 +1215,7 @@ static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev, struct dm_keyslot_evict_args *args = data; 
int err; - err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key); + err = blk_crypto_evict_key(dev->bdev, args->key); if (!args->err) args->err = err; /* Always try to evict the key from all devices. */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 95a1ee3d314e..e1ea3a7bd9d9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -732,28 +732,48 @@ static char *_dm_claim_ptr = "I belong to device-mapper"; /* * Open a table device so we can use it as a map destination. */ -static int open_table_device(struct table_device *td, dev_t dev, - struct mapped_device *md) +static struct table_device *open_table_device(struct mapped_device *md, + dev_t dev, fmode_t mode) { + struct table_device *td; struct block_device *bdev; u64 part_off; int r; - BUG_ON(td->dm_dev.bdev); + td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id); + if (!td) + return ERR_PTR(-ENOMEM); + refcount_set(&td->count, 1); - bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); + bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr); + if (IS_ERR(bdev)) { + r = PTR_ERR(bdev); + goto out_free_td; + } - r = bd_link_disk_holder(bdev, dm_disk(md)); - if (r) { - blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL); - return r; + /* + * We can be called before the dm disk is added. In that case we can't + * register the holder relation here. It will be done once add_disk was + * called. + */ + if (md->disk->slave_dir) { + r = bd_link_disk_holder(bdev, md->disk); + if (r) + goto out_blkdev_put; } + td->dm_dev.mode = mode; td->dm_dev.bdev = bdev; td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL); - return 0; + format_dev_t(td->dm_dev.name, dev); + list_add(&td->list, &md->table_devices); + return td; + +out_blkdev_put: + blkdev_put(bdev, mode | FMODE_EXCL); +out_free_td: + kfree(td); + return ERR_PTR(r); } /* @@ -761,14 +781,12 @@ static int open_table_device(struct table_device *td, dev_t dev, */ static void close_table_device(struct table_device *td, struct mapped_device *md) { - if (!td->dm_dev.bdev) - return; - - bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); + if (md->disk->slave_dir) + bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); put_dax(td->dm_dev.dax_dev); - td->dm_dev.bdev = NULL; - td->dm_dev.dax_dev = NULL; + list_del(&td->list); + kfree(td); } static struct table_device *find_table_device(struct list_head *l, dev_t dev, @@ -786,31 +804,16 @@ static struct table_device *find_table_device(struct list_head *l, dev_t dev, int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, struct dm_dev **result) { - int r; struct table_device *td; mutex_lock(&md->table_devices_lock); td = find_table_device(&md->table_devices, dev, mode); if (!td) { - td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id); - if (!td) { + td = open_table_device(md, dev, mode); + if (IS_ERR(td)) { mutex_unlock(&md->table_devices_lock); - return -ENOMEM; + return PTR_ERR(td); } - - td->dm_dev.mode = mode; - td->dm_dev.bdev = NULL; - - if ((r = open_table_device(td, dev, md))) { - mutex_unlock(&md->table_devices_lock); - kfree(td); - return r; - } - - format_dev_t(td->dm_dev.name, dev); - - refcount_set(&td->count, 1); - list_add(&td->list, &md->table_devices); } else { refcount_inc(&td->count); } @@ -825,27 +828,11 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) struct table_device *td = container_of(d, struct table_device, 
dm_dev); mutex_lock(&md->table_devices_lock); - if (refcount_dec_and_test(&td->count)) { + if (refcount_dec_and_test(&td->count)) close_table_device(td, md); - list_del(&td->list); - kfree(td); - } mutex_unlock(&md->table_devices_lock); } -static void free_table_devices(struct list_head *devices) -{ - struct list_head *tmp, *next; - - list_for_each_safe(tmp, next, devices) { - struct table_device *td = list_entry(tmp, struct table_device, list); - - DMWARN("dm_destroy: %s still exists with %d references", - td->dm_dev.name, refcount_read(&td->count)); - kfree(td); - } -} - /* * Get the geometry associated with a dm device */ @@ -1972,8 +1959,21 @@ static void cleanup_mapped_device(struct mapped_device *md) md->disk->private_data = NULL; spin_unlock(&_minor_lock); if (dm_get_md_type(md) != DM_TYPE_NONE) { + struct table_device *td; + dm_sysfs_exit(md); + list_for_each_entry(td, &md->table_devices, list) { + bd_unlink_disk_holder(td->dm_dev.bdev, + md->disk); + } + + /* + * Hold lock to make sure del_gendisk() won't run concurrently + * with open/close_table_device(). + */ + mutex_lock(&md->table_devices_lock); del_gendisk(md->disk); + mutex_unlock(&md->table_devices_lock); } dm_queue_destroy_crypto_profile(md->queue); put_disk(md->disk); @@ -2122,7 +2122,7 @@ static void free_dev(struct mapped_device *md) cleanup_mapped_device(md); - free_table_devices(&md->table_devices); + WARN_ON_ONCE(!list_empty(&md->table_devices)); dm_stats_cleanup(&md->stats); free_minor(minor); @@ -2305,6 +2305,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) { enum dm_queue_mode type = dm_table_get_type(t); struct queue_limits limits; + struct table_device *td; int r; switch (type) { @@ -2333,17 +2334,40 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) if (r) return r; + /* + * Hold lock to make sure add_disk() and del_gendisk() won't run concurrently + * with open_table_device() and close_table_device(). + */ + mutex_lock(&md->table_devices_lock); r = add_disk(md->disk); + mutex_unlock(&md->table_devices_lock); if (r) return r; - r = dm_sysfs_init(md); - if (r) { - del_gendisk(md->disk); - return r; + /* + * Register the holder relationship for devices added before the disk + * was live. + */ + list_for_each_entry(td, &md->table_devices, list) { + r = bd_link_disk_holder(td->dm_dev.bdev, md->disk); + if (r) + goto out_undo_holders; } + + r = dm_sysfs_init(md); + if (r) + goto out_undo_holders; + md->type = type; return 0; + +out_undo_holders: + list_for_each_entry_continue_reverse(td, &md->table_devices, list) + bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); + mutex_lock(&md->table_devices_lock); + del_gendisk(md->disk); + mutex_unlock(&md->table_devices_lock); + return r; } struct mapped_device *dm_get_md(dev_t dev) diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index cea8b14007e6..8bfb3ce86476 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -12,7 +12,7 @@ * provides the key and IV to use. 
*/ -#include <linux/blk-crypto-profile.h> +#include <linux/blk-crypto.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/sched/mm.h> @@ -77,10 +77,8 @@ static void fscrypt_log_blk_crypto_impl(struct fscrypt_mode *mode, unsigned int i; for (i = 0; i < num_devs; i++) { - struct request_queue *q = bdev_get_queue(devs[i]); - if (!IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) || - __blk_crypto_cfg_supported(q->crypto_profile, cfg)) { + blk_crypto_config_supported_natively(devs[i], cfg)) { if (!xchg(&mode->logged_blk_crypto_native, 1)) pr_info("fscrypt: %s using blk-crypto (native)\n", mode->friendly_name); @@ -139,8 +137,7 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci) return PTR_ERR(devs); for (i = 0; i < num_devs; i++) { - if (!blk_crypto_config_supported(bdev_get_queue(devs[i]), - &crypto_cfg)) + if (!blk_crypto_config_supported(devs[i], &crypto_cfg)) goto out_free_devs; } @@ -184,8 +181,7 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, goto fail; } for (i = 0; i < num_devs; i++) { - err = blk_crypto_start_using_key(blk_key, - bdev_get_queue(devs[i])); + err = blk_crypto_start_using_key(devs[i], blk_key); if (err) break; } @@ -224,7 +220,7 @@ void fscrypt_destroy_inline_crypt_key(struct super_block *sb, devs = fscrypt_get_devices(sb, &num_devs); if (!IS_ERR(devs)) { for (i = 0; i < num_devs; i++) - blk_crypto_evict_key(bdev_get_queue(devs[i]), blk_key); + blk_crypto_evict_key(devs[i], blk_key); kfree(devs); } kfree_sensitive(blk_key); diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index bbab65bd5428..e6802b69cdd6 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -138,18 +138,6 @@ int devm_blk_crypto_profile_init(struct device *dev, unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot); -blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile, - const struct blk_crypto_key *key, - struct blk_crypto_keyslot **slot_ptr); - -void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot); - -bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, - const struct blk_crypto_config *cfg); - -int __blk_crypto_evict_key(struct blk_crypto_profile *profile, - const struct blk_crypto_key *key); - void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 69b24fe92cbf..a33d32f5c268 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -71,9 +71,6 @@ struct bio_crypt_ctx { #include <linux/blk_types.h> #include <linux/blkdev.h> -struct request; -struct request_queue; - #ifdef CONFIG_BLK_INLINE_ENCRYPTION static inline bool bio_has_crypt_ctx(struct bio *bio) @@ -94,13 +91,15 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int dun_bytes, unsigned int data_unit_size); -int blk_crypto_start_using_key(const struct blk_crypto_key *key, - struct request_queue *q); +int blk_crypto_start_using_key(struct block_device *bdev, + const struct blk_crypto_key *key); -int blk_crypto_evict_key(struct request_queue *q, +int blk_crypto_evict_key(struct block_device *bdev, const struct blk_crypto_key *key); -bool blk_crypto_config_supported(struct request_queue *q, +bool blk_crypto_config_supported_natively(struct block_device *bdev, + const struct blk_crypto_config *cfg); +bool blk_crypto_config_supported(struct 
block_device *bdev, const struct blk_crypto_config *cfg); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9188aa3f6259..516e45246868 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -833,7 +833,6 @@ void set_capacity(struct gendisk *disk, sector_t size); #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); -int bd_register_pending_holders(struct gendisk *disk); #else static inline int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) @@ -844,10 +843,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { } -static inline int bd_register_pending_holders(struct gendisk *disk) -{ - return 0; -} #endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ dev_t part_devt(struct gendisk *disk, u8 partno); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 528bd44b59e2..6c4e66b3fa84 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -766,6 +766,7 @@ void cgroup_rstat_flush(struct cgroup *cgrp); void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp); void cgroup_rstat_flush_hold(struct cgroup *cgrp); void cgroup_rstat_flush_release(void); +void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu); /* * Basic resource stats. diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 07add7882a5d..c9afcdd9324c 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -199,7 +199,6 @@ struct lru_cache { unsigned long flags; - void *lc_private; const char *name; /* nr_elements there */ @@ -241,7 +240,6 @@ extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); -extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); extern void lc_del(struct lru_cache *lc, struct lc_element *element); extern struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr); @@ -297,6 +295,5 @@ extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); container_of(ptr, type, member) extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i); -extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e); #endif diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 0c964ac107c2..4aae6c06c5f2 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -30,6 +30,11 @@ static inline bool mempool_initialized(mempool_t *pool) return pool->elements != NULL; } +static inline bool mempool_is_saturated(mempool_t *pool) +{ + return READ_ONCE(pool->curr_nr) >= pool->min_nr; +} + void mempool_exit(mempool_t *pool); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, diff --git a/include/linux/wait.h b/include/linux/wait.h index 7f5a51aae0a7..a0307b516b09 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -209,7 +209,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq list_del(&wq_entry->entry); } -void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int 
diff --git a/io_uring/rw.c b/io_uring/rw.c
index bb47cc4da713..5c91cc80b348 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -665,6 +665,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
 	ret = kiocb_set_rw_flags(kiocb, rw->flags);
 	if (unlikely(ret))
 		return ret;
+	kiocb->ki_flags |= IOCB_ALLOC_CACHE;
 
 	/*
 	 * If the file is marked O_NONBLOCK, still allow retry for it if it
@@ -680,7 +681,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
 			return -EOPNOTSUPP;
 
 		kiocb->private = NULL;
-		kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
+		kiocb->ki_flags |= IOCB_HIPRI;
 		kiocb->ki_complete = io_complete_rw_iopoll;
 		req->iopoll_completed = 0;
 	} else {
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 793ecff29038..910e633869b0 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -281,6 +281,26 @@ void cgroup_rstat_flush_release(void)
 	spin_unlock_irq(&cgroup_rstat_lock);
 }
 
+/**
+ * cgroup_rstat_css_cpu_flush - flush stats for the given css and cpu
+ * @css: target css to be flushed
+ * @cpu: the cpu that holds the stats to be flushed
+ *
+ * A lightweight rstat flush operation for a given css and cpu.
+ * Only the cpu_lock is held for mutual exclusion; the cgroup_rstat_lock
+ * isn't used.
+ */
+void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu)
+{
+	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
+
+	raw_spin_lock_irq(cpu_lock);
+	rcu_read_lock();
+	css->ss->css_rstat_flush(css, cpu);
+	rcu_read_unlock();
+	raw_spin_unlock_irq(cpu_lock);
+}
+
 int cgroup_rstat_init(struct cgroup *cgrp)
 {
 	int cpu;
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 9860bb9a847c..133b74730738 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -121,11 +121,12 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 	return nr_exclusive;
 }
 
-static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
+static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, int wake_flags, void *key)
 {
 	unsigned long flags;
 	wait_queue_entry_t bookmark;
+	int remaining = nr_exclusive;
 
 	bookmark.flags = 0;
 	bookmark.private = NULL;
@@ -134,10 +135,12 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
 
 	do {
 		spin_lock_irqsave(&wq_head->lock, flags);
-		nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
+		remaining = __wake_up_common(wq_head, mode, remaining,
 						wake_flags, key, &bookmark);
 		spin_unlock_irqrestore(&wq_head->lock, flags);
 	} while (bookmark.flags & WQ_FLAG_BOOKMARK);
+
+	return nr_exclusive - remaining;
 }
 
 /**
@@ -147,13 +150,14 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
  *
- * If this function wakes up a task, it executes a full memory barrier before
- * accessing the task state.
+ * If this function wakes up a task, it executes a full memory barrier
+ * before accessing the task state.  Returns the number of exclusive
+ * tasks that were awakened.
 */
-void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
-			int nr_exclusive, void *key)
+int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
+	      int nr_exclusive, void *key)
 {
-	__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
+	return __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
 }
 EXPORT_SYMBOL(__wake_up);
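With __wake_up() now returning how many exclusive waiters it actually woke, callers can distinguish a successful wakeup from an empty queue; the sbitmap rework below depends on exactly this through wake_up_nr(). A minimal hedged sketch (the helper name is illustrative):

/* Sketch: report whether anyone was actually woken. */
static bool wake_one(struct wait_queue_head *wq)
{
	/* A zero return means no exclusive waiter was queued here. */
	return __wake_up(wq, TASK_NORMAL, 1, NULL) > 0;
}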
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index dc35464216d3..b3d9187611de 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -60,17 +60,6 @@ int lc_try_lock(struct lru_cache *lc)
 	} while (unlikely (val == LC_PARANOIA));
 	/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
 	return 0 == val;
-#if 0
-	/* Alternative approach, spin in case someone enters or leaves a
-	 * PARANOIA_ENTRY()/RETURN() section. */
-	unsigned long old, new, val;
-	do {
-		old = lc->flags & LC_PARANOIA;
-		new = old | LC_LOCKED;
-		val = cmpxchg(&lc->flags, old, new);
-	} while (unlikely (val == (old ^ LC_PARANOIA)));
-	return old == val;
-#endif
 }
 
 /**
@@ -364,7 +353,7 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsig
 	struct lc_element *e;
 
 	PARANOIA_ENTRY();
-	if (lc->flags & LC_STARVING) {
+	if (test_bit(__LC_STARVING, &lc->flags)) {
 		++lc->starving;
 		RETURN(NULL);
 	}
@@ -417,7 +406,7 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsig
 	 * the LRU element, we have to wait ...
 	 */
 	if (!lc_unused_element_available(lc)) {
-		__set_bit(__LC_STARVING, &lc->flags);
+		set_bit(__LC_STARVING, &lc->flags);
 		RETURN(NULL);
 	}
 
@@ -586,48 +575,6 @@ struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
 }
 
 /**
- * lc_index_of
- * @lc: the lru cache to operate on
- * @e: the element to query for its index position in lc->element
- */
-unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
-{
-	PARANOIA_LC_ELEMENT(lc, e);
-	return e->lc_index;
-}
-
-/**
- * lc_set - associate index with label
- * @lc: the lru cache to operate on
- * @enr: the label to set
- * @index: the element index to associate label with.
- *
- * Used to initialize the active set to some previously recorded state.
- */
-void lc_set(struct lru_cache *lc, unsigned int enr, int index)
-{
-	struct lc_element *e;
-	struct list_head *lh;
-
-	if (index < 0 || index >= lc->nr_elements)
-		return;
-
-	e = lc_element_by_index(lc, index);
-	BUG_ON(e->lc_number != e->lc_new_number);
-	BUG_ON(e->refcnt != 0);
-
-	e->lc_number = e->lc_new_number = enr;
-	hlist_del_init(&e->colision);
-	if (enr == LC_FREE)
-		lh = &lc->free;
-	else {
-		hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
-		lh = &lc->lru;
-	}
-	list_move(&e->list, lh);
-}
-
-/**
  * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form.
  * @lc: the lru cache to operate on
  * @seq: the &struct seq_file pointer to seq_printf into
@@ -661,7 +608,6 @@ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext
 EXPORT_SYMBOL(lc_create);
 EXPORT_SYMBOL(lc_reset);
 EXPORT_SYMBOL(lc_destroy);
-EXPORT_SYMBOL(lc_set);
 EXPORT_SYMBOL(lc_del);
 EXPORT_SYMBOL(lc_try_get);
 EXPORT_SYMBOL(lc_find);
@@ -669,7 +615,6 @@ EXPORT_SYMBOL(lc_get);
 EXPORT_SYMBOL(lc_put);
 EXPORT_SYMBOL(lc_committed);
 EXPORT_SYMBOL(lc_element_by_index);
-EXPORT_SYMBOL(lc_index_of);
 EXPORT_SYMBOL(lc_seq_printf_stats);
 EXPORT_SYMBOL(lc_seq_dump_details);
 EXPORT_SYMBOL(lc_try_lock);
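Besides removing dead code, the lru_cache hunks harden __LC_STARVING handling: the flag is now tested and set with the atomic test_bit()/set_bit() helpers instead of a plain mask check and the non-atomic __set_bit(), so a concurrent read-modify-write of lc->flags on another CPU can no longer lose the bit. A generic sketch of the distinction, with illustrative structure and names:

/* Sketch: atomic vs non-atomic bit updates on a shared flags word. */
struct shared_state {
	unsigned long flags;	/* updated from multiple CPUs */
};

#define __MY_STARVING 0	/* illustrative bit number */

static void mark_starving(struct shared_state *s)
{
	/* Atomic RMW; __set_bit() here could drop a concurrent update. */
	set_bit(__MY_STARVING, &s->flags);
}

static bool is_starving(struct shared_state *s)
{
	return test_bit(__MY_STARVING, &s->flags);
}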
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index eca462cba398..586deb333237 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -560,33 +560,41 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
 
-static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
+static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
 {
 	int i, wake_index;
 
 	if (!atomic_read(&sbq->ws_active))
-		return NULL;
+		return;
 
 	wake_index = atomic_read(&sbq->wake_index);
 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
 		struct sbq_wait_state *ws = &sbq->ws[wake_index];
 
-		if (waitqueue_active(&ws->wait)) {
-			if (wake_index != atomic_read(&sbq->wake_index))
-				atomic_set(&sbq->wake_index, wake_index);
-			return ws;
-		}
-
+		/*
+		 * Advance the index before checking the current queue.
+		 * It improves fairness by ensuring the queue doesn't
+		 * need to be fully emptied before trying to wake up
+		 * from the next one.
+		 */
 		wake_index = sbq_index_inc(wake_index);
+
+		/*
+		 * It is sufficient to wake up at least one waiter to
+		 * guarantee forward progress.
+		 */
+		if (waitqueue_active(&ws->wait) &&
+		    wake_up_nr(&ws->wait, nr))
+			break;
 	}
 
-	return NULL;
+	if (wake_index != atomic_read(&sbq->wake_index))
+		atomic_set(&sbq->wake_index, wake_index);
 }
 
 void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
 {
 	unsigned int wake_batch = READ_ONCE(sbq->wake_batch);
-	struct sbq_wait_state *ws = NULL;
 	unsigned int wakeups;
 
 	if (!atomic_read(&sbq->ws_active))
@@ -598,16 +606,10 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
 
 	do {
 		if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch)
 			return;
-
-		if (!ws) {
-			ws = sbq_wake_ptr(sbq);
-			if (!ws)
-				return;
-		}
 	} while (!atomic_try_cmpxchg(&sbq->wakeup_cnt,
 				     &wakeups, wakeups + wake_batch));
 
-	wake_up_nr(&ws->wait, wake_batch);
+	__sbitmap_queue_wake_up(sbq, wake_batch);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
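The sbitmap change is the main consumer of the new wakeup return value: since wake_up_nr() now reports whether a queue held any exclusive waiter, the scan can advance round-robin and stop at the first queue where a wakeup actually happened. A stand-alone hedged sketch of the same idea; the helper and its parameters are illustrative, not kernel APIs:

/* Sketch: round-robin wake across nqueues queues, stop on the first hit. */
static void wake_round_robin(struct wait_queue_head *qs, int nqueues,
			     int *next_idx, int nr)
{
	int i, idx = *next_idx;

	for (i = 0; i < nqueues; i++) {
		struct wait_queue_head *wq = &qs[idx];

		/* Advance first so the next call starts at the next queue. */
		idx = (idx + 1) % nqueues;

		/* wake_up_nr() now returns the count of waiters woken. */
		if (waitqueue_active(wq) && wake_up_nr(wq, nr))
			break;
	}
	*next_idx = idx;
}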