aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/block/inline-encryption.rst12
-rw-r--r--block/bio.c98
-rw-r--r--block/blk-cgroup.c103
-rw-r--r--block/blk-cgroup.h10
-rw-r--r--block/blk-crypto-internal.h12
-rw-r--r--block/blk-crypto-profile.c1
-rw-r--r--block/blk-crypto.c37
-rw-r--r--block/blk-mq-sysfs.c11
-rw-r--r--block/blk-mq.c6
-rw-r--r--block/elevator.c22
-rw-r--r--block/fops.c7
-rw-r--r--block/genhd.c6
-rw-r--r--block/holder.c103
-rw-r--r--block/mq-deadline.c83
-rw-r--r--drivers/block/drbd/Kconfig2
-rw-r--r--drivers/block/drbd/Makefile2
-rw-r--r--drivers/block/drbd/drbd_actlog.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_debugfs.c2
-rw-r--r--drivers/block/drbd/drbd_debugfs.h2
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_interval.c2
-rw-r--r--drivers/block/drbd/drbd_interval.h2
-rw-r--r--drivers/block/drbd/drbd_main.c11
-rw-r--r--drivers/block/drbd/drbd_nl.c2
-rw-r--r--drivers/block/drbd/drbd_nla.c2
-rw-r--r--drivers/block/drbd/drbd_nla.h2
-rw-r--r--drivers/block/drbd/drbd_proc.c2
-rw-r--r--drivers/block/drbd/drbd_protocol.h2
-rw-r--r--drivers/block/drbd/drbd_receiver.c2
-rw-r--r--drivers/block/drbd/drbd_req.c2
-rw-r--r--drivers/block/drbd/drbd_req.h2
-rw-r--r--drivers/block/drbd/drbd_state.c2
-rw-r--r--drivers/block/drbd/drbd_state.h2
-rw-r--r--drivers/block/drbd/drbd_state_change.h2
-rw-r--r--drivers/block/drbd/drbd_strings.c2
-rw-r--r--drivers/block/drbd/drbd_strings.h2
-rw-r--r--drivers/block/drbd/drbd_vli.h2
-rw-r--r--drivers/block/drbd/drbd_worker.c2
-rw-r--r--drivers/md/dm-table.c2
-rw-r--r--drivers/md/dm.c138
-rw-r--r--fs/crypto/inline_crypt.c14
-rw-r--r--include/linux/blk-crypto-profile.h12
-rw-r--r--include/linux/blk-crypto.h13
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/lru_cache.h3
-rw-r--r--include/linux/mempool.h5
-rw-r--r--include/linux/wait.h2
-rw-r--r--io_uring/rw.c3
-rw-r--r--kernel/cgroup/rstat.c20
-rw-r--r--kernel/sched/wait.c18
-rw-r--r--lib/lru_cache.c59
-rw-r--r--lib/sbitmap.c36
54 files changed, 544 insertions, 357 deletions
diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst
index 4d151fbe2058..f9bf18ea6509 100644
--- a/Documentation/block/inline-encryption.rst
+++ b/Documentation/block/inline-encryption.rst
@@ -142,7 +142,7 @@ Therefore, we also introduce *blk-crypto-fallback*, which is an implementation
of inline encryption using the kernel crypto API. blk-crypto-fallback is built
into the block layer, so it works on any block device without any special setup.
Essentially, when a bio with an encryption context is submitted to a
-request_queue that doesn't support that encryption context, the block layer will
+block_device that doesn't support that encryption context, the block layer will
handle en/decryption of the bio using blk-crypto-fallback.
For encryption, the data cannot be encrypted in-place, as callers usually rely
@@ -187,7 +187,7 @@ API presented to users of the block layer
``blk_crypto_config_supported()`` allows users to check ahead of time whether
inline encryption with particular crypto settings will work on a particular
-request_queue -- either via hardware or via blk-crypto-fallback. This function
+block_device -- either via hardware or via blk-crypto-fallback. This function
takes in a ``struct blk_crypto_config`` which is like blk_crypto_key, but omits
the actual bytes of the key and instead just contains the algorithm, data unit
size, etc. This function can be useful if blk-crypto-fallback is disabled.
@@ -195,7 +195,7 @@ size, etc. This function can be useful if blk-crypto-fallback is disabled.
``blk_crypto_init_key()`` allows users to initialize a blk_crypto_key.
Users must call ``blk_crypto_start_using_key()`` before actually starting to use
-a blk_crypto_key on a request_queue (even if ``blk_crypto_config_supported()``
+a blk_crypto_key on a block_device (even if ``blk_crypto_config_supported()``
was called earlier). This is needed to initialize blk-crypto-fallback if it
will be needed. This must not be called from the data path, as this may have to
allocate resources, which may deadlock in that case.
@@ -207,7 +207,7 @@ for en/decryption. Users don't need to worry about freeing the bio_crypt_ctx
later, as that happens automatically when the bio is freed or reset.
Finally, when done using inline encryption with a blk_crypto_key on a
-request_queue, users must call ``blk_crypto_evict_key()``. This ensures that
+block_device, users must call ``blk_crypto_evict_key()``. This ensures that
the key is evicted from all keyslots it may be programmed into and unlinked from
any kernel data structures it may be linked into.
@@ -221,9 +221,9 @@ as follows:
5. ``blk_crypto_evict_key()`` (after all I/O has completed)
6. Zeroize the blk_crypto_key (this has no dedicated function)
-If a blk_crypto_key is being used on multiple request_queues, then
+If a blk_crypto_key is being used on multiple block_devices, then
``blk_crypto_config_supported()`` (if used), ``blk_crypto_start_using_key()``,
-and ``blk_crypto_evict_key()`` must be called on each request_queue.
+and ``blk_crypto_evict_key()`` must be called on each block_device.
API presented to device drivers
===============================
diff --git a/block/bio.c b/block/bio.c
index aa1de6a367f9..ab59a491a883 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -25,9 +25,15 @@
#include "blk-rq-qos.h"
#include "blk-cgroup.h"
+#define ALLOC_CACHE_THRESHOLD 16
+#define ALLOC_CACHE_SLACK 64
+#define ALLOC_CACHE_MAX 256
+
struct bio_alloc_cache {
struct bio *free_list;
+ struct bio *free_list_irq;
unsigned int nr;
+ unsigned int nr_irq;
};
static struct biovec_slab {
@@ -408,6 +414,22 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
queue_work(bs->rescue_workqueue, &bs->rescue_work);
}
+static void bio_alloc_irq_cache_splice(struct bio_alloc_cache *cache)
+{
+ unsigned long flags;
+
+ /* cache->free_list must be empty */
+ if (WARN_ON_ONCE(cache->free_list))
+ return;
+
+ local_irq_save(flags);
+ cache->free_list = cache->free_list_irq;
+ cache->free_list_irq = NULL;
+ cache->nr += cache->nr_irq;
+ cache->nr_irq = 0;
+ local_irq_restore(flags);
+}
+
static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
struct bio_set *bs)
@@ -417,8 +439,12 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
cache = per_cpu_ptr(bs->cache, get_cpu());
if (!cache->free_list) {
- put_cpu();
- return NULL;
+ if (READ_ONCE(cache->nr_irq) >= ALLOC_CACHE_THRESHOLD)
+ bio_alloc_irq_cache_splice(cache);
+ if (!cache->free_list) {
+ put_cpu();
+ return NULL;
+ }
}
bio = cache->free_list;
cache->free_list = bio->bi_next;
@@ -462,9 +488,6 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
* for per bio allocations.
*
- * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process
- * context, not hard/soft IRQ.
- *
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
@@ -526,6 +549,8 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
}
if (unlikely(!p))
return NULL;
+ if (!mempool_is_saturated(&bs->bio_pool))
+ opf &= ~REQ_ALLOC_CACHE;
bio = p + bs->front_pad;
if (nr_vecs > BIO_INLINE_VECS) {
@@ -676,11 +701,8 @@ void guard_bio_eod(struct bio *bio)
bio_truncate(bio, maxsector << 9);
}
-#define ALLOC_CACHE_MAX 512
-#define ALLOC_CACHE_SLACK 64
-
-static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
- unsigned int nr)
+static int __bio_alloc_cache_prune(struct bio_alloc_cache *cache,
+ unsigned int nr)
{
unsigned int i = 0;
struct bio *bio;
@@ -692,6 +714,17 @@ static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
if (++i == nr)
break;
}
+ return i;
+}
+
+static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
+ unsigned int nr)
+{
+ nr -= __bio_alloc_cache_prune(cache, nr);
+ if (!READ_ONCE(cache->free_list)) {
+ bio_alloc_irq_cache_splice(cache);
+ __bio_alloc_cache_prune(cache, nr);
+ }
}
static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node)
@@ -725,6 +758,35 @@ static void bio_alloc_cache_destroy(struct bio_set *bs)
bs->cache = NULL;
}
+static inline void bio_put_percpu_cache(struct bio *bio)
+{
+ struct bio_alloc_cache *cache;
+
+ cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
+ if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX) {
+ put_cpu();
+ bio_free(bio);
+ return;
+ }
+
+ bio_uninit(bio);
+
+ if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) {
+ bio->bi_next = cache->free_list;
+ cache->free_list = bio;
+ cache->nr++;
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ bio->bi_next = cache->free_list_irq;
+ cache->free_list_irq = bio;
+ cache->nr_irq++;
+ local_irq_restore(flags);
+ }
+ put_cpu();
+}
+
/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
@@ -740,20 +802,10 @@ void bio_put(struct bio *bio)
if (!atomic_dec_and_test(&bio->__bi_cnt))
return;
}
-
- if ((bio->bi_opf & REQ_ALLOC_CACHE) && !WARN_ON_ONCE(in_interrupt())) {
- struct bio_alloc_cache *cache;
-
- bio_uninit(bio);
- cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
- bio->bi_next = cache->free_list;
- cache->free_list = bio;
- if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
- bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
- put_cpu();
- } else {
+ if (bio->bi_opf & REQ_ALLOC_CACHE)
+ bio_put_percpu_cache(bio);
+ else
bio_free(bio);
- }
}
EXPORT_SYMBOL(bio_put);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 6a5c849ee061..57941d2a8ba3 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -59,6 +59,37 @@ static struct workqueue_struct *blkcg_punt_bio_wq;
#define BLKG_DESTROY_BATCH_SIZE 64
+/*
+ * Lockless lists for tracking IO stats update
+ *
+ * New IO stats are stored in the percpu iostat_cpu within blkcg_gq (blkg).
+ * There are multiple blkg's (one for each block device) attached to each
+ * blkcg. The rstat code keeps track of which cpu has IO stats updated,
+ * but it doesn't know which blkg has the updated stats. If there are many
+ * block devices in a system, the cost of iterating all the blkg's to flush
+ * out the IO stats can be high. To reduce such overhead, a set of percpu
+ * lockless lists (lhead) per blkcg are used to track the set of recently
+ * updated iostat_cpu's since the last flush. An iostat_cpu will be put
+ * onto the lockless list on the update side [blk_cgroup_bio_start()] if
+ * not there yet and then removed when being flushed [blkcg_rstat_flush()].
+ * References to blkg are gotten and then put back in the process to
+ * protect against blkg removal.
+ *
+ * Return: 0 if successful or -ENOMEM if allocation fails.
+ */
+static int init_blkcg_llists(struct blkcg *blkcg)
+{
+ int cpu;
+
+ blkcg->lhead = alloc_percpu_gfp(struct llist_head, GFP_KERNEL);
+ if (!blkcg->lhead)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu)
+ init_llist_head(per_cpu_ptr(blkcg->lhead, cpu));
+ return 0;
+}
+
/**
* blkcg_css - find the current css
*
@@ -236,8 +267,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
blkg->blkcg = blkcg;
u64_stats_init(&blkg->iostat.sync);
- for_each_possible_cpu(cpu)
+ for_each_possible_cpu(cpu) {
u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
+ per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg;
+ }
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -827,7 +860,9 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
{
struct blkcg *blkcg = css_to_blkcg(css);
- struct blkcg_gq *blkg;
+ struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
+ struct llist_node *lnode;
+ struct blkg_iostat_set *bisc, *next_bisc;
/* Root-level stats are sourced from system-wide IO stats */
if (!cgroup_parent(css->cgroup))
@@ -835,12 +870,21 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
rcu_read_lock();
- hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+ lnode = llist_del_all(lhead);
+ if (!lnode)
+ goto out;
+
+ /*
+ * Iterate only the iostat_cpu's queued in the lockless list.
+ */
+ llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) {
+ struct blkcg_gq *blkg = bisc->blkg;
struct blkcg_gq *parent = blkg->parent;
- struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
struct blkg_iostat cur;
unsigned int seq;
+ WRITE_ONCE(bisc->lqueued, false);
+
/* fetch the current per-cpu values */
do {
seq = u64_stats_fetch_begin(&bisc->sync);
@@ -853,8 +897,10 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (parent && parent->parent)
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
+ percpu_ref_put(&blkg->refcnt);
}
+out:
rcu_read_unlock();
}
@@ -1038,10 +1084,12 @@ struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
*/
static void blkcg_destroy_blkgs(struct blkcg *blkcg)
{
+ int cpu;
+
might_sleep();
+ css_get(&blkcg->css);
spin_lock_irq(&blkcg->lock);
-
while (!hlist_empty(&blkcg->blkg_list)) {
struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
struct blkcg_gq, blkcg_node);
@@ -1064,6 +1112,17 @@ static void blkcg_destroy_blkgs(struct blkcg *blkcg)
}
spin_unlock_irq(&blkcg->lock);
+
+ /*
+ * Flush all the non-empty percpu lockless lists.
+ */
+ for_each_possible_cpu(cpu) {
+ struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
+
+ if (!llist_empty(lhead))
+ cgroup_rstat_css_cpu_flush(&blkcg->css, cpu);
+ }
+ css_put(&blkcg->css);
}
/**
@@ -1132,6 +1191,7 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
mutex_unlock(&blkcg_pol_mutex);
+ free_percpu(blkcg->lhead);
kfree(blkcg);
}
@@ -1139,7 +1199,6 @@ static struct cgroup_subsys_state *
blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct blkcg *blkcg;
- struct cgroup_subsys_state *ret;
int i;
mutex_lock(&blkcg_pol_mutex);
@@ -1148,12 +1207,13 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
blkcg = &blkcg_root;
} else {
blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
- if (!blkcg) {
- ret = ERR_PTR(-ENOMEM);
+ if (!blkcg)
goto unlock;
- }
}
+ if (init_blkcg_llists(blkcg))
+ goto free_blkcg;
+
for (i = 0; i < BLKCG_MAX_POLS ; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
struct blkcg_policy_data *cpd;
@@ -1168,10 +1228,9 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
continue;
cpd = pol->cpd_alloc_fn(GFP_KERNEL);
- if (!cpd) {
- ret = ERR_PTR(-ENOMEM);
+ if (!cpd)
goto free_pd_blkcg;
- }
+
blkcg->cpd[i] = cpd;
cpd->blkcg = blkcg;
cpd->plid = i;
@@ -1195,12 +1254,13 @@ free_pd_blkcg:
for (i--; i >= 0; i--)
if (blkcg->cpd[i])
blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
-
+ free_percpu(blkcg->lhead);
+free_blkcg:
if (blkcg != &blkcg_root)
kfree(blkcg);
unlock:
mutex_unlock(&blkcg_pol_mutex);
- return ret;
+ return ERR_PTR(-ENOMEM);
}
static int blkcg_css_online(struct cgroup_subsys_state *css)
@@ -1943,6 +2003,7 @@ static int blk_cgroup_io_type(struct bio *bio)
void blk_cgroup_bio_start(struct bio *bio)
{
+ struct blkcg *blkcg = bio->bi_blkg->blkcg;
int rwd = blk_cgroup_io_type(bio), cpu;
struct blkg_iostat_set *bis;
unsigned long flags;
@@ -1961,9 +2022,21 @@ void blk_cgroup_bio_start(struct bio *bio)
}
bis->cur.ios[rwd]++;
+ /*
+ * If the iostat_cpu isn't in a lockless list, put it into the
+ * list to indicate that a stat update is pending.
+ */
+ if (!READ_ONCE(bis->lqueued)) {
+ struct llist_head *lhead = this_cpu_ptr(blkcg->lhead);
+
+ llist_add(&bis->lnode, lhead);
+ WRITE_ONCE(bis->lqueued, true);
+ percpu_ref_get(&bis->blkg->refcnt);
+ }
+
u64_stats_update_end_irqrestore(&bis->sync, flags);
if (cgroup_subsys_on_dfl(io_cgrp_subsys))
- cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu);
+ cgroup_rstat_updated(blkcg->css.cgroup, cpu);
put_cpu();
}
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index aa2b286bc825..1e94e404eaa8 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -18,6 +18,7 @@
#include <linux/cgroup.h>
#include <linux/kthread.h>
#include <linux/blk-mq.h>
+#include <linux/llist.h>
struct blkcg_gq;
struct blkg_policy_data;
@@ -43,6 +44,9 @@ struct blkg_iostat {
struct blkg_iostat_set {
struct u64_stats_sync sync;
+ struct blkcg_gq *blkg;
+ struct llist_node lnode;
+ int lqueued; /* queued in llist */
struct blkg_iostat cur;
struct blkg_iostat last;
};
@@ -97,6 +101,12 @@ struct blkcg {
struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
struct list_head all_blkcgs_node;
+
+ /*
+ * List of updated percpu blkg_iostat_set's since the last flush.
+ */
+ struct llist_head __percpu *lhead;
+
#ifdef CONFIG_BLK_CGROUP_FC_APPID
char fc_app_id[FC_APPID_LEN];
#endif
diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
index e6818ffaddbf..d31fa80454e4 100644
--- a/block/blk-crypto-internal.h
+++ b/block/blk-crypto-internal.h
@@ -65,6 +65,18 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
return rq->crypt_ctx;
}
+blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ struct blk_crypto_keyslot **slot_ptr);
+
+void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot);
+
+int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key);
+
+bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
+ const struct blk_crypto_config *cfg);
+
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
static inline int blk_crypto_sysfs_register(struct request_queue *q)
diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c
index 96c511967386..0307fb0d95d3 100644
--- a/block/blk-crypto-profile.c
+++ b/block/blk-crypto-profile.c
@@ -32,6 +32,7 @@
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
+#include "blk-crypto-internal.h"
struct blk_crypto_keyslot {
atomic_t slot_refs;
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index a496aaef85ba..6a461f4d676a 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -267,7 +267,6 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
{
struct bio *bio = *bio_ptr;
const struct blk_crypto_key *bc_key = bio->bi_crypt_context->bc_key;
- struct blk_crypto_profile *profile;
/* Error if bio has no data. */
if (WARN_ON_ONCE(!bio_has_data(bio))) {
@@ -284,10 +283,9 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
* Success if device supports the encryption context, or if we succeeded
* in falling back to the crypto API.
*/
- profile = bdev_get_queue(bio->bi_bdev)->crypto_profile;
- if (__blk_crypto_cfg_supported(profile, &bc_key->crypto_cfg))
+ if (blk_crypto_config_supported_natively(bio->bi_bdev,
+ &bc_key->crypto_cfg))
return true;
-
if (blk_crypto_fallback_bio_prep(bio_ptr))
return true;
fail:
@@ -352,22 +350,29 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
return 0;
}
+bool blk_crypto_config_supported_natively(struct block_device *bdev,
+ const struct blk_crypto_config *cfg)
+{
+ return __blk_crypto_cfg_supported(bdev_get_queue(bdev)->crypto_profile,
+ cfg);
+}
+
/*
* Check if bios with @cfg can be en/decrypted by blk-crypto (i.e. either the
- * request queue it's submitted to supports inline crypto, or the
+ * block_device it's submitted to supports inline crypto, or the
* blk-crypto-fallback is enabled and supports the cfg).
*/
-bool blk_crypto_config_supported(struct request_queue *q,
+bool blk_crypto_config_supported(struct block_device *bdev,
const struct blk_crypto_config *cfg)
{
return IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) ||
- __blk_crypto_cfg_supported(q->crypto_profile, cfg);
+ blk_crypto_config_supported_natively(bdev, cfg);
}
/**
* blk_crypto_start_using_key() - Start using a blk_crypto_key on a device
+ * @bdev: block device to operate on
* @key: A key to use on the device
- * @q: the request queue for the device
*
* Upper layers must call this function to ensure that either the hardware
* supports the key's crypto settings, or the crypto API fallback has transforms
@@ -379,10 +384,10 @@ bool blk_crypto_config_supported(struct request_queue *q,
* blk-crypto-fallback is either disabled or the needed algorithm
* is disabled in the crypto API; or another -errno code.
*/
-int blk_crypto_start_using_key(const struct blk_crypto_key *key,
- struct request_queue *q)
+int blk_crypto_start_using_key(struct block_device *bdev,
+ const struct blk_crypto_key *key)
{
- if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg))
+ if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg))
return 0;
return blk_crypto_fallback_start_using_mode(key->crypto_cfg.crypto_mode);
}
@@ -390,7 +395,7 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key,
/**
* blk_crypto_evict_key() - Evict a key from any inline encryption hardware
* it may have been programmed into
- * @q: The request queue who's associated inline encryption hardware this key
+ * @bdev: The block_device who's associated inline encryption hardware this key
* might have been programmed into
* @key: The key to evict
*
@@ -400,14 +405,16 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key,
*
* Return: 0 on success or if the key wasn't in any keyslot; -errno on error.
*/
-int blk_crypto_evict_key(struct request_queue *q,
+int blk_crypto_evict_key(struct block_device *bdev,
const struct blk_crypto_key *key)
{
- if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg))
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg))
return __blk_crypto_evict_key(q->crypto_profile, key);
/*
- * If the request_queue didn't support the key, then blk-crypto-fallback
+ * If the block_device didn't support the key, then blk-crypto-fallback
* may have been used, so try to evict the key from blk-crypto-fallback.
*/
return blk_crypto_fallback_evict_key(key);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 93997d297d42..4515288fbe35 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -185,7 +185,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct blk_mq_ctx *ctx;
- int i, ret;
+ int i, j, ret;
if (!hctx->nr_ctx)
return 0;
@@ -197,9 +197,16 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
hctx_for_each_ctx(hctx, ctx, i) {
ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
if (ret)
- break;
+ goto out;
}
+ return 0;
+out:
+ hctx_for_each_ctx(hctx, ctx, j) {
+ if (j < i)
+ kobject_del(&ctx->kobj);
+ }
+ kobject_del(&hctx->kobj);
return ret;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ee16b4c34c6a..4e6b3ccd4989 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4381,7 +4381,7 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
struct blk_mq_tags **new_tags;
if (set->nr_hw_queues >= new_nr_hw_queues)
- return 0;
+ goto done;
new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
@@ -4393,8 +4393,8 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
sizeof(*set->tags));
kfree(set->tags);
set->tags = new_tags;
+done:
set->nr_hw_queues = new_nr_hw_queues;
-
return 0;
}
@@ -4630,9 +4630,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
INIT_LIST_HEAD(&qe->node);
qe->q = q;
+ qe->type = q->elevator->type;
/* keep a reference to the elevator module as we'll switch back */
__elevator_get(qe->type);
- qe->type = q->elevator->type;
list_add(&qe->node, head);
elevator_disable(q);
mutex_unlock(&q->sysfs_lock);
diff --git a/block/elevator.c b/block/elevator.c
index a5bdc3b1e7e5..599413620558 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -91,12 +91,11 @@ static inline bool elv_support_features(struct request_queue *q,
}
/**
- * elevator_match - Test an elevator name and features
+ * elevator_match - Check whether @e's name or alias matches @name
* @e: Scheduler to test
* @name: Elevator name to test
*
- * Return true if the elevator @e name matches @name and if @e provides all
- * the features specified by @required_features.
+ * Return true if the elevator @e's name or alias matches @name.
*/
static bool elevator_match(const struct elevator_type *e, const char *name)
{
@@ -650,10 +649,10 @@ void elevator_init_mq(struct request_queue *q)
}
/*
- * switch to new_e io scheduler. be careful not to introduce deadlocks -
- * we don't free the old io scheduler, before we have allocated what we
- * need for the new one. this way we have a chance of going back to the old
- * one, if the new one fails init for some reason.
+ * Switch to new_e io scheduler.
+ *
+ * If switching fails, we are most likely running out of memory and not able
+ * to restore the old io scheduler, so leaving the io scheduler being none.
*/
int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
@@ -683,6 +682,12 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
out_unfreeze:
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
+
+ if (ret) {
+ pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
+ new_e->elevator_name);
+ }
+
return ret;
}
@@ -716,9 +721,6 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
if (!blk_queue_registered(q))
return -ENOENT;
- /*
- * Special case for mq, turn off scheduling
- */
if (!strncmp(elevator_name, "none", 4)) {
if (q->elevator)
elevator_disable(q);
diff --git a/block/fops.c b/block/fops.c
index b90742595317..50d245e8c913 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -405,12 +405,6 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
return ret;
}
-static int blkdev_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- return generic_writepages(mapping, wbc);
-}
-
const struct address_space_operations def_blk_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
@@ -419,7 +413,6 @@ const struct address_space_operations def_blk_aops = {
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
.write_end = blkdev_write_end,
- .writepages = blkdev_writepages,
.direct_IO = blkdev_direct_IO,
.migrate_folio = buffer_migrate_folio_norefs,
.is_dirty_writeback = buffer_check_dirty_writeback,
diff --git a/block/genhd.c b/block/genhd.c
index 09cde914e054..075d8da284f5 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -478,10 +478,6 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
goto out_put_holder_dir;
}
- ret = bd_register_pending_holders(disk);
- if (ret < 0)
- goto out_put_slave_dir;
-
ret = blk_register_queue(disk);
if (ret)
goto out_put_slave_dir;
@@ -528,6 +524,7 @@ out_unregister_queue:
blk_unregister_queue(disk);
out_put_slave_dir:
kobject_put(disk->slave_dir);
+ disk->slave_dir = NULL;
out_put_holder_dir:
kobject_put(disk->part0->bd_holder_dir);
out_del_integrity:
@@ -629,6 +626,7 @@ void del_gendisk(struct gendisk *disk)
kobject_put(disk->part0->bd_holder_dir);
kobject_put(disk->slave_dir);
+ disk->slave_dir = NULL;
part_stat_set_all(disk->part0, 0);
disk->part0->bd_stamp = 0;
diff --git a/block/holder.c b/block/holder.c
index 5283bc804cc1..37d18c13d958 100644
--- a/block/holder.c
+++ b/block/holder.c
@@ -4,7 +4,7 @@
struct bd_holder_disk {
struct list_head list;
- struct block_device *bdev;
+ struct kobject *holder_dir;
int refcnt;
};
@@ -14,7 +14,7 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
struct bd_holder_disk *holder;
list_for_each_entry(holder, &disk->slave_bdevs, list)
- if (holder->bdev == bdev)
+ if (holder->holder_dir == bdev->bd_holder_dir)
return holder;
return NULL;
}
@@ -29,19 +29,6 @@ static void del_symlink(struct kobject *from, struct kobject *to)
sysfs_remove_link(from, kobject_name(to));
}
-static int __link_disk_holder(struct block_device *bdev, struct gendisk *disk)
-{
- int ret;
-
- ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
- if (ret)
- return ret;
- ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
- if (ret)
- del_symlink(disk->slave_dir, bdev_kobj(bdev));
- return ret;
-}
-
/**
* bd_link_disk_holder - create symlinks between holding disk and slave bdev
* @bdev: the claimed slave bdev
@@ -75,12 +62,30 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
struct bd_holder_disk *holder;
int ret = 0;
- mutex_lock(&disk->open_mutex);
+ if (WARN_ON_ONCE(!disk->slave_dir))
+ return -EINVAL;
+
+ if (bdev->bd_disk == disk)
+ return -EINVAL;
+
+ /*
+ * del_gendisk drops the initial reference to bd_holder_dir, so we
+ * need to keep our own here to allow for cleanup past that point.
+ */
+ mutex_lock(&bdev->bd_disk->open_mutex);
+ if (!disk_live(bdev->bd_disk)) {
+ mutex_unlock(&bdev->bd_disk->open_mutex);
+ return -ENODEV;
+ }
+ kobject_get(bdev->bd_holder_dir);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_lock(&disk->open_mutex);
WARN_ON_ONCE(!bdev->bd_holder);
holder = bd_find_holder_disk(bdev, disk);
if (holder) {
+ kobject_put(bdev->bd_holder_dir);
holder->refcnt++;
goto out_unlock;
}
@@ -92,36 +97,32 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
}
INIT_LIST_HEAD(&holder->list);
- holder->bdev = bdev;
holder->refcnt = 1;
- if (disk->slave_dir) {
- ret = __link_disk_holder(bdev, disk);
- if (ret) {
- kfree(holder);
- goto out_unlock;
- }
- }
+ holder->holder_dir = bdev->bd_holder_dir;
+ ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
+ if (ret)
+ goto out_free_holder;
+ ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+ if (ret)
+ goto out_del_symlink;
list_add(&holder->list, &disk->slave_bdevs);
- /*
- * del_gendisk drops the initial reference to bd_holder_dir, so we need
- * to keep our own here to allow for cleanup past that point.
- */
- kobject_get(bdev->bd_holder_dir);
+ mutex_unlock(&disk->open_mutex);
+ return 0;
+
+out_del_symlink:
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
+out_free_holder:
+ kfree(holder);
out_unlock:
mutex_unlock(&disk->open_mutex);
+ if (ret)
+ kobject_put(bdev->bd_holder_dir);
return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
-static void __unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
-{
- del_symlink(disk->slave_dir, bdev_kobj(bdev));
- del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
-}
-
/**
* bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
* @bdev: the calimed slave bdev
@@ -136,36 +137,18 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
struct bd_holder_disk *holder;
+ if (WARN_ON_ONCE(!disk->slave_dir))
+ return;
+
mutex_lock(&disk->open_mutex);
holder = bd_find_holder_disk(bdev, disk);
if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
- if (disk->slave_dir)
- __unlink_disk_holder(bdev, disk);
- kobject_put(bdev->bd_holder_dir);
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
+ del_symlink(holder->holder_dir, &disk_to_dev(disk)->kobj);
+ kobject_put(holder->holder_dir);
list_del_init(&holder->list);
kfree(holder);
}
mutex_unlock(&disk->open_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
-
-int bd_register_pending_holders(struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
- int ret;
-
- mutex_lock(&disk->open_mutex);
- list_for_each_entry(holder, &disk->slave_bdevs, list) {
- ret = __link_disk_holder(holder->bdev, disk);
- if (ret)
- goto out_undo;
- }
- mutex_unlock(&disk->open_mutex);
- return 0;
-
-out_undo:
- list_for_each_entry_continue_reverse(holder, &disk->slave_bdevs, list)
- __unlink_disk_holder(holder->bdev, disk);
- mutex_unlock(&disk->open_mutex);
- return ret;
-}
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 5639921dfa92..f10c2a0d18d4 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -131,6 +131,20 @@ static u8 dd_rq_ioclass(struct request *rq)
}
/*
+ * get the request before `rq' in sector-sorted order
+ */
+static inline struct request *
+deadline_earlier_request(struct request *rq)
+{
+ struct rb_node *node = rb_prev(&rq->rb_node);
+
+ if (node)
+ return rb_entry_rq(node);
+
+ return NULL;
+}
+
+/*
* get the request after `rq' in sector-sorted order
*/
static inline struct request *
@@ -278,6 +292,39 @@ static inline int deadline_check_fifo(struct dd_per_prio *per_prio,
}
/*
+ * Check if rq has a sequential request preceding it.
+ */
+static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq)
+{
+ struct request *prev = deadline_earlier_request(rq);
+
+ if (!prev)
+ return false;
+
+ return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq);
+}
+
+/*
+ * Skip all write requests that are sequential from @rq, even if we cross
+ * a zone boundary.
+ */
+static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
+ struct request *rq)
+{
+ sector_t pos = blk_rq_pos(rq);
+ sector_t skipped_sectors = 0;
+
+ while (rq) {
+ if (blk_rq_pos(rq) != pos + skipped_sectors)
+ break;
+ skipped_sectors += blk_rq_sectors(rq);
+ rq = deadline_latter_request(rq);
+ }
+
+ return rq;
+}
+
+/*
* For the specified data direction, return the next request to
* dispatch using arrival ordered lists.
*/
@@ -297,11 +344,16 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
/*
* Look for a write request that can be dispatched, that is one with
- * an unlocked target zone.
+ * an unlocked target zone. For some HDDs, breaking a sequential
+ * write stream can lead to lower throughput, so make sure to preserve
+ * sequential write streams, even if that stream crosses into the next
+ * zones and these zones are unlocked.
*/
spin_lock_irqsave(&dd->zone_lock, flags);
list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
- if (blk_req_can_dispatch_to_zone(rq))
+ if (blk_req_can_dispatch_to_zone(rq) &&
+ (blk_queue_nonrot(rq->q) ||
+ !deadline_is_seq_write(dd, rq)))
goto out;
}
rq = NULL;
@@ -331,13 +383,19 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
/*
* Look for a write request that can be dispatched, that is one with
- * an unlocked target zone.
+ * an unlocked target zone. For some HDDs, breaking a sequential
+ * write stream can lead to lower throughput, so make sure to preserve
+ * sequential write streams, even if that stream crosses into the next
+ * zones and these zones are unlocked.
*/
spin_lock_irqsave(&dd->zone_lock, flags);
while (rq) {
if (blk_req_can_dispatch_to_zone(rq))
break;
- rq = deadline_latter_request(rq);
+ if (blk_queue_nonrot(rq->q))
+ rq = deadline_latter_request(rq);
+ else
+ rq = deadline_skip_seq_writes(dd, rq);
}
spin_unlock_irqrestore(&dd->zone_lock, flags);
@@ -789,6 +847,18 @@ static void dd_prepare_request(struct request *rq)
rq->elv.priv[0] = NULL;
}
+static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx)
+{
+ struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+ enum dd_prio p;
+
+ for (p = 0; p <= DD_PRIO_MAX; p++)
+ if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE]))
+ return true;
+
+ return false;
+}
+
/*
* Callback from inside blk_mq_free_request().
*
@@ -828,9 +898,10 @@ static void dd_finish_request(struct request *rq)
spin_lock_irqsave(&dd->zone_lock, flags);
blk_req_zone_write_unlock(rq);
- if (!list_empty(&per_prio->fifo_list[DD_WRITE]))
- blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
spin_unlock_irqrestore(&dd->zone_lock, flags);
+
+ if (dd_has_write_work(rq->mq_hctx))
+ blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
}
}
diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
index cbacddc55a1d..6fb4e38fca88 100644
--- a/drivers/block/drbd/Kconfig
+++ b/drivers/block/drbd/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-only
#
# DRBD device driver configuration
#
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 8bd534697d1b..c93e462130ff 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-only
drbd-y := drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e27478ae579c..5db147f3c02d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_actlog.c
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 7d9db33363de..b90a5c1003fc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_bitmap.c
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index b3b9cd5628fd..a72c096aa5b1 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "drbd debugfs: " fmt
#include <linux/kernel.h>
#include <linux/module.h>
diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h
index 58e31cef0844..ee3d66eb40c6 100644
--- a/drivers/block/drbd/drbd_debugfs.h
+++ b/drivers/block/drbd/drbd_debugfs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/debugfs.h>
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 913cf4c55cba..e29bd10ac52f 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
drbd_int.h
diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c
index f07b4378388b..5024ffd6143d 100644
--- a/drivers/block/drbd/drbd_interval.c
+++ b/drivers/block/drbd/drbd_interval.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
#include <asm/bug.h>
#include <linux/rbtree_augmented.h>
#include "drbd_interval.h"
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index b8c2dee5edc8..366489b72fe9 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __DRBD_INTERVAL_H
#define __DRBD_INTERVAL_H
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index f3e4db16fd07..e02db1dccab1 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd.c
@@ -2217,7 +2217,8 @@ void drbd_destroy_device(struct kref *kref)
kref_put(&peer_device->connection->kref, drbd_destroy_connection);
kfree(peer_device);
}
- memset(device, 0xfd, sizeof(*device));
+ if (device->submit.wq)
+ destroy_workqueue(device->submit.wq);
kfree(device);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2309,7 +2310,6 @@ void drbd_destroy_resource(struct kref *kref)
idr_destroy(&resource->devices);
free_cpumask_var(resource->cpu_mask);
kfree(resource->name);
- memset(resource, 0xf2, sizeof(*resource));
kfree(resource);
}
@@ -2650,7 +2650,6 @@ void drbd_destroy_connection(struct kref *kref)
drbd_free_socket(&connection->data);
kfree(connection->int_dig_in);
kfree(connection->int_dig_vv);
- memset(connection, 0xfc, sizeof(*connection));
kfree(connection);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2774,7 +2773,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
err = add_disk(disk);
if (err)
- goto out_idr_remove_from_resource;
+ goto out_destroy_workqueue;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2788,6 +2787,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
+out_destroy_workqueue:
+ destroy_workqueue(device->submit.wq);
out_idr_remove_from_resource:
for_each_connection(connection, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 63f589926d85..cb55b28fba78 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_nl.c
diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c
index 6a09b0b98018..df0d241d3f6a 100644
--- a/drivers/block/drbd/drbd_nla.c
+++ b/drivers/block/drbd/drbd_nla.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <net/netlink.h>
#include <linux/drbd_genl_api.h>
diff --git a/drivers/block/drbd/drbd_nla.h b/drivers/block/drbd/drbd_nla.h
index f5eaffb6474e..d3555df0d353 100644
--- a/drivers/block/drbd/drbd_nla.h
+++ b/drivers/block/drbd/drbd_nla.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __DRBD_NLA_H
#define __DRBD_NLA_H
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 3c0193de2498..2227fb0db1ce 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_proc.c
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h
index a882b65ab5d2..56bbca9d7700 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __DRBD_PROTOCOL_H
#define __DRBD_PROTOCOL_H
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 93d6df4dc5a4..e045fb55f3bf 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_receiver.c
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 7f9bcc82fc9c..ced15557197a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_req.c
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 6237fa1dcb0e..b4017b5c3fbc 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
drbd_req.h
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 3f7bf9f2d874..75d13ea0024f 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_state.c
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index f87371e55e68..cbaeb8018dbf 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef DRBD_STATE_H
#define DRBD_STATE_H
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
index d5b0479bc9a6..9d78d8e3912e 100644
--- a/drivers/block/drbd/drbd_state_change.h
+++ b/drivers/block/drbd/drbd_state_change.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef DRBD_STATE_CHANGE_H
#define DRBD_STATE_CHANGE_H
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
index fc01307607ea..0a06f744b096 100644
--- a/drivers/block/drbd/drbd_strings.c
+++ b/drivers/block/drbd/drbd_strings.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd.h
diff --git a/drivers/block/drbd/drbd_strings.h b/drivers/block/drbd/drbd_strings.h
index 87b94a27358a..0201f6590f6a 100644
--- a/drivers/block/drbd/drbd_strings.h
+++ b/drivers/block/drbd/drbd_strings.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __DRBD_STRINGS_H
#define __DRBD_STRINGS_H
diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h
index 01e3babc5277..1ee81e3c2152 100644
--- a/drivers/block/drbd/drbd_vli.h
+++ b/drivers/block/drbd/drbd_vli.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
-*- linux-c -*-
drbd_receiver.c
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index c69beefc9d5c..3df033bfccf8 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_worker.c
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 078da18bb86d..8541d5688f3a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1215,7 +1215,7 @@ static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
struct dm_keyslot_evict_args *args = data;
int err;
- err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key);
+ err = blk_crypto_evict_key(dev->bdev, args->key);
if (!args->err)
args->err = err;
/* Always try to evict the key from all devices. */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 95a1ee3d314e..e1ea3a7bd9d9 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -732,28 +732,48 @@ static char *_dm_claim_ptr = "I belong to device-mapper";
/*
* Open a table device so we can use it as a map destination.
*/
-static int open_table_device(struct table_device *td, dev_t dev,
- struct mapped_device *md)
+static struct table_device *open_table_device(struct mapped_device *md,
+ dev_t dev, fmode_t mode)
{
+ struct table_device *td;
struct block_device *bdev;
u64 part_off;
int r;
- BUG_ON(td->dm_dev.bdev);
+ td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
+ if (!td)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&td->count, 1);
- bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr);
+ if (IS_ERR(bdev)) {
+ r = PTR_ERR(bdev);
+ goto out_free_td;
+ }
- r = bd_link_disk_holder(bdev, dm_disk(md));
- if (r) {
- blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
- return r;
+ /*
+ * We can be called before the dm disk is added. In that case we can't
+ * register the holder relation here. It will be done once add_disk was
+ * called.
+ */
+ if (md->disk->slave_dir) {
+ r = bd_link_disk_holder(bdev, md->disk);
+ if (r)
+ goto out_blkdev_put;
}
+ td->dm_dev.mode = mode;
td->dm_dev.bdev = bdev;
td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL);
- return 0;
+ format_dev_t(td->dm_dev.name, dev);
+ list_add(&td->list, &md->table_devices);
+ return td;
+
+out_blkdev_put:
+ blkdev_put(bdev, mode | FMODE_EXCL);
+out_free_td:
+ kfree(td);
+ return ERR_PTR(r);
}
/*
@@ -761,14 +781,12 @@ static int open_table_device(struct table_device *td, dev_t dev,
*/
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
- if (!td->dm_dev.bdev)
- return;
-
- bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
+ if (md->disk->slave_dir)
+ bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
put_dax(td->dm_dev.dax_dev);
- td->dm_dev.bdev = NULL;
- td->dm_dev.dax_dev = NULL;
+ list_del(&td->list);
+ kfree(td);
}
static struct table_device *find_table_device(struct list_head *l, dev_t dev,
@@ -786,31 +804,16 @@ static struct table_device *find_table_device(struct list_head *l, dev_t dev,
int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
struct dm_dev **result)
{
- int r;
struct table_device *td;
mutex_lock(&md->table_devices_lock);
td = find_table_device(&md->table_devices, dev, mode);
if (!td) {
- td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
- if (!td) {
+ td = open_table_device(md, dev, mode);
+ if (IS_ERR(td)) {
mutex_unlock(&md->table_devices_lock);
- return -ENOMEM;
+ return PTR_ERR(td);
}
-
- td->dm_dev.mode = mode;
- td->dm_dev.bdev = NULL;
-
- if ((r = open_table_device(td, dev, md))) {
- mutex_unlock(&md->table_devices_lock);
- kfree(td);
- return r;
- }
-
- format_dev_t(td->dm_dev.name, dev);
-
- refcount_set(&td->count, 1);
- list_add(&td->list, &md->table_devices);
} else {
refcount_inc(&td->count);
}
@@ -825,27 +828,11 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
struct table_device *td = container_of(d, struct table_device, dm_dev);
mutex_lock(&md->table_devices_lock);
- if (refcount_dec_and_test(&td->count)) {
+ if (refcount_dec_and_test(&td->count))
close_table_device(td, md);
- list_del(&td->list);
- kfree(td);
- }
mutex_unlock(&md->table_devices_lock);
}
-static void free_table_devices(struct list_head *devices)
-{
- struct list_head *tmp, *next;
-
- list_for_each_safe(tmp, next, devices) {
- struct table_device *td = list_entry(tmp, struct table_device, list);
-
- DMWARN("dm_destroy: %s still exists with %d references",
- td->dm_dev.name, refcount_read(&td->count));
- kfree(td);
- }
-}
-
/*
* Get the geometry associated with a dm device
*/
@@ -1972,8 +1959,21 @@ static void cleanup_mapped_device(struct mapped_device *md)
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
if (dm_get_md_type(md) != DM_TYPE_NONE) {
+ struct table_device *td;
+
dm_sysfs_exit(md);
+ list_for_each_entry(td, &md->table_devices, list) {
+ bd_unlink_disk_holder(td->dm_dev.bdev,
+ md->disk);
+ }
+
+ /*
+ * Hold lock to make sure del_gendisk() won't concurrent
+ * with open/close_table_device().
+ */
+ mutex_lock(&md->table_devices_lock);
del_gendisk(md->disk);
+ mutex_unlock(&md->table_devices_lock);
}
dm_queue_destroy_crypto_profile(md->queue);
put_disk(md->disk);
@@ -2122,7 +2122,7 @@ static void free_dev(struct mapped_device *md)
cleanup_mapped_device(md);
- free_table_devices(&md->table_devices);
+ WARN_ON_ONCE(!list_empty(&md->table_devices));
dm_stats_cleanup(&md->stats);
free_minor(minor);
@@ -2305,6 +2305,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
enum dm_queue_mode type = dm_table_get_type(t);
struct queue_limits limits;
+ struct table_device *td;
int r;
switch (type) {
@@ -2333,17 +2334,40 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
if (r)
return r;
+ /*
+ * Hold lock to make sure add_disk() and del_gendisk() won't concurrent
+ * with open_table_device() and close_table_device().
+ */
+ mutex_lock(&md->table_devices_lock);
r = add_disk(md->disk);
+ mutex_unlock(&md->table_devices_lock);
if (r)
return r;
- r = dm_sysfs_init(md);
- if (r) {
- del_gendisk(md->disk);
- return r;
+ /*
+ * Register the holder relationship for devices added before the disk
+ * was live.
+ */
+ list_for_each_entry(td, &md->table_devices, list) {
+ r = bd_link_disk_holder(td->dm_dev.bdev, md->disk);
+ if (r)
+ goto out_undo_holders;
}
+
+ r = dm_sysfs_init(md);
+ if (r)
+ goto out_undo_holders;
+
md->type = type;
return 0;
+
+out_undo_holders:
+ list_for_each_entry_continue_reverse(td, &md->table_devices, list)
+ bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
+ mutex_lock(&md->table_devices_lock);
+ del_gendisk(md->disk);
+ mutex_unlock(&md->table_devices_lock);
+ return r;
}
struct mapped_device *dm_get_md(dev_t dev)
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index cea8b14007e6..8bfb3ce86476 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -12,7 +12,7 @@
* provides the key and IV to use.
*/
-#include <linux/blk-crypto-profile.h>
+#include <linux/blk-crypto.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/sched/mm.h>
@@ -77,10 +77,8 @@ static void fscrypt_log_blk_crypto_impl(struct fscrypt_mode *mode,
unsigned int i;
for (i = 0; i < num_devs; i++) {
- struct request_queue *q = bdev_get_queue(devs[i]);
-
if (!IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) ||
- __blk_crypto_cfg_supported(q->crypto_profile, cfg)) {
+ blk_crypto_config_supported_natively(devs[i], cfg)) {
if (!xchg(&mode->logged_blk_crypto_native, 1))
pr_info("fscrypt: %s using blk-crypto (native)\n",
mode->friendly_name);
@@ -139,8 +137,7 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci)
return PTR_ERR(devs);
for (i = 0; i < num_devs; i++) {
- if (!blk_crypto_config_supported(bdev_get_queue(devs[i]),
- &crypto_cfg))
+ if (!blk_crypto_config_supported(devs[i], &crypto_cfg))
goto out_free_devs;
}
@@ -184,8 +181,7 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
goto fail;
}
for (i = 0; i < num_devs; i++) {
- err = blk_crypto_start_using_key(blk_key,
- bdev_get_queue(devs[i]));
+ err = blk_crypto_start_using_key(devs[i], blk_key);
if (err)
break;
}
@@ -224,7 +220,7 @@ void fscrypt_destroy_inline_crypt_key(struct super_block *sb,
devs = fscrypt_get_devices(sb, &num_devs);
if (!IS_ERR(devs)) {
for (i = 0; i < num_devs; i++)
- blk_crypto_evict_key(bdev_get_queue(devs[i]), blk_key);
+ blk_crypto_evict_key(devs[i], blk_key);
kfree(devs);
}
kfree_sensitive(blk_key);
diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h
index bbab65bd5428..e6802b69cdd6 100644
--- a/include/linux/blk-crypto-profile.h
+++ b/include/linux/blk-crypto-profile.h
@@ -138,18 +138,6 @@ int devm_blk_crypto_profile_init(struct device *dev,
unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot);
-blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
- const struct blk_crypto_key *key,
- struct blk_crypto_keyslot **slot_ptr);
-
-void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot);
-
-bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
- const struct blk_crypto_config *cfg);
-
-int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
- const struct blk_crypto_key *key);
-
void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile);
void blk_crypto_profile_destroy(struct blk_crypto_profile *profile);
diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h
index 69b24fe92cbf..a33d32f5c268 100644
--- a/include/linux/blk-crypto.h
+++ b/include/linux/blk-crypto.h
@@ -71,9 +71,6 @@ struct bio_crypt_ctx {
#include <linux/blk_types.h>
#include <linux/blkdev.h>
-struct request;
-struct request_queue;
-
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
static inline bool bio_has_crypt_ctx(struct bio *bio)
@@ -94,13 +91,15 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
unsigned int dun_bytes,
unsigned int data_unit_size);
-int blk_crypto_start_using_key(const struct blk_crypto_key *key,
- struct request_queue *q);
+int blk_crypto_start_using_key(struct block_device *bdev,
+ const struct blk_crypto_key *key);
-int blk_crypto_evict_key(struct request_queue *q,
+int blk_crypto_evict_key(struct block_device *bdev,
const struct blk_crypto_key *key);
-bool blk_crypto_config_supported(struct request_queue *q,
+bool blk_crypto_config_supported_natively(struct block_device *bdev,
+ const struct blk_crypto_config *cfg);
+bool blk_crypto_config_supported(struct block_device *bdev,
const struct blk_crypto_config *cfg);
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9188aa3f6259..516e45246868 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -833,7 +833,6 @@ void set_capacity(struct gendisk *disk, sector_t size);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
-int bd_register_pending_holders(struct gendisk *disk);
#else
static inline int bd_link_disk_holder(struct block_device *bdev,
struct gendisk *disk)
@@ -844,10 +843,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
struct gendisk *disk)
{
}
-static inline int bd_register_pending_holders(struct gendisk *disk)
-{
- return 0;
-}
#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
dev_t part_devt(struct gendisk *disk, u8 partno);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 528bd44b59e2..6c4e66b3fa84 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -766,6 +766,7 @@ void cgroup_rstat_flush(struct cgroup *cgrp);
void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
void cgroup_rstat_flush_hold(struct cgroup *cgrp);
void cgroup_rstat_flush_release(void);
+void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu);
/*
* Basic resource stats.
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 07add7882a5d..c9afcdd9324c 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -199,7 +199,6 @@ struct lru_cache {
unsigned long flags;
- void *lc_private;
const char *name;
/* nr_elements there */
@@ -241,7 +240,6 @@ extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
unsigned e_count, size_t e_size, size_t e_off);
extern void lc_reset(struct lru_cache *lc);
extern void lc_destroy(struct lru_cache *lc);
-extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
extern void lc_del(struct lru_cache *lc, struct lc_element *element);
extern struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr);
@@ -297,6 +295,5 @@ extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
container_of(ptr, type, member)
extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i);
-extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e);
#endif
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 0c964ac107c2..4aae6c06c5f2 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -30,6 +30,11 @@ static inline bool mempool_initialized(mempool_t *pool)
return pool->elements != NULL;
}
+static inline bool mempool_is_saturated(mempool_t *pool)
+{
+ return READ_ONCE(pool->curr_nr) >= pool->min_nr;
+}
+
void mempool_exit(mempool_t *pool);
int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data,
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 7f5a51aae0a7..a0307b516b09 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -209,7 +209,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
list_del(&wq_entry->entry);
}
-void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
unsigned int mode, void *key, wait_queue_entry_t *bookmark);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index bb47cc4da713..5c91cc80b348 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -665,6 +665,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
ret = kiocb_set_rw_flags(kiocb, rw->flags);
if (unlikely(ret))
return ret;
+ kiocb->ki_flags |= IOCB_ALLOC_CACHE;
/*
* If the file is marked O_NONBLOCK, still allow retry for it if it
@@ -680,7 +681,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
return -EOPNOTSUPP;
kiocb->private = NULL;
- kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
+ kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
req->iopoll_completed = 0;
} else {
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 793ecff29038..910e633869b0 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -281,6 +281,26 @@ void cgroup_rstat_flush_release(void)
spin_unlock_irq(&cgroup_rstat_lock);
}
+/**
+ * cgroup_rstat_css_cpu_flush - flush stats for the given css and cpu
+ * @css: target css to be flush
+ * @cpu: the cpu that holds the stats to be flush
+ *
+ * A lightweight rstat flush operation for a given css and cpu.
+ * Only the cpu_lock is being held for mutual exclusion, the cgroup_rstat_lock
+ * isn't used.
+ */
+void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu)
+{
+ raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
+
+ raw_spin_lock_irq(cpu_lock);
+ rcu_read_lock();
+ css->ss->css_rstat_flush(css, cpu);
+ rcu_read_unlock();
+ raw_spin_unlock_irq(cpu_lock);
+}
+
int cgroup_rstat_init(struct cgroup *cgrp)
{
int cpu;
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 9860bb9a847c..133b74730738 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -121,11 +121,12 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
return nr_exclusive;
}
-static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
+static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
unsigned long flags;
wait_queue_entry_t bookmark;
+ int remaining = nr_exclusive;
bookmark.flags = 0;
bookmark.private = NULL;
@@ -134,10 +135,12 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
do {
spin_lock_irqsave(&wq_head->lock, flags);
- nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
+ remaining = __wake_up_common(wq_head, mode, remaining,
wake_flags, key, &bookmark);
spin_unlock_irqrestore(&wq_head->lock, flags);
} while (bookmark.flags & WQ_FLAG_BOOKMARK);
+
+ return nr_exclusive - remaining;
}
/**
@@ -147,13 +150,14 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: is directly passed to the wakeup function
*
- * If this function wakes up a task, it executes a full memory barrier before
- * accessing the task state.
+ * If this function wakes up a task, it executes a full memory barrier
+ * before accessing the task state. Returns the number of exclusive
+ * tasks that were awaken.
*/
-void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
- int nr_exclusive, void *key)
+int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
+ int nr_exclusive, void *key)
{
- __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
+ return __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index dc35464216d3..b3d9187611de 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -60,17 +60,6 @@ int lc_try_lock(struct lru_cache *lc)
} while (unlikely (val == LC_PARANOIA));
/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
return 0 == val;
-#if 0
- /* Alternative approach, spin in case someone enters or leaves a
- * PARANOIA_ENTRY()/RETURN() section. */
- unsigned long old, new, val;
- do {
- old = lc->flags & LC_PARANOIA;
- new = old | LC_LOCKED;
- val = cmpxchg(&lc->flags, old, new);
- } while (unlikely (val == (old ^ LC_PARANOIA)));
- return old == val;
-#endif
}
/**
@@ -364,7 +353,7 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsig
struct lc_element *e;
PARANOIA_ENTRY();
- if (lc->flags & LC_STARVING) {
+ if (test_bit(__LC_STARVING, &lc->flags)) {
++lc->starving;
RETURN(NULL);
}
@@ -417,7 +406,7 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsig
* the LRU element, we have to wait ...
*/
if (!lc_unused_element_available(lc)) {
- __set_bit(__LC_STARVING, &lc->flags);
+ set_bit(__LC_STARVING, &lc->flags);
RETURN(NULL);
}
@@ -586,48 +575,6 @@ struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
}
/**
- * lc_index_of
- * @lc: the lru cache to operate on
- * @e: the element to query for its index position in lc->element
- */
-unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
-{
- PARANOIA_LC_ELEMENT(lc, e);
- return e->lc_index;
-}
-
-/**
- * lc_set - associate index with label
- * @lc: the lru cache to operate on
- * @enr: the label to set
- * @index: the element index to associate label with.
- *
- * Used to initialize the active set to some previously recorded state.
- */
-void lc_set(struct lru_cache *lc, unsigned int enr, int index)
-{
- struct lc_element *e;
- struct list_head *lh;
-
- if (index < 0 || index >= lc->nr_elements)
- return;
-
- e = lc_element_by_index(lc, index);
- BUG_ON(e->lc_number != e->lc_new_number);
- BUG_ON(e->refcnt != 0);
-
- e->lc_number = e->lc_new_number = enr;
- hlist_del_init(&e->colision);
- if (enr == LC_FREE)
- lh = &lc->free;
- else {
- hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
- lh = &lc->lru;
- }
- list_move(&e->list, lh);
-}
-
-/**
* lc_seq_dump_details - Dump a complete LRU cache to seq in textual form.
* @lc: the lru cache to operate on
* @seq: the &struct seq_file pointer to seq_printf into
@@ -661,7 +608,6 @@ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext
EXPORT_SYMBOL(lc_create);
EXPORT_SYMBOL(lc_reset);
EXPORT_SYMBOL(lc_destroy);
-EXPORT_SYMBOL(lc_set);
EXPORT_SYMBOL(lc_del);
EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_find);
@@ -669,7 +615,6 @@ EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_put);
EXPORT_SYMBOL(lc_committed);
EXPORT_SYMBOL(lc_element_by_index);
-EXPORT_SYMBOL(lc_index_of);
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_seq_dump_details);
EXPORT_SYMBOL(lc_try_lock);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index eca462cba398..586deb333237 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -560,33 +560,41 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
-static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
+static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
{
int i, wake_index;
if (!atomic_read(&sbq->ws_active))
- return NULL;
+ return;
wake_index = atomic_read(&sbq->wake_index);
for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
struct sbq_wait_state *ws = &sbq->ws[wake_index];
- if (waitqueue_active(&ws->wait)) {
- if (wake_index != atomic_read(&sbq->wake_index))
- atomic_set(&sbq->wake_index, wake_index);
- return ws;
- }
-
+ /*
+ * Advance the index before checking the current queue.
+ * It improves fairness, by ensuring the queue doesn't
+ * need to be fully emptied before trying to wake up
+ * from the next one.
+ */
wake_index = sbq_index_inc(wake_index);
+
+ /*
+ * It is sufficient to wake up at least one waiter to
+ * guarantee forward progress.
+ */
+ if (waitqueue_active(&ws->wait) &&
+ wake_up_nr(&ws->wait, nr))
+ break;
}
- return NULL;
+ if (wake_index != atomic_read(&sbq->wake_index))
+ atomic_set(&sbq->wake_index, wake_index);
}
void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
{
unsigned int wake_batch = READ_ONCE(sbq->wake_batch);
- struct sbq_wait_state *ws = NULL;
unsigned int wakeups;
if (!atomic_read(&sbq->ws_active))
@@ -598,16 +606,10 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
do {
if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch)
return;
-
- if (!ws) {
- ws = sbq_wake_ptr(sbq);
- if (!ws)
- return;
- }
} while (!atomic_try_cmpxchg(&sbq->wakeup_cnt,
&wakeups, wakeups + wake_batch));
- wake_up_nr(&ws->wait, wake_batch);
+ __sbitmap_queue_wake_up(sbq, wake_batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);