Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig          20
-rw-r--r--  block/badblocks.c       6
-rw-r--r--  block/bdev.c          260
-rw-r--r--  block/blk-cgroup.c     13
-rw-r--r--  block/blk-cgroup.h      2
-rw-r--r--  block/blk-core.c       14
-rw-r--r--  block/blk-mq.c         89
-rw-r--r--  block/blk-pm.c         33
-rw-r--r--  block/blk-sysfs.c       2
-rw-r--r--  block/blk-throttle.c    2
10 files changed, 261 insertions, 180 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 55ae2286a4de..1de4682d48cc 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -78,6 +78,26 @@ config BLK_DEV_INTEGRITY_T10
select CRC_T10DIF
select CRC64_ROCKSOFT
+config BLK_DEV_WRITE_MOUNTED
+ bool "Allow writing to mounted block devices"
+ default y
+ help
+ When a block device is mounted, writing to its buffer cache is very
+ likely to cause filesystem corruption. It is also rather easy to
+ crash the kernel this way, since the filesystem has no practical way
+ of detecting these writes to the buffer cache and verifying its
+ metadata integrity. However, there are some setups that need this
+ capability, such as running fsck on a read-only mounted root device
+ or modifying some features of a mounted ext4 filesystem. If you say
+ N, the kernel will prevent processes from writing to block devices
+ that are mounted by filesystems, which provides some more protection
+ from runaway privileged processes and generally makes it much harder
+ to crash filesystem drivers. Note, however, that this does not
+ prevent the underlying device(s) from being modified by other means,
+ e.g. by directly submitting SCSI commands or through access to lower
+ layers of the storage stack. If in doubt, say Y. The configuration
+ can be overridden with the bdev_allow_write_mounted boot option.
+
config BLK_DEV_ZONED
bool "Zoned block device support"
select MQ_IOSCHED_DEADLINE
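
The help text above describes behaviour that is enforced at open time; as a rough illustration, here is a condensed restatement of the bdev_may_open() check added in block/bdev.c later in this patch (the helper name here is illustrative only, not code from the patch):

/*
 * Condensed sketch of the open-time check described in the help text.
 * bdev_allow_write_mounted and bd_writers are introduced by this patch;
 * the helper name is illustrative only.
 */
static bool write_open_permitted(struct block_device *bdev, blk_mode_t mode)
{
	/* CONFIG_BLK_DEV_WRITE_MOUNTED=y or bdev_allow_write_mounted=1 */
	if (bdev_allow_write_mounted)
		return true;
	/* -1 means a BLK_OPEN_RESTRICT_WRITES holder (e.g. a mounted fs). */
	if ((mode & BLK_OPEN_WRITE) && bdev->bd_writers == -1)
		return false;
	return true;
}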
diff --git a/block/badblocks.c b/block/badblocks.c
index fc92d4e18aa3..db4ec8b9b2a8 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -1312,12 +1312,14 @@ re_check:
prev = prev_badblocks(bb, &bad, hint);
/* start after all badblocks */
- if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
+ if ((prev >= 0) &&
+ ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
len = sectors;
goto update_sectors;
}
- if (overlap_front(bb, prev, &bad)) {
+ /* Overlapped with front badblocks record */
+ if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
if (BB_ACK(p[prev]))
acked_badblocks++;
else
diff --git a/block/bdev.c b/block/bdev.c
index e4cfb7adb645..e9f1b12bd75c 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -30,6 +30,9 @@
#include "../fs/internal.h"
#include "blk.h"
+/* Should we allow writing to mounted block devices? */
+static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED);
+
struct bdev_inode {
struct block_device bdev;
struct inode vfs_inode;
@@ -207,85 +210,88 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
EXPORT_SYMBOL(sync_blockdev_range);
/**
- * freeze_bdev - lock a filesystem and force it into a consistent state
+ * bdev_freeze - lock a filesystem and force it into a consistent state
* @bdev: blockdevice to lock
*
* If a superblock is found on this device, we take the s_umount semaphore
* on it to make sure nobody unmounts until the snapshot creation is done.
* The reference counter (bd_fsfreeze_count) guarantees that only the last
* unfreeze process can unfreeze the frozen filesystem actually when multiple
- * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
- * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
+ * freeze requests arrive simultaneously. It counts up in bdev_freeze() and
+ * counts down in bdev_thaw(). When it becomes 0, bdev_thaw() will unfreeze
* actually.
+ *
+ * Return: On success zero is returned, negative error code on failure.
*/
-int freeze_bdev(struct block_device *bdev)
+int bdev_freeze(struct block_device *bdev)
{
- struct super_block *sb;
int error = 0;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (++bdev->bd_fsfreeze_count > 1)
- goto done;
-
- sb = get_active_super(bdev);
- if (!sb)
- goto sync;
- if (sb->s_op->freeze_super)
- error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
- else
- error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
- deactivate_super(sb);
- if (error) {
- bdev->bd_fsfreeze_count--;
- goto done;
+ if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) {
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return 0;
}
- bdev->bd_fsfreeze_sb = sb;
-sync:
- sync_blockdev(bdev);
-done:
+ mutex_lock(&bdev->bd_holder_lock);
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) {
+ error = bdev->bd_holder_ops->freeze(bdev);
+ lockdep_assert_not_held(&bdev->bd_holder_lock);
+ } else {
+ mutex_unlock(&bdev->bd_holder_lock);
+ error = sync_blockdev(bdev);
+ }
+
+ if (error)
+ atomic_dec(&bdev->bd_fsfreeze_count);
+
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return error;
}
-EXPORT_SYMBOL(freeze_bdev);
+EXPORT_SYMBOL(bdev_freeze);
/**
- * thaw_bdev - unlock filesystem
+ * bdev_thaw - unlock filesystem
* @bdev: blockdevice to unlock
*
- * Unlocks the filesystem and marks it writeable again after freeze_bdev().
+ * Unlocks the filesystem and marks it writeable again after bdev_freeze().
+ *
+ * Return: On success zero is returned, negative error code on failure.
*/
-int thaw_bdev(struct block_device *bdev)
+int bdev_thaw(struct block_device *bdev)
{
- struct super_block *sb;
- int error = -EINVAL;
+ int error = -EINVAL, nr_freeze;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (!bdev->bd_fsfreeze_count)
+
+ /*
+ * If this returns < 0 it means that @bd_fsfreeze_count was
+ * already 0 and no decrement was performed.
+ */
+ nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count);
+ if (nr_freeze < 0)
goto out;
error = 0;
- if (--bdev->bd_fsfreeze_count > 0)
+ if (nr_freeze > 0)
goto out;
- sb = bdev->bd_fsfreeze_sb;
- if (!sb)
- goto out;
+ mutex_lock(&bdev->bd_holder_lock);
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) {
+ error = bdev->bd_holder_ops->thaw(bdev);
+ lockdep_assert_not_held(&bdev->bd_holder_lock);
+ } else {
+ mutex_unlock(&bdev->bd_holder_lock);
+ }
- if (sb->s_op->thaw_super)
- error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
- else
- error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
if (error)
- bdev->bd_fsfreeze_count++;
- else
- bdev->bd_fsfreeze_sb = NULL;
+ atomic_inc(&bdev->bd_fsfreeze_count);
out:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return error;
}
-EXPORT_SYMBOL(thaw_bdev);
+EXPORT_SYMBOL(bdev_thaw);
/*
* pseudo-fs
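
A minimal usage sketch for the renamed freeze/thaw helpers (illustrative only; how @bdev is obtained and the work done while frozen are placeholders):

/*
 * Sketch: freeze, do work that needs a consistent filesystem, thaw.
 * The function name and the snapshot step are placeholders.
 */
static int snapshot_under_freeze(struct block_device *bdev)
{
	int error;

	error = bdev_freeze(bdev);	/* bumps bd_fsfreeze_count */
	if (error)
		return error;

	/* ... create the snapshot while the filesystem is consistent ... */

	return bdev_thaw(bdev);		/* unfreezes once the count drops to 0 */
}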
@@ -425,6 +431,8 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
void bdev_add(struct block_device *bdev, dev_t dev)
{
+ if (bdev_stable_writes(bdev))
+ mapping_set_stable_writes(bdev->bd_inode->i_mapping);
bdev->bd_dev = dev;
bdev->bd_inode->i_rdev = dev;
bdev->bd_inode->i_ino = dev;
@@ -727,9 +735,60 @@ void blkdev_put_no_open(struct block_device *bdev)
{
put_device(&bdev->bd_device);
}
-
+
+static bool bdev_writes_blocked(struct block_device *bdev)
+{
+ return bdev->bd_writers == -1;
+}
+
+static void bdev_block_writes(struct block_device *bdev)
+{
+ bdev->bd_writers = -1;
+}
+
+static void bdev_unblock_writes(struct block_device *bdev)
+{
+ bdev->bd_writers = 0;
+}
+
+static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
+{
+ if (bdev_allow_write_mounted)
+ return true;
+ /* Writes blocked? */
+ if (mode & BLK_OPEN_WRITE && bdev_writes_blocked(bdev))
+ return false;
+ if (mode & BLK_OPEN_RESTRICT_WRITES && bdev->bd_writers > 0)
+ return false;
+ return true;
+}
+
+static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
+{
+ if (bdev_allow_write_mounted)
+ return;
+
+ /* Claim exclusive or shared write access. */
+ if (mode & BLK_OPEN_RESTRICT_WRITES)
+ bdev_block_writes(bdev);
+ else if (mode & BLK_OPEN_WRITE)
+ bdev->bd_writers++;
+}
+
+static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode)
+{
+ if (bdev_allow_write_mounted)
+ return;
+
+ /* Yield exclusive or shared write access. */
+ if (mode & BLK_OPEN_RESTRICT_WRITES)
+ bdev_unblock_writes(bdev);
+ else if (mode & BLK_OPEN_WRITE)
+ bdev->bd_writers--;
+}
+
/**
- * blkdev_get_by_dev - open a block device by device number
+ * bdev_open_by_dev - open a block device by device number
* @dev: device number of block device to open
* @mode: open mode (BLK_OPEN_*)
* @holder: exclusive holder identifier
@@ -741,32 +800,46 @@ void blkdev_put_no_open(struct block_device *bdev)
*
* Use this interface ONLY if you really do not have anything better - i.e. when
* you are behind a truly sucky interface and all you are given is a device
- * number. Everything else should use blkdev_get_by_path().
+ * number. Everything else should use bdev_open_by_path().
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * Reference to the block_device on success, ERR_PTR(-errno) on failure.
+ * Handle with a reference to the block_device on success, ERR_PTR(-errno) on
+ * failure.
*/
-struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
- const struct blk_holder_ops *hops)
+struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+ const struct blk_holder_ops *hops)
{
- bool unblock_events = true;
+ struct bdev_handle *handle = kmalloc(sizeof(struct bdev_handle),
+ GFP_KERNEL);
struct block_device *bdev;
+ bool unblock_events = true;
struct gendisk *disk;
int ret;
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+
ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
MAJOR(dev), MINOR(dev),
((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
if (ret)
- return ERR_PTR(ret);
+ goto free_handle;
+
+ /* Blocking writes requires exclusive opener */
+ if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) {
+ ret = -EINVAL;
+ goto free_handle;
+ }
bdev = blkdev_get_no_open(dev);
- if (!bdev)
- return ERR_PTR(-ENXIO);
+ if (!bdev) {
+ ret = -ENXIO;
+ goto free_handle;
+ }
disk = bdev->bd_disk;
if (holder) {
@@ -789,12 +862,16 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
goto abort_claiming;
if (!try_module_get(disk->fops->owner))
goto abort_claiming;
+ ret = -EBUSY;
+ if (!bdev_may_open(bdev, mode))
+ goto abort_claiming;
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
else
ret = blkdev_get_whole(bdev, mode);
if (ret)
goto put_module;
+ bdev_claim_write_access(bdev, mode);
if (holder) {
bd_finish_claiming(bdev, holder, hops);
@@ -815,7 +892,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
if (unblock_events)
disk_unblock_events(disk);
- return bdev;
+ handle->bdev = bdev;
+ handle->holder = holder;
+ handle->mode = mode;
+ return handle;
put_module:
module_put(disk->fops->owner);
abort_claiming:
@@ -825,34 +905,14 @@ abort_claiming:
disk_unblock_events(disk);
put_blkdev:
blkdev_put_no_open(bdev);
+free_handle:
+ kfree(handle);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(blkdev_get_by_dev);
-
-struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
- const struct blk_holder_ops *hops)
-{
- struct bdev_handle *handle = kmalloc(sizeof(*handle), GFP_KERNEL);
- struct block_device *bdev;
-
- if (!handle)
- return ERR_PTR(-ENOMEM);
- bdev = blkdev_get_by_dev(dev, mode, holder, hops);
- if (IS_ERR(bdev)) {
- kfree(handle);
- return ERR_CAST(bdev);
- }
- handle->bdev = bdev;
- handle->holder = holder;
- if (holder)
- mode |= BLK_OPEN_EXCL;
- handle->mode = mode;
- return handle;
-}
EXPORT_SYMBOL(bdev_open_by_dev);
/**
- * blkdev_get_by_path - open a block device by name
+ * bdev_open_by_path - open a block device by name
* @path: path to the block device to open
* @mode: open mode (BLK_OPEN_*)
* @holder: exclusive holder identifier
@@ -866,29 +926,9 @@ EXPORT_SYMBOL(bdev_open_by_dev);
* Might sleep.
*
* RETURNS:
- * Reference to the block_device on success, ERR_PTR(-errno) on failure.
+ * Handle with a reference to the block_device on success, ERR_PTR(-errno) on
+ * failure.
*/
-struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
- void *holder, const struct blk_holder_ops *hops)
-{
- struct block_device *bdev;
- dev_t dev;
- int error;
-
- error = lookup_bdev(path, &dev);
- if (error)
- return ERR_PTR(error);
-
- bdev = blkdev_get_by_dev(dev, mode, holder, hops);
- if (!IS_ERR(bdev) && (mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
- blkdev_put(bdev, holder);
- return ERR_PTR(-EACCES);
- }
-
- return bdev;
-}
-EXPORT_SYMBOL(blkdev_get_by_path);
-
struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hops)
{
@@ -911,8 +951,9 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
}
EXPORT_SYMBOL(bdev_open_by_path);
-void blkdev_put(struct block_device *bdev, void *holder)
+void bdev_release(struct bdev_handle *handle)
{
+ struct block_device *bdev = handle->bdev;
struct gendisk *disk = bdev->bd_disk;
/*
@@ -926,8 +967,10 @@ void blkdev_put(struct block_device *bdev, void *holder)
sync_blockdev(bdev);
mutex_lock(&disk->open_mutex);
- if (holder)
- bd_end_claim(bdev, holder);
+ bdev_yield_write_access(bdev, handle->mode);
+
+ if (handle->holder)
+ bd_end_claim(bdev, handle->holder);
/*
* Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -944,12 +987,6 @@ void blkdev_put(struct block_device *bdev, void *holder)
module_put(disk->fops->owner);
blkdev_put_no_open(bdev);
-}
-EXPORT_SYMBOL(blkdev_put);
-
-void bdev_release(struct bdev_handle *handle)
-{
- blkdev_put(handle->bdev, handle->holder);
kfree(handle);
}
EXPORT_SYMBOL(bdev_release);
@@ -1100,3 +1137,12 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
blkdev_put_no_open(bdev);
}
+
+static int __init setup_bdev_allow_write_mounted(char *str)
+{
+ if (kstrtobool(str, &bdev_allow_write_mounted))
+ pr_warn("Invalid option string for bdev_allow_write_mounted:"
+ " '%s'\n", str);
+ return 1;
+}
+__setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4a42ea2972ad..4b48c2c44098 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -577,6 +577,7 @@ static void blkg_destroy_all(struct gendisk *disk)
struct request_queue *q = disk->queue;
struct blkcg_gq *blkg, *n;
int count = BLKG_DESTROY_BATCH_SIZE;
+ int i;
restart:
spin_lock_irq(&q->queue_lock);
@@ -602,6 +603,18 @@ restart:
}
}
+ /*
+ * Mark the policies deactivated: policy offline has already been done
+ * and the free is scheduled, so any future blkcg_deactivate_policy()
+ * call can be bypassed.
+ */
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+
+ if (pol)
+ __clear_bit(pol->plid, q->blkcg_pols);
+ }
+
q->root_blkg = NULL;
spin_unlock_irq(&q->queue_lock);
}
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 624c03c8fe64..fd482439afbc 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -249,8 +249,6 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
{
struct blkcg_gq *blkg;
- WARN_ON_ONCE(!rcu_read_lock_held());
-
if (blkcg == &blkcg_root)
return q->root_blkg;
diff --git a/block/blk-core.c b/block/blk-core.c
index fdf25b8d6e78..2eca76ccf4ee 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -501,9 +501,17 @@ static inline void bio_check_ro(struct bio *bio)
if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return;
- pr_warn_ratelimited("Trying to write to read-only block-device %pg\n",
- bio->bi_bdev);
- /* Older lvm-tools actually trigger this */
+
+ if (bio->bi_bdev->bd_ro_warned)
+ return;
+
+ bio->bi_bdev->bd_ro_warned = true;
+ /*
+ * Setting the underlying disk of a raid/dm device to read-only
+ * via ioctl will trigger this.
+ */
+ pr_warn("Trying to write to read-only block-device %pg\n",
+ bio->bi_bdev);
}
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e..ac18f802c027 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1512,14 +1512,26 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
+static bool blk_is_flush_data_rq(struct request *rq)
+{
+ return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq);
+}
+
static bool blk_mq_rq_inflight(struct request *rq, void *priv)
{
/*
* If we find a request that isn't idle we know the queue is busy
* as it's checked in the iter.
* Return false to stop the iteration.
+ *
+ * While the queue is quiesced, a completed flush data request is not
+ * counted as inflight: the flush sequence is suspended and the
+ * original flush data request is invisible to the driver, just like
+ * the other requests held back by the quiesce.
*/
- if (blk_mq_request_started(rq)) {
+ if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) &&
+ blk_is_flush_data_rq(rq) &&
+ blk_mq_request_completed(rq))) {
bool *busy = priv;
*busy = true;
@@ -2858,11 +2870,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
};
struct request *rq;
- if (unlikely(bio_queue_enter(bio)))
- return NULL;
-
if (blk_mq_attempt_bio_merge(q, bio, nsegs))
- goto queue_exit;
+ return NULL;
rq_qos_throttle(q, bio);
@@ -2878,35 +2887,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
rq_qos_cleanup(q, bio);
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
-queue_exit:
- blk_queue_exit(q);
return NULL;
}
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
- struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
+/* return true if this @rq can be used for @bio */
+static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+ struct bio *bio)
{
- struct request *rq;
- enum hctx_type type, hctx_type;
+ enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
+ enum hctx_type hctx_type = rq->mq_hctx->type;
- if (!plug)
- return NULL;
- rq = rq_list_peek(&plug->cached_rq);
- if (!rq || rq->q != q)
- return NULL;
-
- if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
- *bio = NULL;
- return NULL;
- }
+ WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
- type = blk_mq_get_hctx_type((*bio)->bi_opf);
- hctx_type = rq->mq_hctx->type;
if (type != hctx_type &&
!(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
- return NULL;
- if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
- return NULL;
+ return false;
+ if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+ return false;
/*
* If any qos ->throttle() end up blocking, we will have flushed the
@@ -2914,12 +2911,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
* before we throttle.
*/
plug->cached_rq = rq_list_next(rq);
- rq_qos_throttle(q, *bio);
+ rq_qos_throttle(rq->q, bio);
blk_mq_rq_time_init(rq, 0);
- rq->cmd_flags = (*bio)->bi_opf;
+ rq->cmd_flags = bio->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
- return rq;
+ return true;
}
static void bio_set_ioprio(struct bio *bio)
@@ -2949,7 +2946,7 @@ void blk_mq_submit_bio(struct bio *bio)
struct blk_plug *plug = blk_mq_plug(bio);
const int is_sync = op_is_sync(bio->bi_opf);
struct blk_mq_hw_ctx *hctx;
- struct request *rq;
+ struct request *rq = NULL;
unsigned int nr_segs = 1;
blk_status_t ret;
@@ -2960,20 +2957,36 @@ void blk_mq_submit_bio(struct bio *bio)
return;
}
- if (!bio_integrity_prep(bio))
- return;
-
bio_set_ioprio(bio);
- rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
- if (!rq) {
- if (!bio)
+ if (plug) {
+ rq = rq_list_peek(&plug->cached_rq);
+ if (rq && rq->q != q)
+ rq = NULL;
+ }
+ if (rq) {
+ if (!bio_integrity_prep(bio))
return;
- rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
- if (unlikely(!rq))
+ if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
return;
+ if (blk_mq_can_use_cached_rq(rq, plug, bio))
+ goto done;
+ percpu_ref_get(&q->q_usage_counter);
+ } else {
+ if (unlikely(bio_queue_enter(bio)))
+ return;
+ if (!bio_integrity_prep(bio))
+ goto fail;
}
+ rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+ if (unlikely(!rq)) {
+fail:
+ blk_queue_exit(q);
+ return;
+ }
+
+done:
trace_block_getrq(bio);
rq_qos_track(q, rq, bio);
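
The cached-request path reworked above only applies when submission happens under a plug; a short sketch of that context (the batching loop and names here are illustrative, not part of this patch):

/*
 * Sketch: bios submitted under a plug may reuse plug->cached_rq in
 * blk_mq_submit_bio(); the batching loop is illustrative only.
 */
static void submit_bio_batch(struct bio **bios, unsigned int nr)
{
	struct blk_plug plug;
	unsigned int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);
	blk_finish_plug(&plug);
}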
diff --git a/block/blk-pm.c b/block/blk-pm.c
index 6b72b2e03fc8..42e842074715 100644
--- a/block/blk-pm.c
+++ b/block/blk-pm.c
@@ -163,39 +163,16 @@ EXPORT_SYMBOL(blk_pre_runtime_resume);
* @q: the queue of the device
*
* Description:
- * For historical reasons, this routine merely calls blk_set_runtime_active()
- * to do the real work of restarting the queue. It does this regardless of
- * whether the device's runtime-resume succeeded; even if it failed the
+ * Restart the queue of a runtime suspended device. It does this regardless
+ * of whether the device's runtime-resume succeeded; even if it failed the
* driver or error handler will need to communicate with the device.
*
* This function should be called near the end of the device's
- * runtime_resume callback.
+ * runtime_resume callback to correct queue runtime PM status and re-enable
+ * peeking requests from the queue.
*/
void blk_post_runtime_resume(struct request_queue *q)
{
- blk_set_runtime_active(q);
-}
-EXPORT_SYMBOL(blk_post_runtime_resume);
-
-/**
- * blk_set_runtime_active - Force runtime status of the queue to be active
- * @q: the queue of the device
- *
- * If the device is left runtime suspended during system suspend the resume
- * hook typically resumes the device and corrects runtime status
- * accordingly. However, that does not affect the queue runtime PM status
- * which is still "suspended". This prevents processing requests from the
- * queue.
- *
- * This function can be used in driver's resume hook to correct queue
- * runtime PM status and re-enable peeking requests from the queue. It
- * should be called before first request is added to the queue.
- *
- * This function is also called by blk_post_runtime_resume() for
- * runtime resumes. It does everything necessary to restart the queue.
- */
-void blk_set_runtime_active(struct request_queue *q)
-{
int old_status;
if (!q->dev)
@@ -211,4 +188,4 @@ void blk_set_runtime_active(struct request_queue *q)
if (old_status != RPM_ACTIVE)
blk_clear_pm_only(q);
}
-EXPORT_SYMBOL(blk_set_runtime_active);
+EXPORT_SYMBOL(blk_post_runtime_resume);
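
Per the updated kernel-doc, the helper is now meant to be called directly near the end of a driver's runtime_resume callback; a hypothetical sketch (the driver, drvdata layout and wake-up helper are made up):

/*
 * Hypothetical runtime_resume callback; example_wake_hardware() and the
 * drvdata layout are placeholders.
 */
static int example_runtime_resume(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);
	int ret;

	ret = example_wake_hardware(dev);

	/* Called near the end, regardless of whether the resume succeeded. */
	blk_post_runtime_resume(q);
	return ret;
}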
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 63e481262336..0b2d04766324 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -615,6 +615,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
#endif
+/* Common attributes for bio-based and request-based queues. */
static struct attribute *queue_attrs[] = {
&queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr,
@@ -659,6 +660,7 @@ static struct attribute *queue_attrs[] = {
NULL,
};
+/* Request-based queue attributes that are not relevant for bio-based queues. */
static struct attribute *blk_mq_queue_attrs[] = {
&queue_requests_entry.attr,
&elv_iosched_entry.attr,
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 13e4377a8b28..16f5766620a4 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1320,6 +1320,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE),
tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE));
+ rcu_read_lock();
/*
* Update has_rules[] flags for the updated tg's subtree. A tg is
* considered to have rules if either the tg itself or any of its
@@ -1347,6 +1348,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
this_tg->latency_target = max(this_tg->latency_target,
parent_tg->latency_target);
}
+ rcu_read_unlock();
/*
* We're already holding queue_lock and know @tg is valid. Let's