From 14cb0dc6479dc5ebc63b3a459a5d89a2f1b39fed Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 15:40:43 +0800 Subject: block: don't let passthrough IO go into .make_request_fn() Commit a8821f3f3("block: Improvements to bounce-buffer handling") tries to make sure that the bio to .make_request_fn won't exceed BIO_MAX_PAGES, but ignores that passthrough I/O can use blk_queue_bounce() too. Especially, passthrough IO may not be sector-aligned, and the check of 'sectors < bio_sectors(*bio_orig)' inside __blk_queue_bounce() may become true even though the max bvec number doesn't exceed BIO_MAX_PAGES, then cause the bio splitted, and the original passthrough bio is submited to generic_make_request(). This patch fixes this issue by checking if the bio is passthrough IO, and use bio_kmalloc() to allocate the cloned passthrough bio. Cc: NeilBrown Fixes: a8821f3f3("block: Improvements to bounce-buffer handling") Tested-by: Michele Ballabio Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8089ca17db9a..abd06f540863 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -241,14 +241,24 @@ struct request { struct request *next_rq; }; +static inline bool blk_op_is_scsi(unsigned int op) +{ + return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT; +} + +static inline bool blk_op_is_private(unsigned int op) +{ + return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; +} + static inline bool blk_rq_is_scsi(struct request *rq) { - return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; + return blk_op_is_scsi(req_op(rq)); } static inline bool blk_rq_is_private(struct request *rq) { - return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; + return blk_op_is_private(req_op(rq)); } static inline bool blk_rq_is_passthrough(struct request *rq) @@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq) return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); } +static inline bool bio_is_passthrough(struct bio *bio) +{ + unsigned op = bio_op(bio); + + return blk_op_is_scsi(op) || blk_op_is_private(op); +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; -- cgit v1.2.3-73-gaa49b From 0abc2a10389f0c9070f76ca906c7382788036b93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Dec 2017 15:40:44 +0800 Subject: block: fix blk_rq_append_bio Commit caa4b02476e3(blk-map: call blk_queue_bounce from blk_rq_append_bio) moves blk_queue_bounce() into blk_rq_append_bio(), but don't consider the fact that the bounced bio becomes invisible to caller since the parameter type is 'struct bio *'. Make it a pointer to a pointer to a bio, so the caller sees the right bio also after a bounce. Fixes: caa4b02476e3 ("blk-map: call blk_queue_bounce from blk_rq_append_bio") Cc: Christoph Hellwig Reported-by: Michele Ballabio (handling failure of blk_rq_append_bio(), only call bio_get() after blk_rq_append_bio() returns OK) Tested-by: Michele Ballabio Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-map.c | 38 ++++++++++++++++++++++---------------- drivers/scsi/osd/osd_initiator.c | 4 +++- drivers/target/target_core_pscsi.c | 4 ++-- include/linux/blkdev.h | 2 +- 4 files changed, 28 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/block/blk-map.c b/block/blk-map.c index b21f8e86f120..d3a94719f03f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -12,22 +12,29 @@ #include "blk.h" /* - * Append a bio to a passthrough request. Only works can be merged into - * the request based on the driver constraints. + * Append a bio to a passthrough request. Only works if the bio can be merged + * into the request based on the driver constraints. */ -int blk_rq_append_bio(struct request *rq, struct bio *bio) +int blk_rq_append_bio(struct request *rq, struct bio **bio) { - blk_queue_bounce(rq->q, &bio); + struct bio *orig_bio = *bio; + + blk_queue_bounce(rq->q, bio); if (!rq->bio) { - blk_rq_bio_prep(rq->q, rq, bio); + blk_rq_bio_prep(rq->q, rq, *bio); } else { - if (!ll_back_merge_fn(rq->q, rq, bio)) + if (!ll_back_merge_fn(rq->q, rq, *bio)) { + if (orig_bio != *bio) { + bio_put(*bio); + *bio = orig_bio; + } return -EINVAL; + } - rq->biotail->bi_next = bio; - rq->biotail = bio; - rq->__data_len += bio->bi_iter.bi_size; + rq->biotail->bi_next = *bio; + rq->biotail = *bio; + rq->__data_len += (*bio)->bi_iter.bi_size; } return 0; @@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq, * We link the bounce buffer in and could have to traverse it * later so we have to get a ref to prevent it from being freed */ - ret = blk_rq_append_bio(rq, bio); - bio_get(bio); + ret = blk_rq_append_bio(rq, &bio); if (ret) { - bio_endio(bio); __blk_rq_unmap_user(orig_bio); - bio_put(bio); return ret; } + bio_get(bio); return 0; } @@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, int reading = rq_data_dir(rq) == READ; unsigned long addr = (unsigned long) kbuf; int do_copy = 0; - struct bio *bio; + struct bio *bio, *orig_bio; int ret; if (len > (queue_max_hw_sectors(q) << 9)) @@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (do_copy) rq->rq_flags |= RQF_COPY_USER; - ret = blk_rq_append_bio(rq, bio); + orig_bio = bio; + ret = blk_rq_append_bio(rq, &bio); if (unlikely(ret)) { /* request is too big */ - bio_put(bio); + bio_put(orig_bio); return ret; } diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index a4f28b7e4c65..e18877177f1b 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write, return req; for_each_bio(bio) { - ret = blk_rq_append_bio(req, bio); + struct bio *bounce_bio = bio; + + ret = blk_rq_append_bio(req, &bounce_bio); if (ret) return ERR_PTR(ret); } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 7c69b4a9694d..0d99b242e82e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, " %d i: %d bio: %p, allocating another" " bio\n", bio->bi_vcnt, i, bio); - rc = blk_rq_append_bio(req, bio); + rc = blk_rq_append_bio(req, &bio); if (rc) { pr_err("pSCSI: failed to append bio\n"); goto fail; @@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } if (bio) { - rc = blk_rq_append_bio(req, bio); + rc = blk_rq_append_bio(req, &bio); if (rc) { pr_err("pSCSI: failed to append bio\n"); goto fail; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index abd06f540863..100d0df38026 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -965,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); -extern int blk_rq_append_bio(struct request *rq, struct bio *bio); +extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); -- cgit v1.2.3-73-gaa49b From 111be883981748acc9a56e855c8336404a8e787c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Dec 2017 11:10:17 -0700 Subject: block-throttle: avoid double charge If a bio is throttled and split after throttling, the bio could be resubmited and enters the throttling again. This will cause part of the bio to be charged multiple times. If the cgroup has an IO limit, the double charge will significantly harm the performance. The bio split becomes quite common after arbitrary bio size change. To fix this, we always set the BIO_THROTTLED flag if a bio is throttled. If the bio is cloned/split, we copy the flag to new bio too to avoid a double charge. However, cloned bio could be directed to a new disk, keeping the flag be a problem. The observation is we always set new disk for the bio in this case, so we can clear the flag in bio_set_dev(). This issue exists for a long time, arbitrary bio size change just makes it worse, so this should go into stable at least since v4.2. V1-> V2: Not add extra field in bio based on discussion with Tejun Cc: Vivek Goyal Cc: stable@vger.kernel.org Acked-by: Tejun Heo Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/bio.c | 2 ++ block/blk-throttle.c | 8 +------- include/linux/bio.h | 2 ++ include/linux/blk_types.h | 9 ++++----- 4 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/block/bio.c b/block/bio.c index 8bfdea58159b..9ef6cf3addb3 100644 --- a/block/bio.c +++ b/block/bio.c @@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_disk = bio_src->bi_disk; bio->bi_partno = bio_src->bi_partno; bio_set_flag(bio, BIO_CLONED); + if (bio_flagged(bio_src, BIO_THROTTLED)) + bio_set_flag(bio, BIO_THROTTLED); bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 825bc29767e6..d19f416d6101 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2226,13 +2226,7 @@ again: out_unlock: spin_unlock_irq(q->queue_lock); out: - /* - * As multiple blk-throtls may stack in the same issue path, we - * don't want bios to leave with the flag set. Clear the flag if - * being issued. - */ - if (!throttled) - bio_clear_flag(bio, BIO_THROTTLED); + bio_set_flag(bio, BIO_THROTTLED); #ifdef CONFIG_BLK_DEV_THROTTLING_LOW if (throttled || !td->track_bio_latency) diff --git a/include/linux/bio.h b/include/linux/bio.h index 82f0c8fd7be8..23d29b39f71e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx); #define bio_set_dev(bio, bdev) \ do { \ + if ((bio)->bi_disk != (bdev)->bd_disk) \ + bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ } while (0) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1e628e032da..9e7d8bd776d2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -50,8 +50,6 @@ struct blk_issue_stat { struct bio { struct bio *bi_next; /* request queue link */ struct gendisk *bi_disk; - u8 bi_partno; - blk_status_t bi_status; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use * accessors. @@ -59,8 +57,8 @@ struct bio { unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; unsigned short bi_write_hint; - - struct bvec_iter bi_iter; + blk_status_t bi_status; + u8 bi_partno; /* Number of segments in this BIO after * physical address coalescing is performed. @@ -74,8 +72,9 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - atomic_t __bi_remaining; + struct bvec_iter bi_iter; + atomic_t __bi_remaining; bio_end_io_t *bi_end_io; void *bi_private; -- cgit v1.2.3-73-gaa49b From 4ccafe032005e9b96acbef2e389a4de5b1254add Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Dec 2017 13:13:58 -0700 Subject: block: unalign call_single_data in struct request A previous change blindly added massive alignment to the call_single_data structure in struct request. This ballooned it in size from 296 to 320 bytes on my setup, for no valid reason at all. Use the unaligned struct __call_single_data variant instead. Fixes: 966a967116e69 ("smp: Avoid using two cache lines for struct call_single_data") Cc: stable@vger.kernel.org # v4.14 Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 100d0df38026..0ce8a372d506 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t; struct request { struct list_head queuelist; union { - call_single_data_t csd; + struct __call_single_data csd; u64 fifo_time; }; -- cgit v1.2.3-73-gaa49b