From c6b7a3a26e809c9d2a51ae303764c1d2994f31cf Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 24 Jun 2023 21:01:05 +0800 Subject: blk-mq: fix two misuses on RQF_USE_SCHED Request allocated from sched tags can't be issued via ->queue_rqs() directly, since driver tag isn't allocated yet. This is the 1st misuse of RQF_USE_SCHED for figuring out plug->has_elevator. Request allocated from sched tags can't be ended by blk_mq_end_request_batch() too, fix the 2nd RQF_USE_SCHED misuse in blk_mq_add_to_batch(). Without this patch, NVMe uring cmd passthrough IO workload can run into hang easily with real io scheduler. Fixes: dd6216bb16e8 ("blk-mq: make sure elevator callbacks aren't called for passthrough request") Reported-by: Guangwu Zhang Closes: https://lore.kernel.org/linux-block/CAGS2=YrBjpLPOKa-gzcKuuOG60AGth5794PNCDwatdnnscB9ug@mail.gmail.com/ Cc: Christoph Hellwig Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230624130105.1443879-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f401067ac03a..aaed687a454c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -852,7 +852,11 @@ static inline bool blk_mq_add_to_batch(struct request *req, struct io_comp_batch *iob, int ioerror, void (*complete)(struct io_comp_batch *)) { - if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror || + /* + * blk_mq_end_request_batch() can't end request allocated from + * sched tags + */ + if (!iob || (req->rq_flags & RQF_SCHED_TAGS) || ioerror || (req->end_io && !blk_rq_is_passthrough(req))) return false; -- cgit From f6c80cffcd47a2d41943e3a41fbe9034d9f6d7b0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 12 Jun 2023 12:03:42 -0700 Subject: block: add request polling helper Provide a direct request polling will for drivers. The interface does not require a bio, and can skip the overhead associated with polling those. The biggest gain from skipping the relatively expensive xarray lookup unnecessary when you already have the request. With this, the simple rq/qc conversion functions have only one caller each, so open code this and remove the helpers. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230612190343.2087040-2-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 48 ++++++++++++++++++++++++++++++++---------------- include/linux/blk-mq.h | 2 ++ 2 files changed, 34 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 98eb31ff914d..5504719b970d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -49,17 +49,8 @@ static void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags); static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); - -static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, - blk_qc_t qc) -{ - return xa_load(&q->hctx_table, qc); -} - -static inline blk_qc_t blk_rq_to_qc(struct request *rq) -{ - return rq->mq_hctx->queue_num; -} +static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + struct io_comp_batch *iob, unsigned int flags); /* * Check if any of the ctx, dispatch list or elevator @@ -1248,7 +1239,7 @@ void blk_mq_start_request(struct request *rq) q->integrity.profile->prepare_fn(rq); #endif if (rq->bio && rq->bio->bi_opf & REQ_POLLED) - WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq)); + WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); } EXPORT_SYMBOL(blk_mq_start_request); @@ -1354,7 +1345,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll); static void blk_rq_poll_completion(struct request *rq, struct completion *wait) { do { - blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0); + blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0); cond_resched(); } while (!completion_done(wait)); } @@ -4749,10 +4740,9 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) } EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); -int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, - unsigned int flags) +static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + struct io_comp_batch *iob, unsigned int flags) { - struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); long state = get_current_state(); int ret; @@ -4777,6 +4767,32 @@ int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch * return 0; } +int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, + struct io_comp_batch *iob, unsigned int flags) +{ + struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie); + + return blk_hctx_poll(q, hctx, iob, flags); +} + +int blk_rq_poll(struct request *rq, struct io_comp_batch *iob, + unsigned int poll_flags) +{ + struct request_queue *q = rq->q; + int ret; + + if (!blk_rq_is_poll(rq)) + return 0; + if (!percpu_ref_tryget(&q->q_usage_counter)) + return 0; + + ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags); + blk_queue_exit(q); + + return ret; +} +EXPORT_SYMBOL_GPL(blk_rq_poll); + unsigned int blk_mq_rq_cpu(struct request *rq) { return rq->mq_ctx->cpu; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index aaed687a454c..2b7fb8e87793 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -715,6 +715,8 @@ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_free_request(struct request *rq); +int blk_rq_poll(struct request *rq, struct io_comp_batch *iob, + unsigned int poll_flags); bool blk_mq_queue_inflight(struct request_queue *q); -- cgit