From 51d638b1f56a0bfd9219800620994794a1a2b219 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Sun, 7 May 2017 00:14:22 -0700
Subject: [PATCH 01/10] block/mq: fix potential deadlock during cpu hotplug

This can be triggered by hot-unplugging one cpu.

======================================================
[ INFO: possible circular locking dependency detected ]
4.11.0+ #17 Not tainted
-------------------------------------------------------
step_after_susp/2640 is trying to acquire lock:
 (all_q_mutex){+.+...}, at: [] blk_mq_queue_reinit_work+0x18/0x110

but task is already holding lock:
 (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x7f/0xe0

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (cpu_hotplug.lock){+.+.+.}:
       lock_acquire+0x11c/0x230
       __mutex_lock+0x92/0x990
       mutex_lock_nested+0x1b/0x20
       get_online_cpus+0x64/0x80
       blk_mq_init_allocated_queue+0x3a0/0x4e0
       blk_mq_init_queue+0x3a/0x60
       loop_add+0xe5/0x280
       loop_init+0x124/0x177
       do_one_initcall+0x53/0x1c0
       kernel_init_freeable+0x1e3/0x27f
       kernel_init+0xe/0x100
       ret_from_fork+0x31/0x40

-> #0 (all_q_mutex){+.+...}:
       __lock_acquire+0x189a/0x18a0
       lock_acquire+0x11c/0x230
       __mutex_lock+0x92/0x990
       mutex_lock_nested+0x1b/0x20
       blk_mq_queue_reinit_work+0x18/0x110
       blk_mq_queue_reinit_dead+0x1c/0x20
       cpuhp_invoke_callback+0x1f2/0x810
       cpuhp_down_callbacks+0x42/0x80
       _cpu_down+0xb2/0xe0
       freeze_secondary_cpus+0xb6/0x390
       suspend_devices_and_enter+0x3b3/0xa40
       pm_suspend+0x129/0x490
       state_store+0x82/0xf0
       kobj_attr_store+0xf/0x20
       sysfs_kf_write+0x45/0x60
       kernfs_fop_write+0x135/0x1c0
       __vfs_write+0x37/0x160
       vfs_write+0xcd/0x1d0
       SyS_write+0x58/0xc0
       do_syscall_64+0x8f/0x710
       return_from_SYSCALL_64+0x0/0x7a

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(cpu_hotplug.lock);
                               lock(all_q_mutex);
                               lock(cpu_hotplug.lock);
  lock(all_q_mutex);

 *** DEADLOCK ***

8 locks held by step_after_susp/2640:
 #0:  (sb_writers#6){.+.+.+}, at: [] vfs_write+0x1ad/0x1d0
 #1:  (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0x101/0x1c0
 #2:  (s_active#166){.+.+.+}, at: [] kernfs_fop_write+0x109/0x1c0
 #3:  (pm_mutex){+.+...}, at: [] pm_suspend+0x21d/0x490
 #4:  (acpi_scan_lock){+.+.+.}, at: [] acpi_scan_lock_acquire+0x17/0x20
 #5:  (cpu_add_remove_lock){+.+.+.}, at: [] freeze_secondary_cpus+0x27/0x390
 #6:  (cpu_hotplug.dep_map){++++++}, at: [] cpu_hotplug_begin+0x5/0xe0
 #7:  (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x7f/0xe0

stack backtrace:
CPU: 3 PID: 2640 Comm: step_after_susp Not tainted 4.11.0+ #17
Hardware name: Dell Inc. OptiPlex 7040/0JCTF8, BIOS 1.4.9 09/12/2016
Call Trace:
 dump_stack+0x99/0xce
 print_circular_bug+0x1fa/0x270
 __lock_acquire+0x189a/0x18a0
 lock_acquire+0x11c/0x230
 ? lock_acquire+0x11c/0x230
 ? blk_mq_queue_reinit_work+0x18/0x110
 ? blk_mq_queue_reinit_work+0x18/0x110
 __mutex_lock+0x92/0x990
 ? blk_mq_queue_reinit_work+0x18/0x110
 ? kmem_cache_free+0x2cb/0x330
 ? anon_transport_class_unregister+0x20/0x20
 ? blk_mq_queue_reinit_work+0x110/0x110
 mutex_lock_nested+0x1b/0x20
 ? mutex_lock_nested+0x1b/0x20
 blk_mq_queue_reinit_work+0x18/0x110
 blk_mq_queue_reinit_dead+0x1c/0x20
 cpuhp_invoke_callback+0x1f2/0x810
 ? __flow_cache_shrink+0x160/0x160
 cpuhp_down_callbacks+0x42/0x80
 _cpu_down+0xb2/0xe0
 freeze_secondary_cpus+0xb6/0x390
 suspend_devices_and_enter+0x3b3/0xa40
 ? rcu_read_lock_sched_held+0x79/0x80
 pm_suspend+0x129/0x490
 state_store+0x82/0xf0
 kobj_attr_store+0xf/0x20
 sysfs_kf_write+0x45/0x60
 kernfs_fop_write+0x135/0x1c0
 __vfs_write+0x37/0x160
 ? rcu_read_lock_sched_held+0x79/0x80
 ? rcu_sync_lockdep_assert+0x2f/0x60
 ? __sb_start_write+0xd9/0x1c0
 ? vfs_write+0x1ad/0x1d0
 vfs_write+0xcd/0x1d0
 SyS_write+0x58/0xc0
 ? rcu_read_lock_sched_held+0x79/0x80
 do_syscall_64+0x8f/0x710
 ? trace_hardirqs_on_thunk+0x1a/0x1c
 entry_SYSCALL64_slow_path+0x25/0x25

The cpu hotplug path holds cpu_hotplug.lock and then reinits all
existing queues for blk-mq with all_q_mutex held; however,
blk_mq_init_allocated_queue() takes these two locks in the inverse
order. This is due to commit eabe06595d62 ("blk/mq: Cure cpu hotplug
lock inversion"), which fixes a cpu hotplug lock inversion issue caused
by the hotplug rework. However, the hotplug rework is still work in
progress, lives in a -tip branch, and mainline cannot yet trigger that
splat. The commit breaks Linus's tree in the merge window, so this
patch reverts the lock order and avoids the splat in Linus's tree.

Cc: Jens Axboe
Cc: Peter Zijlstra (Intel)
Cc: Thomas Gleixner
Signed-off-by: Wanpeng Li
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5d4ce7eb8dbf..a7e6b353e4e9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2341,15 +2341,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-	mutex_lock(&all_q_mutex);
 	get_online_cpus();
+	mutex_lock(&all_q_mutex);
 
 	list_add_tail(&q->all_q_node, &all_q_list);
 	blk_mq_add_queue_tag_set(set, q);
 	blk_mq_map_swqueue(q, cpu_online_mask);
 
-	put_online_cpus();
 	mutex_unlock(&all_q_mutex);
+	put_online_cpus();
 
 	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
 		int ret;

From 629b1b2e0e6c8285fdc21624af60e8190fa4417e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Sun, 7 May 2017 16:14:44 +0200
Subject: [PATCH 02/10] lightnvm: remove unused rq parameter of
 nvme_nvm_rqtocmd() to kill warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With gcc 4.1.2:

    drivers/nvme/host/lightnvm.c: In function ‘nvme_nvm_submit_io’:
    drivers/nvme/host/lightnvm.c:498: warning: ‘rq’ is used uninitialized in this function

Indeed, since commit 2e13f33a2464fc3a ("lightnvm: create cmd before
allocating request"), the request is passed to nvme_nvm_rqtocmd() before
it is allocated.

Fortunately, as of commit 91276162de9476b8 ("lightnvm: refactor end_io
functions for sync"), nvme_nvm_rqtocmd() no longer uses the passed
request, so this warning is a false positive.

Drop the unused parameter to clean up the code and kill the warning.
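
To see why this is only a false positive, here is a minimal,
self-contained sketch of the same pattern (hypothetical names, not the
actual driver code): the variable is passed by value before it is ever
assigned, so older compilers report it as used uninitialized even
though the callee never reads that argument.

    struct cmd { int opcode; };

    /* The first parameter is deliberately never read, mirroring how
     * nvme_nvm_rqtocmd() stopped using rq.
     */
    static void fill_cmd(int *req, struct cmd *c)
    {
        c->opcode = 42;
    }

    int submit(void)
    {
        int *req;              /* not yet allocated/assigned */
        struct cmd c;

        fill_cmd(req, &c);     /* 'req' is used uninitialized here */
        return c.opcode;
    }

Initializing the variable would also silence the report, but removing
the dead parameter is the cleaner fix.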

Fixes: 2e13f33a2464fc3a ("lightnvm: create cmd before allocating request")
Fixes: 91276162de9476b8 ("lightnvm: refactor end_io functions for sync")
Signed-off-by: Geert Uytterhoeven
Signed-off-by: Jens Axboe
---
 drivers/nvme/host/lightnvm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 8c4adac6fafc..206bfdbc1c98 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -464,8 +464,8 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
 	return ret;
 }
 
-static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
-				    struct nvme_ns *ns, struct nvme_nvm_command *c)
+static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
+				    struct nvme_nvm_command *c)
 {
 	c->ph_rw.opcode = rqd->opcode;
 	c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
@@ -503,7 +503,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	if (!cmd)
 		return -ENOMEM;
 
-	nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
+	nvme_nvm_rqtocmd(rqd, ns, cmd);
 
 	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
 	if (IS_ERR(rq)) {

From ebd768579552a10682c2cbdfa657779dea62a01d Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Mon, 8 May 2017 11:44:40 +0100
Subject: [PATCH 03/10] blk-mq: make __blk_mq_stop_hw_queues static

Making __blk_mq_stop_hw_queues static fixes the following sparse
warning:

  block/blk-mq.c:6: warning: symbol '__blk_mq_stop_hw_queues' was not
  declared. Should it be static?

Fixes: 2719aa217e0d0 ("blk-mq: don't use sync workqueue flushing from drivers")
Signed-off-by: Colin Ian King
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a7e6b353e4e9..9ae1d9ccf4df 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1236,7 +1236,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 
-void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;

From fba704b494fdc6816a039a66887274b4e5c00eeb Mon Sep 17 00:00:00 2001
From: Rakesh Pandit
Date: Tue, 9 May 2017 08:48:25 -0600
Subject: [PATCH 04/10] nvme: lightnvm: fix memory leak
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Free up kmalloc-allocated memory if a failure happens while handling
the L2P table transfer in nvme_nvm_get_l2p_tbl().
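
For readers who want the shape of the fix in isolation, below is a
small, self-contained sketch of the same cleanup pattern in plain C
(hypothetical names and userspace malloc/free rather than the actual
kmalloc'd table in the driver): every failure path taken after the
allocation must reach the common out label so the buffer is freed.

    #include <stdlib.h>
    #include <string.h>

    int copy_table(const char *src, size_t len, int out_of_bounds)
    {
        int ret = 0;
        char *entries = malloc(len);

        if (!entries)
            return -1;          /* nothing allocated yet, plain return is fine */

        memcpy(entries, src, len);

        if (out_of_bounds) {    /* stand-in for the device bounds check */
            ret = -22;          /* -EINVAL */
            goto out;           /* a direct return here is what leaked before */
        }

        /* ... consume entries ... */
    out:
        free(entries);
        return ret;
    }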

Fixes: 8e79b5cb ("lightnvm: move block provisioning to targets")
Signed-off-by: Rakesh Pandit
Reviewed-by: Javier González
Signed-off-by: Jens Axboe
---
 drivers/nvme/host/lightnvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 206bfdbc1c98..f5df78ed1e10 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -367,7 +367,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 
 		if (unlikely(elba > nvmdev->total_secs)) {
 			pr_err("nvm: L2P data from device is out of bounds!\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 
 		/* Transform physical address to target address space */

From a66c38a171ed25488debf80247a9e72e1026e82c Mon Sep 17 00:00:00 2001
From: Paolo Valente
Date: Tue, 9 May 2017 11:37:27 +0200
Subject: [PATCH 05/10] block, bfq: use pointer entity->sched_data only if set

In the function __bfq_deactivate_entity, the pointer
entity->sched_data could be used before being properly initialized,
which led to a NULL pointer dereference. This commit fixes the bug by
using this pointer only where it is known to be safe to do so.

Reported-by: Tom Harrison
Tested-by: Tom Harrison
Signed-off-by: Paolo Valente
Signed-off-by: Jens Axboe
---
 block/bfq-wf2q.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index b4fc3e4260b7..8726ede19eef 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1114,12 +1114,21 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
 bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 {
 	struct bfq_sched_data *sd = entity->sched_data;
-	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
-	int is_in_service = entity == sd->in_service_entity;
+	struct bfq_service_tree *st;
+	bool is_in_service;
 
 	if (!entity->on_st) /* entity never activated, or already inactive */
 		return false;
 
+	/*
+	 * If we get here, then entity is active, which implies that
+	 * bfq_group_set_parent has already been invoked for the group
+	 * represented by entity. Therefore, the field
+	 * entity->sched_data has been set, and we can safely use it.
+	 */
+	st = bfq_entity_service_tree(entity);
+	is_in_service = entity == sd->in_service_entity;
+
 	if (is_in_service)
 		bfq_calc_finish(entity, entity->service);
 

From 43c1b3d6e536b7b21ed329ae54280d8ba308ba92 Mon Sep 17 00:00:00 2001
From: Paolo Valente
Date: Tue, 9 May 2017 12:54:23 +0200
Subject: [PATCH 06/10] block, bfq: stress that low_latency must be off to
 get max throughput

The introduction of the BFQ and Kyber I/O schedulers has triggered a
new wave of I/O benchmarks. Unfortunately, comments and discussions on
these benchmarks confirm that there is still little awareness that it
is very hard to achieve, at the same time, a low latency and a high
throughput. In particular, virtually all benchmarks measure
throughput, or throughput-related figures of merit, but, for BFQ, they
use the scheduler in its default configuration. This configuration is
geared, instead, toward a low latency. This is evidently a sign that
the BFQ documentation is still too unclear on this important aspect.
This commit addresses the issue by stressing how the BFQ configuration
must be (easily) changed if the only goal is maximum throughput.
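
As a concrete illustration of that recommendation, the knob can be
cleared per device from user space. The sketch below assumes a device
named sda whose queue is using bfq, with the tunable exposed under the
usual iosched sysfs directory; adjust the path for the device actually
being tuned (the common shell equivalent is simply writing "0" to the
same file).

    /* Sketch: trade BFQ's low-latency heuristics for throughput on one device. */
    #include <stdio.h>

    int main(void)
    {
        const char *knob = "/sys/block/sda/queue/iosched/low_latency";
        FILE *f = fopen(knob, "w");

        if (!f) {
            perror(knob);
            return 1;
        }
        fputs("0\n", f);    /* 0 disables the low-latency heuristics, 1 (default) enables them */
        fclose(f);
        return 0;
    }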

Signed-off-by: Paolo Valente
Signed-off-by: Jens Axboe
---
 Documentation/block/bfq-iosched.txt | 17 ++++++++++++++++-
 block/bfq-iosched.c                 |  5 +++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 1b87df6cd476..05e2822a80b3 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -11,6 +11,13 @@ controllers), BFQ's main features are:
   groups (switching back to time distribution when needed to keep
   throughput high).
 
+In its default configuration, BFQ privileges latency over
+throughput. So, when needed for achieving a lower latency, BFQ builds
+schedules that may lead to a lower throughput. If your main or only
+goal, for a given device, is to achieve the maximum-possible
+throughput at all times, then do switch off all low-latency heuristics
+for that device, by setting low_latency to 0. Full details in Section 3.
+
 On average CPUs, the current version of BFQ can handle devices
 performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
 reference, 30-50 KIOPS correspond to very high bandwidths with
@@ -375,11 +382,19 @@ default, low latency mode is enabled. If enabled, interactive and soft
 real-time applications are privileged and experience a lower latency,
 as explained in more detail in the description of how BFQ works.
 
-DO NOT enable this mode if you need full control on bandwidth
+DISABLE this mode if you need full control on bandwidth
 distribution. In fact, if it is enabled, then BFQ automatically
 increases the bandwidth share of privileged applications, as the main
 means to guarantee a lower latency to them.
 
+In addition, as already highlighted at the beginning of this document,
+DISABLE this mode if your only goal is to achieve a high throughput.
+In fact, privileging the I/O of some application over the rest may
+entail a lower throughput. To achieve the highest-possible throughput
+on a non-rotational device, setting slice_idle to 0 may be needed too
+(at the cost of giving up any strong guarantee on fairness and low
+latency).
+
 timeout_sync
 ------------
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index bd8499ef157c..08ce45096350 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -56,6 +56,11 @@
  * rotational or flash-based devices, and to get the job done quickly
  * for applications consisting in many I/O-bound processes.
  *
+ * NOTE: if the main or only goal, with a given device, is to achieve
+ * the maximum-possible throughput at all times, then do switch off
+ * all low-latency heuristics for that device, by setting low_latency
+ * to 0.
+ *
  * BFQ is described in [1], where also a reference to the initial, more
  * theoretical paper on BFQ can be found. The interested reader can find
  * in the latter paper full details on the main algorithm, as well as

From 340ff3216799a947fe0b07bed8f0409ffc716be9 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 10 May 2017 07:40:04 -0600
Subject: [PATCH 07/10] elevator: remove redundant warnings on IO scheduler
 switch

We currently warn twice when a switch to an IO scheduler fails. As we
also report the failure in the return value of the sysfs write, remove
the dmesg error messages for that path.

Keep the failure print warning about a failed switch to the
kconfig-selected IO scheduler, as we can't report errors for that in
any other way.
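
For illustration, a minimal user-space sketch of where the error now
surfaces (the device path and bogus scheduler name are assumptions for
the example): the write() to the scheduler attribute itself typically
fails with EINVAL, so the caller already learns about the failed
switch without a dmesg line.

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/sys/block/sda/queue/scheduler", O_WRONLY);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* An unknown scheduler name makes the write itself return the error. */
        if (write(fd, "not-a-real-iosched", 18) < 0)
            fprintf(stderr, "switch failed: %s\n", strerror(errno));
        close(fd);
        return 0;
    }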

Signed-off-by: Jens Axboe
---
 block/elevator.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/block/elevator.c b/block/elevator.c
index ab726a5c0bf6..dac99fbfc273 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1062,10 +1062,8 @@ static int __elevator_change(struct request_queue *q, const char *name)
 
 	strlcpy(elevator_name, name, sizeof(elevator_name));
 	e = elevator_get(strstrip(elevator_name), true);
-	if (!e) {
-		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
+	if (!e)
 		return -EINVAL;
-	}
 
 	if (q->elevator &&
 	    !strcmp(elevator_name, q->elevator->type->elevator_name)) {
@@ -1105,7 +1103,6 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 	if (!ret)
 		return count;
 
-	printk(KERN_ERR "elevator: switch to %s failed\n", name);
 	return ret;
 }
 

From d3738123986954ba3abbd96b595f5176b50c3f5d Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 9 May 2017 11:39:56 -0600
Subject: [PATCH 08/10] blk-stat: don't use this_cpu_ptr() in a preemptable
 section

If PREEMPT_RCU is enabled, rcu_read_lock() isn't strong enough for us
to use this_cpu_ptr() in that section. Use the safer get/put_cpu_ptr()
variants instead.

Reported-by: Mike Galbraith
Fixes: 34dbad5d26e2 ("blk-stat: convert to callback-based statistics reporting")
Signed-off-by: Jens Axboe
---
 block/blk-stat.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/block/blk-stat.c b/block/blk-stat.c
index 6c2f40940439..c52356d90fe3 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -96,13 +96,16 @@ void blk_stat_add(struct request *rq)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
-		if (blk_stat_is_active(cb)) {
-			bucket = cb->bucket_fn(rq);
-			if (bucket < 0)
-				continue;
-			stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
-			__blk_stat_add(stat, value);
-		}
+		if (!blk_stat_is_active(cb))
+			continue;
+
+		bucket = cb->bucket_fn(rq);
+		if (bucket < 0)
+			continue;
+
+		stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
+		__blk_stat_add(stat, value);
+		put_cpu_ptr(cb->cpu_stat);
 	}
 	rcu_read_unlock();
 }

From f36ea50ca0043e7b1204feaf1d2ba6bd68c08d36 Mon Sep 17 00:00:00 2001
From: Wen Xiong
Date: Wed, 10 May 2017 08:54:11 -0500
Subject: [PATCH 09/10] blk-mq: NVMe 512B/4K+T10 DIF/DIX format returns I/O
 error on dd with split op

When formatting an NVMe device to 512B/4K + T10 DIF/DIX, dd with a
split op returns "Input/output error". It looks like the block layer
splits the bio after calling bio_integrity_prep(bio). This patch fixes
the issue.

Below is how we debugged this issue:
(1) format nvme to 4K block size with type 2 DIF
(2) dd with a block size bigger than 1024k, oflag=direct

    dd: error writing '/dev/nvme0n1': Input/output error

We added some debug code in the nvme device driver. It showed us that
the first op and the second op have the same bi and pi address, which
is not correct.

1st op: nvme0n1 Op:Wr slba 0x505 length 0x100, PI ctrl=0x1400,
	dsmgmt=0x0, AT=0x0 & RT=0x505
	Guard 0x00b1, AT 0x0000, RT physical 0x00000505 RT virtual 0x00002828

2nd op: nvme0n1 Op:Wr slba 0x605 length 0x1, PI ctrl=0x1400, dsmgmt=0x0,
	AT=0x0 & RT=0x605  ==> This op fails, as do the subsequent 5 retries.
	Guard 0x00b1, AT 0x0000, RT physical 0x00000605 RT virtual 0x00002828

With the fix, it showed us that both the first op and the second op
have correct bi and pi addresses.

1st op: nvme2n1 Op:Wr slba 0x505 length 0x100, PI ctrl=0x1400,
	dsmgmt=0x0, AT=0x0 & RT=0x505
	Guard 0x5ccb, AT 0x0000, RT physical 0x00000505 RT virtual 0x00002828

2nd op: nvme2n1 Op:Wr slba 0x605 length 0x1, PI ctrl=0x1400, dsmgmt=0x0,
	AT=0x0 & RT=0x605
	Guard 0xab4c, AT 0x0000, RT physical 0x00000605 RT virtual 0x00003028

Signed-off-by: Wen Xiong
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9ae1d9ccf4df..a69ad122ed66 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1554,13 +1554,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
+	blk_queue_split(q, &bio, q->bio_split);
+
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio_io_error(bio);
 		return BLK_QC_T_NONE;
 	}
 
-	blk_queue_split(q, &bio, q->bio_split);
-
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
 		return BLK_QC_T_NONE;

From ed6565e734249ef021d5c13ba34c167eb4e42f62 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 11 May 2017 12:34:38 +0200
Subject: [PATCH 10/10] block: handle partial completions for special payload
 requests

SCSI devices can return short writes on Write Same just like for
normal writes, so we need to handle this case for our special payload
requests as well.

Signed-off-by: Christoph Hellwig
Reported-by: Abdul Haleem
Tested-by: Abdul Haleem
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index c580b0138a7f..c7068520794b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2644,8 +2644,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		return false;
 	}
 
-	WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);
-
 	req->__data_len -= total_bytes;
 
 	/* update sector only for requests with clear definition of sector */
@@ -2658,17 +2656,19 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
 	}
 
-	/*
-	 * If total number of sectors is less than the first segment
-	 * size, something has gone terribly wrong.
-	 */
-	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
-		blk_dump_rq_flags(req, "request botched");
-		req->__data_len = blk_rq_cur_bytes(req);
-	}
+	if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
+		/*
+		 * If total number of sectors is less than the first segment
+		 * size, something has gone terribly wrong.
+		 */
+		if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+			blk_dump_rq_flags(req, "request botched");
+			req->__data_len = blk_rq_cur_bytes(req);
+		}
 
-	/* recalculate the number of segments */
-	blk_recalc_rq_segments(req);
+		/* recalculate the number of segments */
+		blk_recalc_rq_segments(req);
+	}
 
 	return true;
 }