diff options
Diffstat (limited to 'drivers/md/raid10.c')
| -rw-r--r-- | drivers/md/raid10.c | 154 |
1 files changed, 83 insertions, 71 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index dde98f65bd04..b369ebb965a9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -861,7 +861,6 @@ static void flush_pending_writes(struct r10conf *conf) struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); - conf->pending_count = 0; spin_unlock_irq(&conf->device_lock); /* @@ -952,8 +951,10 @@ static void lower_barrier(struct r10conf *conf) wake_up(&conf->wait_barrier); } -static void wait_barrier(struct r10conf *conf) +static bool wait_barrier(struct r10conf *conf, bool nowait) { + bool ret = true; + spin_lock_irq(&conf->resync_lock); if (conf->barrier) { struct bio_list *bio_list = current->bio_list; @@ -967,27 +968,35 @@ static void wait_barrier(struct r10conf *conf) * that queue to get the nr_pending * count down. */ - raid10_log(conf->mddev, "wait barrier"); - wait_event_lock_irq(conf->wait_barrier, - !conf->barrier || - (atomic_read(&conf->nr_pending) && - bio_list && - (!bio_list_empty(&bio_list[0]) || - !bio_list_empty(&bio_list[1]))) || - /* move on if recovery thread is - * blocked by us - */ - (conf->mddev->thread->tsk == current && - test_bit(MD_RECOVERY_RUNNING, - &conf->mddev->recovery) && - conf->nr_queued > 0), - conf->resync_lock); + /* Return false when nowait flag is set */ + if (nowait) { + ret = false; + } else { + raid10_log(conf->mddev, "wait barrier"); + wait_event_lock_irq(conf->wait_barrier, + !conf->barrier || + (atomic_read(&conf->nr_pending) && + bio_list && + (!bio_list_empty(&bio_list[0]) || + !bio_list_empty(&bio_list[1]))) || + /* move on if recovery thread is + * blocked by us + */ + (conf->mddev->thread->tsk == current && + test_bit(MD_RECOVERY_RUNNING, + &conf->mddev->recovery) && + conf->nr_queued > 0), + conf->resync_lock); + } conf->nr_waiting--; if (!conf->nr_waiting) wake_up(&conf->wait_barrier); } - atomic_inc(&conf->nr_pending); + /* Only increment nr_pending when we wait */ + if (ret) + atomic_inc(&conf->nr_pending); spin_unlock_irq(&conf->resync_lock); + return ret; } static void allow_barrier(struct r10conf *conf) @@ -1044,16 +1053,9 @@ static sector_t choose_data_offset(struct r10bio *r10_bio, return rdev->new_data_offset; } -struct raid10_plug_cb { - struct blk_plug_cb cb; - struct bio_list pending; - int pending_cnt; -}; - static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) { - struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb, - cb); + struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, cb); struct mddev *mddev = plug->cb.data; struct r10conf *conf = mddev->private; struct bio *bio; @@ -1061,7 +1063,6 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) if (from_schedule || current->bio_list) { spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->pending_bio_list, &plug->pending); - conf->pending_count += plug->pending_cnt; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_barrier); md_wakeup_thread(mddev->thread); @@ -1098,21 +1099,30 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) * currently. * 2. If IO spans the reshape position. Need to wait for reshape to pass. */ -static void regular_request_wait(struct mddev *mddev, struct r10conf *conf, +static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf, struct bio *bio, sector_t sectors) { - wait_barrier(conf); + /* Bail out if REQ_NOWAIT is set for the bio */ + if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) { + bio_wouldblock_error(bio); + return false; + } while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && bio->bi_iter.bi_sector < conf->reshape_progress && bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { - raid10_log(conf->mddev, "wait reshape"); allow_barrier(conf); + if (bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return false; + } + raid10_log(conf->mddev, "wait reshape"); wait_event(conf->wait_barrier, conf->reshape_progress <= bio->bi_iter.bi_sector || conf->reshape_progress >= bio->bi_iter.bi_sector + sectors); - wait_barrier(conf); + wait_barrier(conf, false); } + return true; } static void raid10_read_request(struct mddev *mddev, struct bio *bio, @@ -1157,7 +1167,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, rcu_read_unlock(); } - regular_request_wait(mddev, conf, bio, r10_bio->sectors); + if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) + return; rdev = read_balance(conf, r10_bio, &max_sectors); if (!rdev) { if (err_rdev) { @@ -1179,7 +1190,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, bio_chain(split, bio); allow_barrier(conf); submit_bio_noacct(bio); - wait_barrier(conf); + wait_barrier(conf, false); bio = split; r10_bio->master_bio = bio; r10_bio->sectors = max_sectors; @@ -1188,14 +1199,13 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) r10_bio->start_time = bio_start_io_acct(bio); - read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); + read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set); r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].rdev = rdev; read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + choose_data_offset(r10_bio, rdev); - bio_set_dev(read_bio, rdev->bdev); read_bio->bi_end_io = raid10_end_read_request; bio_set_op_attrs(read_bio, op, do_sync); if (test_bit(FailFast, &rdev->flags) && @@ -1219,7 +1229,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, const unsigned long do_fua = (bio->bi_opf & REQ_FUA); unsigned long flags; struct blk_plug_cb *cb; - struct raid10_plug_cb *plug = NULL; + struct raid1_plug_cb *plug = NULL; struct r10conf *conf = mddev->private; struct md_rdev *rdev; int devnum = r10_bio->devs[n_copy].devnum; @@ -1235,7 +1245,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, } else rdev = conf->mirrors[devnum].rdev; - mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set); if (replacement) r10_bio->devs[n_copy].repl_bio = mbio; else @@ -1243,7 +1253,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + choose_data_offset(r10_bio, rdev)); - bio_set_dev(mbio, rdev->bdev); mbio->bi_end_io = raid10_end_write_request; bio_set_op_attrs(mbio, op, do_sync | do_fua); if (!replacement && test_bit(FailFast, @@ -1262,16 +1271,14 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug)); if (cb) - plug = container_of(cb, struct raid10_plug_cb, cb); + plug = container_of(cb, struct raid1_plug_cb, cb); else plug = NULL; if (plug) { bio_list_add(&plug->pending, mbio); - plug->pending_cnt++; } else { spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; spin_unlock_irqrestore(&conf->device_lock, flags); md_wakeup_thread(mddev->thread); } @@ -1338,7 +1345,7 @@ retry_wait: raid10_log(conf->mddev, "%s wait rdev %d blocked", __func__, blocked_rdev->raid_disk); md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf); + wait_barrier(conf, false); goto retry_wait; } } @@ -1356,6 +1363,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, bio->bi_iter.bi_sector, bio_end_sector(bio)))) { DEFINE_WAIT(w); + /* Bail out if REQ_NOWAIT is set for the bio */ + if (bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return; + } for (;;) { prepare_to_wait(&conf->wait_barrier, &w, TASK_IDLE); @@ -1368,7 +1380,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, } sectors = r10_bio->sectors; - regular_request_wait(mddev, conf, bio, sectors); + if (!regular_request_wait(mddev, conf, bio, sectors)) + return; if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && (mddev->reshape_backwards ? (bio->bi_iter.bi_sector < conf->reshape_safe && @@ -1380,6 +1393,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, set_mask_bits(&mddev->sb_flags, 0, BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); md_wakeup_thread(mddev->thread); + if (bio->bi_opf & REQ_NOWAIT) { + allow_barrier(conf); + bio_wouldblock_error(bio); + return; + } raid10_log(conf->mddev, "wait reshape metadata"); wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); @@ -1387,12 +1405,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, conf->reshape_safe = mddev->reshape_position; } - if (conf->pending_count >= max_queued_requests) { - md_wakeup_thread(mddev->thread); - raid10_log(mddev, "wait queued"); - wait_event(conf->wait_barrier, - conf->pending_count < max_queued_requests); - } /* first select target devices under rcu_lock and * inc refcount on their rdev. Record them by setting * bios[x] to bio @@ -1482,7 +1494,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, bio_chain(split, bio); allow_barrier(conf); submit_bio_noacct(bio); - wait_barrier(conf); + wait_barrier(conf, false); bio = split; r10_bio->master_bio = bio; } @@ -1607,7 +1619,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) return -EAGAIN; - wait_barrier(conf); + if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) { + bio_wouldblock_error(bio); + return 0; + } + wait_barrier(conf, false); /* * Check reshape again to avoid reshape happens after checking @@ -1649,7 +1665,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) allow_barrier(conf); /* Resend the fist split part */ submit_bio_noacct(split); - wait_barrier(conf); + wait_barrier(conf, false); } div_u64_rem(bio_end, stripe_size, &remainder); if (remainder) { @@ -1660,7 +1676,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) /* Resend the second split part */ submit_bio_noacct(bio); bio = split; - wait_barrier(conf); + wait_barrier(conf, false); } bio_start = bio->bi_iter.bi_sector; @@ -1783,7 +1799,8 @@ retry_discard: */ if (r10_bio->devs[disk].bio) { struct md_rdev *rdev = conf->mirrors[disk].rdev; - mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + mbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, + &mddev->bio_set); mbio->bi_end_io = raid10_end_discard_request; mbio->bi_private = r10_bio; r10_bio->devs[disk].bio = mbio; @@ -1796,7 +1813,8 @@ retry_discard: } if (r10_bio->devs[disk].repl_bio) { struct md_rdev *rrdev = conf->mirrors[disk].replacement; - rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + rbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, + &mddev->bio_set); rbio->bi_end_io = raid10_end_discard_request; rbio->bi_private = r10_bio; r10_bio->devs[disk].repl_bio = rbio; @@ -1816,7 +1834,7 @@ retry_discard: end_disk_offset += geo->stride; atomic_inc(&first_r10bio->remaining); raid_end_discard_bio(r10_bio); - wait_barrier(conf); + wait_barrier(conf, false); goto retry_discard; } @@ -2011,7 +2029,7 @@ static void print_conf(struct r10conf *conf) static void close_sync(struct r10conf *conf) { - wait_barrier(conf); + wait_barrier(conf, false); allow_barrier(conf); mempool_exit(&conf->r10buf_pool); @@ -2393,7 +2411,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * bi_vecs, as the read request might have corrupted these */ rp = get_resync_pages(tbio); - bio_reset(tbio); + bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE); md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size); @@ -2401,7 +2419,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) tbio->bi_private = rp; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; tbio->bi_end_io = end_sync_write; - bio_set_op_attrs(tbio, REQ_OP_WRITE, 0); bio_copy_data(tbio, fbio); @@ -2412,7 +2429,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) tbio->bi_opf |= MD_FAILFAST; tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset; - bio_set_dev(tbio, conf->mirrors[d].rdev->bdev); submit_bio_noacct(tbio); } @@ -2865,12 +2881,12 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) if (sectors > sect_to_write) sectors = sect_to_write; /* Write at 'sector' for 'sectors' */ - wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + wbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, + &mddev->bio_set); bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); wbio->bi_iter.bi_sector = wsector + choose_data_offset(r10_bio, rdev); - bio_set_dev(wbio, rdev->bdev); bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (submit_bio_wait(wbio) < 0) @@ -3131,12 +3147,12 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf) for (i = 0; i < nalloc; i++) { bio = r10bio->devs[i].bio; rp = bio->bi_private; - bio_reset(bio); + bio_reset(bio, NULL, 0); bio->bi_private = rp; bio = r10bio->devs[i].repl_bio; if (bio) { rp = bio->bi_private; - bio_reset(bio); + bio_reset(bio, NULL, 0); bio->bi_private = rp; } } @@ -4819,7 +4835,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, if (need_flush || time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { /* Need to update reshape_position in metadata */ - wait_barrier(conf); + wait_barrier(conf, false); mddev->reshape_position = conf->reshape_progress; if (mddev->reshape_backwards) mddev->curr_resync_completed = raid10_size(mddev, 0, 0) @@ -4863,14 +4879,12 @@ read_more: return sectors_done; } - read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set); - - bio_set_dev(read_bio, rdev->bdev); + read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ, + GFP_KERNEL, &mddev->bio_set); read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset); read_bio->bi_private = r10_bio; read_bio->bi_end_io = end_reshape_read; - bio_set_op_attrs(read_bio, REQ_OP_READ, 0); r10_bio->master_bio = read_bio; r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; @@ -5242,5 +5256,3 @@ MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD"); MODULE_ALIAS("md-personality-9"); /* RAID10 */ MODULE_ALIAS("md-raid10"); MODULE_ALIAS("md-level-10"); - -module_param(max_queued_requests, int, S_IRUGO|S_IWUSR); |