aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c154
1 files changed, 83 insertions, 71 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index dde98f65bd04..b369ebb965a9 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -861,7 +861,6 @@ static void flush_pending_writes(struct r10conf *conf)
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
- conf->pending_count = 0;
spin_unlock_irq(&conf->device_lock);
/*
@@ -952,8 +951,10 @@ static void lower_barrier(struct r10conf *conf)
wake_up(&conf->wait_barrier);
}
-static void wait_barrier(struct r10conf *conf)
+static bool wait_barrier(struct r10conf *conf, bool nowait)
{
+ bool ret = true;
+
spin_lock_irq(&conf->resync_lock);
if (conf->barrier) {
struct bio_list *bio_list = current->bio_list;
@@ -967,27 +968,35 @@ static void wait_barrier(struct r10conf *conf)
* that queue to get the nr_pending
* count down.
*/
- raid10_log(conf->mddev, "wait barrier");
- wait_event_lock_irq(conf->wait_barrier,
- !conf->barrier ||
- (atomic_read(&conf->nr_pending) &&
- bio_list &&
- (!bio_list_empty(&bio_list[0]) ||
- !bio_list_empty(&bio_list[1]))) ||
- /* move on if recovery thread is
- * blocked by us
- */
- (conf->mddev->thread->tsk == current &&
- test_bit(MD_RECOVERY_RUNNING,
- &conf->mddev->recovery) &&
- conf->nr_queued > 0),
- conf->resync_lock);
+ /* Return false when nowait flag is set */
+ if (nowait) {
+ ret = false;
+ } else {
+ raid10_log(conf->mddev, "wait barrier");
+ wait_event_lock_irq(conf->wait_barrier,
+ !conf->barrier ||
+ (atomic_read(&conf->nr_pending) &&
+ bio_list &&
+ (!bio_list_empty(&bio_list[0]) ||
+ !bio_list_empty(&bio_list[1]))) ||
+ /* move on if recovery thread is
+ * blocked by us
+ */
+ (conf->mddev->thread->tsk == current &&
+ test_bit(MD_RECOVERY_RUNNING,
+ &conf->mddev->recovery) &&
+ conf->nr_queued > 0),
+ conf->resync_lock);
+ }
conf->nr_waiting--;
if (!conf->nr_waiting)
wake_up(&conf->wait_barrier);
}
- atomic_inc(&conf->nr_pending);
+ /* Only increment nr_pending when we wait */
+ if (ret)
+ atomic_inc(&conf->nr_pending);
spin_unlock_irq(&conf->resync_lock);
+ return ret;
}
static void allow_barrier(struct r10conf *conf)
@@ -1044,16 +1053,9 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
return rdev->new_data_offset;
}
-struct raid10_plug_cb {
- struct blk_plug_cb cb;
- struct bio_list pending;
- int pending_cnt;
-};
-
static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
- struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
- cb);
+ struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, cb);
struct mddev *mddev = plug->cb.data;
struct r10conf *conf = mddev->private;
struct bio *bio;
@@ -1061,7 +1063,6 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
if (from_schedule || current->bio_list) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending);
- conf->pending_count += plug->pending_cnt;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_barrier);
md_wakeup_thread(mddev->thread);
@@ -1098,21 +1099,30 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
* currently.
* 2. If IO spans the reshape position. Need to wait for reshape to pass.
*/
-static void regular_request_wait(struct mddev *mddev, struct r10conf *conf,
+static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
struct bio *bio, sector_t sectors)
{
- wait_barrier(conf);
+ /* Bail out if REQ_NOWAIT is set for the bio */
+ if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) {
+ bio_wouldblock_error(bio);
+ return false;
+ }
while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
bio->bi_iter.bi_sector < conf->reshape_progress &&
bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
- raid10_log(conf->mddev, "wait reshape");
allow_barrier(conf);
+ if (bio->bi_opf & REQ_NOWAIT) {
+ bio_wouldblock_error(bio);
+ return false;
+ }
+ raid10_log(conf->mddev, "wait reshape");
wait_event(conf->wait_barrier,
conf->reshape_progress <= bio->bi_iter.bi_sector ||
conf->reshape_progress >= bio->bi_iter.bi_sector +
sectors);
- wait_barrier(conf);
+ wait_barrier(conf, false);
}
+ return true;
}
static void raid10_read_request(struct mddev *mddev, struct bio *bio,
@@ -1157,7 +1167,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
rcu_read_unlock();
}
- regular_request_wait(mddev, conf, bio, r10_bio->sectors);
+ if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
+ return;
rdev = read_balance(conf, r10_bio, &max_sectors);
if (!rdev) {
if (err_rdev) {
@@ -1179,7 +1190,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
bio_chain(split, bio);
allow_barrier(conf);
submit_bio_noacct(bio);
- wait_barrier(conf);
+ wait_barrier(conf, false);
bio = split;
r10_bio->master_bio = bio;
r10_bio->sectors = max_sectors;
@@ -1188,14 +1199,13 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
r10_bio->start_time = bio_start_io_acct(bio);
- read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
+ read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
r10_bio->devs[slot].bio = read_bio;
r10_bio->devs[slot].rdev = rdev;
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
choose_data_offset(r10_bio, rdev);
- bio_set_dev(read_bio, rdev->bdev);
read_bio->bi_end_io = raid10_end_read_request;
bio_set_op_attrs(read_bio, op, do_sync);
if (test_bit(FailFast, &rdev->flags) &&
@@ -1219,7 +1229,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
unsigned long flags;
struct blk_plug_cb *cb;
- struct raid10_plug_cb *plug = NULL;
+ struct raid1_plug_cb *plug = NULL;
struct r10conf *conf = mddev->private;
struct md_rdev *rdev;
int devnum = r10_bio->devs[n_copy].devnum;
@@ -1235,7 +1245,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
} else
rdev = conf->mirrors[devnum].rdev;
- mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
if (replacement)
r10_bio->devs[n_copy].repl_bio = mbio;
else
@@ -1243,7 +1253,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
choose_data_offset(r10_bio, rdev));
- bio_set_dev(mbio, rdev->bdev);
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
if (!replacement && test_bit(FailFast,
@@ -1262,16 +1271,14 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
if (cb)
- plug = container_of(cb, struct raid10_plug_cb, cb);
+ plug = container_of(cb, struct raid1_plug_cb, cb);
else
plug = NULL;
if (plug) {
bio_list_add(&plug->pending, mbio);
- plug->pending_cnt++;
} else {
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
md_wakeup_thread(mddev->thread);
}
@@ -1338,7 +1345,7 @@ retry_wait:
raid10_log(conf->mddev, "%s wait rdev %d blocked",
__func__, blocked_rdev->raid_disk);
md_wait_for_blocked_rdev(blocked_rdev, mddev);
- wait_barrier(conf);
+ wait_barrier(conf, false);
goto retry_wait;
}
}
@@ -1356,6 +1363,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
bio->bi_iter.bi_sector,
bio_end_sector(bio)))) {
DEFINE_WAIT(w);
+ /* Bail out if REQ_NOWAIT is set for the bio */
+ if (bio->bi_opf & REQ_NOWAIT) {
+ bio_wouldblock_error(bio);
+ return;
+ }
for (;;) {
prepare_to_wait(&conf->wait_barrier,
&w, TASK_IDLE);
@@ -1368,7 +1380,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
}
sectors = r10_bio->sectors;
- regular_request_wait(mddev, conf, bio, sectors);
+ if (!regular_request_wait(mddev, conf, bio, sectors))
+ return;
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
(mddev->reshape_backwards
? (bio->bi_iter.bi_sector < conf->reshape_safe &&
@@ -1380,6 +1393,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
md_wakeup_thread(mddev->thread);
+ if (bio->bi_opf & REQ_NOWAIT) {
+ allow_barrier(conf);
+ bio_wouldblock_error(bio);
+ return;
+ }
raid10_log(conf->mddev, "wait reshape metadata");
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
@@ -1387,12 +1405,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
conf->reshape_safe = mddev->reshape_position;
}
- if (conf->pending_count >= max_queued_requests) {
- md_wakeup_thread(mddev->thread);
- raid10_log(mddev, "wait queued");
- wait_event(conf->wait_barrier,
- conf->pending_count < max_queued_requests);
- }
/* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting
* bios[x] to bio
@@ -1482,7 +1494,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
bio_chain(split, bio);
allow_barrier(conf);
submit_bio_noacct(bio);
- wait_barrier(conf);
+ wait_barrier(conf, false);
bio = split;
r10_bio->master_bio = bio;
}
@@ -1607,7 +1619,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
return -EAGAIN;
- wait_barrier(conf);
+ if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) {
+ bio_wouldblock_error(bio);
+ return 0;
+ }
+ wait_barrier(conf, false);
/*
* Check reshape again to avoid reshape happens after checking
@@ -1649,7 +1665,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
allow_barrier(conf);
/* Resend the fist split part */
submit_bio_noacct(split);
- wait_barrier(conf);
+ wait_barrier(conf, false);
}
div_u64_rem(bio_end, stripe_size, &remainder);
if (remainder) {
@@ -1660,7 +1676,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
/* Resend the second split part */
submit_bio_noacct(bio);
bio = split;
- wait_barrier(conf);
+ wait_barrier(conf, false);
}
bio_start = bio->bi_iter.bi_sector;
@@ -1783,7 +1799,8 @@ retry_discard:
*/
if (r10_bio->devs[disk].bio) {
struct md_rdev *rdev = conf->mirrors[disk].rdev;
- mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ mbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
+ &mddev->bio_set);
mbio->bi_end_io = raid10_end_discard_request;
mbio->bi_private = r10_bio;
r10_bio->devs[disk].bio = mbio;
@@ -1796,7 +1813,8 @@ retry_discard:
}
if (r10_bio->devs[disk].repl_bio) {
struct md_rdev *rrdev = conf->mirrors[disk].replacement;
- rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ rbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
+ &mddev->bio_set);
rbio->bi_end_io = raid10_end_discard_request;
rbio->bi_private = r10_bio;
r10_bio->devs[disk].repl_bio = rbio;
@@ -1816,7 +1834,7 @@ retry_discard:
end_disk_offset += geo->stride;
atomic_inc(&first_r10bio->remaining);
raid_end_discard_bio(r10_bio);
- wait_barrier(conf);
+ wait_barrier(conf, false);
goto retry_discard;
}
@@ -2011,7 +2029,7 @@ static void print_conf(struct r10conf *conf)
static void close_sync(struct r10conf *conf)
{
- wait_barrier(conf);
+ wait_barrier(conf, false);
allow_barrier(conf);
mempool_exit(&conf->r10buf_pool);
@@ -2393,7 +2411,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* bi_vecs, as the read request might have corrupted these
*/
rp = get_resync_pages(tbio);
- bio_reset(tbio);
+ bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE);
md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size);
@@ -2401,7 +2419,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
tbio->bi_private = rp;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
tbio->bi_end_io = end_sync_write;
- bio_set_op_attrs(tbio, REQ_OP_WRITE, 0);
bio_copy_data(tbio, fbio);
@@ -2412,7 +2429,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
tbio->bi_opf |= MD_FAILFAST;
tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
- bio_set_dev(tbio, conf->mirrors[d].rdev->bdev);
submit_bio_noacct(tbio);
}
@@ -2865,12 +2881,12 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors' */
- wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ wbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO,
+ &mddev->bio_set);
bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
wbio->bi_iter.bi_sector = wsector +
choose_data_offset(r10_bio, rdev);
- bio_set_dev(wbio, rdev->bdev);
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
if (submit_bio_wait(wbio) < 0)
@@ -3131,12 +3147,12 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
for (i = 0; i < nalloc; i++) {
bio = r10bio->devs[i].bio;
rp = bio->bi_private;
- bio_reset(bio);
+ bio_reset(bio, NULL, 0);
bio->bi_private = rp;
bio = r10bio->devs[i].repl_bio;
if (bio) {
rp = bio->bi_private;
- bio_reset(bio);
+ bio_reset(bio, NULL, 0);
bio->bi_private = rp;
}
}
@@ -4819,7 +4835,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
if (need_flush ||
time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
/* Need to update reshape_position in metadata */
- wait_barrier(conf);
+ wait_barrier(conf, false);
mddev->reshape_position = conf->reshape_progress;
if (mddev->reshape_backwards)
mddev->curr_resync_completed = raid10_size(mddev, 0, 0)
@@ -4863,14 +4879,12 @@ read_more:
return sectors_done;
}
- read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);
-
- bio_set_dev(read_bio, rdev->bdev);
+ read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ,
+ GFP_KERNEL, &mddev->bio_set);
read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset);
read_bio->bi_private = r10_bio;
read_bio->bi_end_io = end_reshape_read;
- bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
r10_bio->master_bio = read_bio;
r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
@@ -5242,5 +5256,3 @@ MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
MODULE_ALIAS("md-personality-9"); /* RAID10 */
MODULE_ALIAS("md-raid10");
MODULE_ALIAS("md-level-10");
-
-module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);