diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 11:43:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 11:43:44 -0700 |
commit | 1ddeeb2a058d7b2a58ed9e820396b4ceb715d529 (patch) | |
tree | 32a27b8eb1c538239b641292d77dc1a8cee8ee97 /drivers/md/raid10.c | |
parent | d2c84bdce25a678c1e1f116d65b58790bd241af0 (diff) | |
parent | 5205a4aa8fc9454853b705b69611c80e9c644283 (diff) |
Merge tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- MD pull requests via Song:
- Cleanup redundant checks (Yu Kuai)
- Remove deprecated headers (Marc Zyngier, Song Liu)
- Concurrency fixes (Li Lingfeng)
- Memory leak fix (Li Nan)
- Refactor raid1 read_balance (Yu Kuai, Paul Luse)
- Clean up and fix for md_ioctl (Li Nan)
- Other small fixes (Gui-Dong Han, Heming Zhao)
- MD atomic limits (Christoph)
- NVMe pull request via Keith:
- RDMA target enhancements (Max)
- Fabrics fixes (Max, Guixin, Hannes)
- Atomic queue_limits usage (Christoph)
- Const use for class_register (Ricardo)
- Identification error handling fixes (Shin'ichiro, Keith)
- Improvement and cleanup for cached request handling (Christoph)
- Moving towards atomic queue limits. Core changes and driver bits so
far (Christoph)
- Fix UAF issues in aoeblk (Chun-Yi)
- Zoned fix and cleanups (Damien)
- s390 dasd cleanups and fixes (Jan, Miroslav)
- Block issue timestamp caching (me)
- noio scope guarding for zoned IO (Johannes)
- block/nvme PI improvements (Kanchan)
- Ability to terminate long running discard loop (Keith)
- bdev revalidation fix (Li)
- Get rid of old nr_queues hack for kdump kernels (Ming)
- Support for async deletion of ublk (Ming)
- Improve IRQ bio recycling (Pavel)
- Factor in CPU capacity for remote vs local completion (Qais)
- Add shared_tags configfs entry for null_blk (Shin'ichiro
- Fix for a regression in page refcounts introduced by the folio
unification (Tony)
- Misc fixes and cleanups (Arnd, Colin, John, Kunwu, Li, Navid,
Ricardo, Roman, Tang, Uwe)
* tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux: (221 commits)
block: partitions: only define function mac_fix_string for CONFIG_PPC_PMAC
block/swim: Convert to platform remove callback returning void
cdrom: gdrom: Convert to platform remove callback returning void
block: remove disk_stack_limits
md: remove mddev->queue
md: don't initialize queue limits
md/raid10: use the atomic queue limit update APIs
md/raid5: use the atomic queue limit update APIs
md/raid1: use the atomic queue limit update APIs
md/raid0: use the atomic queue limit update APIs
md: add queue limit helpers
md: add a mddev_is_dm helper
md: add a mddev_add_trace_msg helper
md: add a mddev_trace_remap helper
bcache: move calculation of stripe_size and io_opt into bcache_device_init
virtio_blk: Do not use disk_set_max_open/active_zones()
aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts
block: move capacity validation to blkpg_do_ioctl()
block: prevent division by zero in blk_rq_stat_sum()
drbd: atomically update queue limits in drbd_reconsider_queue_parameters
...
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 143 |
1 files changed, 58 insertions, 85 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a5f8419e2df1..a4556d2e46bf 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -76,9 +76,6 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio); static void end_reshape_write(struct bio *bio); static void end_reshape(struct r10conf *conf); -#define raid10_log(md, fmt, args...) \ - do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) - #include "raid1-10.c" #define NULL_CMD @@ -518,11 +515,7 @@ static void raid10_end_write_request(struct bio *bio) * The 'master' represents the composite IO operation to * user-side. So if something waits for IO, then it will * wait for the 'master' bio. - */ - sector_t first_bad; - int bad_sectors; - - /* + * * Do not set R10BIO_Uptodate if the current device is * rebuilding or Faulty. This is because we cannot use * such device for properly reading the data back (we could @@ -535,10 +528,9 @@ static void raid10_end_write_request(struct bio *bio) set_bit(R10BIO_Uptodate, &r10_bio->state); /* Maybe we can clear some bad blocks. */ - if (is_badblock(rdev, - r10_bio->devs[slot].addr, - r10_bio->sectors, - &first_bad, &bad_sectors) && !discard_error) { + if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr, + r10_bio->sectors) && + !discard_error) { bio_put(bio); if (repl) r10_bio->devs[slot].repl_bio = IO_MADE_GOOD; @@ -753,17 +745,8 @@ static struct md_rdev *read_balance(struct r10conf *conf, best_good_sectors = 0; do_balance = 1; clear_bit(R10BIO_FailFast, &r10_bio->state); - /* - * Check if we can balance. We can balance on the whole - * device if no resync is going on (recovery is ok), or below - * the resync window. We take the first readable disk when - * above the resync window. - */ - if ((conf->mddev->recovery_cp < MaxSector - && (this_sector + sectors >= conf->next_resync)) || - (mddev_is_clustered(conf->mddev) && - md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector, - this_sector + sectors))) + + if (raid1_should_read_first(conf->mddev, this_sector, sectors)) do_balance = 0; for (slot = 0; slot < conf->copies ; slot++) { @@ -1033,7 +1016,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait) ret = false; } else { conf->nr_waiting++; - raid10_log(conf->mddev, "wait barrier"); + mddev_add_trace_msg(conf->mddev, "raid10 wait barrier"); wait_event_barrier(conf, stop_waiting_barrier(conf)); conf->nr_waiting--; } @@ -1152,7 +1135,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf, bio_wouldblock_error(bio); return false; } - raid10_log(conf->mddev, "wait reshape"); + mddev_add_trace_msg(conf->mddev, "raid10 wait reshape"); wait_event(conf->wait_barrier, conf->reshape_progress <= bio->bi_iter.bi_sector || conf->reshape_progress >= bio->bi_iter.bi_sector + @@ -1249,10 +1232,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, test_bit(R10BIO_FailFast, &r10_bio->state)) read_bio->bi_opf |= MD_FAILFAST; read_bio->bi_private = r10_bio; - - if (mddev->gendisk) - trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), - r10_bio->sector); + mddev_trace_remap(mddev, read_bio, r10_bio->sector); submit_bio_noacct(read_bio); return; } @@ -1288,10 +1268,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, && enough(conf, devnum)) mbio->bi_opf |= MD_FAILFAST; mbio->bi_private = r10_bio; - - if (conf->mddev->gendisk) - trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk), - r10_bio->sector); + mddev_trace_remap(mddev, mbio, r10_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ mbio->bi_bdev = (void *)rdev; @@ -1330,10 +1307,7 @@ retry_wait: } if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) { - sector_t first_bad; sector_t dev_sector = r10_bio->devs[i].addr; - int bad_sectors; - int is_bad; /* * Discard request doesn't care the write result @@ -1342,9 +1316,8 @@ retry_wait: if (!r10_bio->sectors) continue; - is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors, - &first_bad, &bad_sectors); - if (is_bad < 0) { + if (rdev_has_badblock(rdev, dev_sector, + r10_bio->sectors) < 0) { /* * Mustn't write here until the bad block * is acknowledged @@ -1360,8 +1333,9 @@ retry_wait: if (unlikely(blocked_rdev)) { /* Have to wait for this device to get unblocked, then retry */ allow_barrier(conf); - raid10_log(conf->mddev, "%s wait rdev %d blocked", - __func__, blocked_rdev->raid_disk); + mddev_add_trace_msg(conf->mddev, + "raid10 %s wait rdev %d blocked", + __func__, blocked_rdev->raid_disk); md_wait_for_blocked_rdev(blocked_rdev, mddev); wait_barrier(conf, false); goto retry_wait; @@ -1416,7 +1390,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, bio_wouldblock_error(bio); return; } - raid10_log(conf->mddev, "wait reshape metadata"); + mddev_add_trace_msg(conf->mddev, + "raid10 wait reshape metadata"); wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); @@ -2131,10 +2106,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) continue; } - if (mddev->gendisk) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - + err = mddev_stack_new_rdev(mddev, rdev); + if (err) + return err; p->head_position = 0; p->recovery_disabled = mddev->recovery_disabled - 1; rdev->raid_disk = mirror; @@ -2150,10 +2124,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) clear_bit(In_sync, &rdev->flags); set_bit(Replacement, &rdev->flags); rdev->raid_disk = repl_slot; - err = 0; - if (mddev->gendisk) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); + err = mddev_stack_new_rdev(mddev, rdev); + if (err) + return err; conf->fullsync = 1; WRITE_ONCE(p->replacement, rdev); } @@ -2290,8 +2263,6 @@ static void end_sync_write(struct bio *bio) struct mddev *mddev = r10_bio->mddev; struct r10conf *conf = mddev->private; int d; - sector_t first_bad; - int bad_sectors; int slot; int repl; struct md_rdev *rdev = NULL; @@ -2312,11 +2283,10 @@ static void end_sync_write(struct bio *bio) &rdev->mddev->recovery); set_bit(R10BIO_WriteError, &r10_bio->state); } - } else if (is_badblock(rdev, - r10_bio->devs[slot].addr, - r10_bio->sectors, - &first_bad, &bad_sectors)) + } else if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr, + r10_bio->sectors)) { set_bit(R10BIO_MadeGood, &r10_bio->state); + } rdev_dec_pending(rdev, mddev); @@ -2597,11 +2567,8 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, int sectors, struct page *page, enum req_op op) { - sector_t first_bad; - int bad_sectors; - - if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) - && (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags))) + if (rdev_has_badblock(rdev, sector, sectors) && + (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags))) return -1; if (sync_page_io(rdev, sector, sectors << 9, page, op, false)) /* success */ @@ -2658,16 +2625,14 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 s = PAGE_SIZE >> 9; do { - sector_t first_bad; - int bad_sectors; - d = r10_bio->devs[sl].devnum; rdev = conf->mirrors[d].rdev; if (rdev && test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags) && - is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, - &first_bad, &bad_sectors) == 0) { + rdev_has_badblock(rdev, + r10_bio->devs[sl].addr + sect, + s) == 0) { atomic_inc(&rdev->nr_pending); success = sync_page_io(rdev, r10_bio->devs[sl].addr + @@ -4002,14 +3967,26 @@ static struct r10conf *setup_conf(struct mddev *mddev) return ERR_PTR(err); } -static void raid10_set_io_opt(struct r10conf *conf) +static unsigned int raid10_nr_stripes(struct r10conf *conf) { - int raid_disks = conf->geo.raid_disks; + unsigned int raid_disks = conf->geo.raid_disks; - if (!(conf->geo.raid_disks % conf->geo.near_copies)) - raid_disks /= conf->geo.near_copies; - blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) * - raid_disks); + if (conf->geo.raid_disks % conf->geo.near_copies) + return raid_disks; + return raid_disks / conf->geo.near_copies; +} + +static int raid10_set_queue_limits(struct mddev *mddev) +{ + struct r10conf *conf = mddev->private; + struct queue_limits lim; + + blk_set_stacking_limits(&lim); + lim.max_write_zeroes_sectors = 0; + lim.io_min = mddev->chunk_sectors << 9; + lim.io_opt = lim.io_min * raid10_nr_stripes(conf); + mddev_stack_rdev_limits(mddev, &lim); + return queue_limits_set(mddev->gendisk->queue, &lim); } static int raid10_run(struct mddev *mddev) @@ -4021,6 +3998,7 @@ static int raid10_run(struct mddev *mddev) sector_t size; sector_t min_offset_diff = 0; int first = 1; + int ret = -EIO; if (mddev->private == NULL) { conf = setup_conf(mddev); @@ -4047,12 +4025,6 @@ static int raid10_run(struct mddev *mddev) } } - if (mddev->queue) { - blk_queue_max_write_zeroes_sectors(mddev->queue, 0); - blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); - raid10_set_io_opt(conf); - } - rdev_for_each(rdev, mddev) { long long diff; @@ -4081,14 +4053,16 @@ static int raid10_run(struct mddev *mddev) if (first || diff < min_offset_diff) min_offset_diff = diff; - if (mddev->gendisk) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - disk->head_position = 0; first = 0; } + if (!mddev_is_dm(conf->mddev)) { + ret = raid10_set_queue_limits(mddev); + if (ret) + goto out_free_conf; + } + /* need to check that every block has at least one working mirror */ if (!enough(conf, -1)) { pr_err("md/raid10:%s: not enough operational mirrors.\n", @@ -4185,7 +4159,7 @@ out_free_conf: raid10_free_conf(conf); mddev->private = NULL; out: - return -EIO; + return ret; } static void raid10_free(struct mddev *mddev, void *priv) @@ -4954,8 +4928,7 @@ static void end_reshape(struct r10conf *conf) conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); - if (conf->mddev->queue) - raid10_set_io_opt(conf); + mddev_update_io_opt(conf->mddev, raid10_nr_stripes(conf)); conf->fullsync = 0; } |