aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c332
1 files changed, 151 insertions, 181 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d3a837506a36..179ee4afe937 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -546,137 +546,30 @@ static int mddev_set_closing_and_sync_blockdev(struct mddev *mddev, int opener_n
return 0;
}
-/*
- * Generic flush handling for md
- */
-
-static void md_end_flush(struct bio *bio)
-{
- struct md_rdev *rdev = bio->bi_private;
- struct mddev *mddev = rdev->mddev;
-
- bio_put(bio);
-
- rdev_dec_pending(rdev, mddev);
-
- if (atomic_dec_and_test(&mddev->flush_pending))
- /* The pre-request flush has finished */
- queue_work(md_wq, &mddev->flush_work);
-}
-
-static void md_submit_flush_data(struct work_struct *ws);
-
-static void submit_flushes(struct work_struct *ws)
+bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
- struct mddev *mddev = container_of(ws, struct mddev, flush_work);
struct md_rdev *rdev;
-
- mddev->start_flush = ktime_get_boottime();
- INIT_WORK(&mddev->flush_work, md_submit_flush_data);
- atomic_set(&mddev->flush_pending, 1);
- rcu_read_lock();
- rdev_for_each_rcu(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
- !test_bit(Faulty, &rdev->flags)) {
- struct bio *bi;
-
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
- bi = bio_alloc_bioset(rdev->bdev, 0,
- REQ_OP_WRITE | REQ_PREFLUSH,
- GFP_NOIO, &mddev->bio_set);
- bi->bi_end_io = md_end_flush;
- bi->bi_private = rdev;
- atomic_inc(&mddev->flush_pending);
- submit_bio(bi);
- rcu_read_lock();
- }
- rcu_read_unlock();
- if (atomic_dec_and_test(&mddev->flush_pending))
- queue_work(md_wq, &mddev->flush_work);
-}
-
-static void md_submit_flush_data(struct work_struct *ws)
-{
- struct mddev *mddev = container_of(ws, struct mddev, flush_work);
- struct bio *bio = mddev->flush_bio;
+ struct bio *new;
/*
- * must reset flush_bio before calling into md_handle_request to avoid a
- * deadlock, because other bios passed md_handle_request suspend check
- * could wait for this and below md_handle_request could wait for those
- * bios because of suspend check
+ * md_flush_reqeust() should be called under md_handle_request() and
+ * 'active_io' is already grabbed. Hence it's safe to get rdev directly
+ * without rcu protection.
*/
- spin_lock_irq(&mddev->lock);
- mddev->prev_flush_start = mddev->start_flush;
- mddev->flush_bio = NULL;
- spin_unlock_irq(&mddev->lock);
- wake_up(&mddev->sb_wait);
+ WARN_ON(percpu_ref_is_zero(&mddev->active_io));
- if (bio->bi_iter.bi_size == 0) {
- /* an empty barrier - all done */
- bio_endio(bio);
- } else {
- bio->bi_opf &= ~REQ_PREFLUSH;
-
- /*
- * make_requst() will never return error here, it only
- * returns error in raid5_make_request() by dm-raid.
- * Since dm always splits data and flush operation into
- * two separate io, io size of flush submitted by dm
- * always is 0, make_request() will not be called here.
- */
- if (WARN_ON_ONCE(!mddev->pers->make_request(mddev, bio)))
- bio_io_error(bio);
- }
-
- /* The pair is percpu_ref_get() from md_flush_request() */
- percpu_ref_put(&mddev->active_io);
-}
+ rdev_for_each(rdev, mddev) {
+ if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
+ continue;
-/*
- * Manages consolidation of flushes and submitting any flushes needed for
- * a bio with REQ_PREFLUSH. Returns true if the bio is finished or is
- * being finished in another context. Returns false if the flushing is
- * complete but still needs the I/O portion of the bio to be processed.
- */
-bool md_flush_request(struct mddev *mddev, struct bio *bio)
-{
- ktime_t req_start = ktime_get_boottime();
- spin_lock_irq(&mddev->lock);
- /* flush requests wait until ongoing flush completes,
- * hence coalescing all the pending requests.
- */
- wait_event_lock_irq(mddev->sb_wait,
- !mddev->flush_bio ||
- ktime_before(req_start, mddev->prev_flush_start),
- mddev->lock);
- /* new request after previous flush is completed */
- if (ktime_after(req_start, mddev->prev_flush_start)) {
- WARN_ON(mddev->flush_bio);
- /*
- * Grab a reference to make sure mddev_suspend() will wait for
- * this flush to be done.
- *
- * md_flush_reqeust() is called under md_handle_request() and
- * 'active_io' is already grabbed, hence percpu_ref_is_zero()
- * won't pass, percpu_ref_tryget_live() can't be used because
- * percpu_ref_kill() can be called by mddev_suspend()
- * concurrently.
- */
- WARN_ON(percpu_ref_is_zero(&mddev->active_io));
- percpu_ref_get(&mddev->active_io);
- mddev->flush_bio = bio;
- spin_unlock_irq(&mddev->lock);
- INIT_WORK(&mddev->flush_work, submit_flushes);
- queue_work(md_wq, &mddev->flush_work);
- return true;
+ new = bio_alloc_bioset(rdev->bdev, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO,
+ &mddev->bio_set);
+ bio_chain(new, bio);
+ submit_bio(new);
}
- /* flush was performed for some other bio while we waited. */
- spin_unlock_irq(&mddev->lock);
- if (bio->bi_iter.bi_size == 0) {
- /* pure flush without data - all done */
+ if (bio_sectors(bio) == 0) {
bio_endio(bio);
return true;
}
@@ -763,7 +656,6 @@ int mddev_init(struct mddev *mddev)
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->sync_seq, 0);
spin_lock_init(&mddev->lock);
- atomic_set(&mddev->flush_pending, 0);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
@@ -772,6 +664,7 @@ int mddev_init(struct mddev *mddev)
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->level = LEVEL_NONE;
+ mddev_set_bitmap_ops(mddev);
INIT_WORK(&mddev->sync_work, md_start_sync);
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
@@ -1372,6 +1265,18 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
return ret;
}
+static u64 md_bitmap_events_cleared(struct mddev *mddev)
+{
+ struct md_bitmap_stats stats;
+ int err;
+
+ err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
+ if (err)
+ return 0;
+
+ return stats.events_cleared;
+}
+
/*
* validate_super for 0.90.0
* note: we are not using "freshest" for 0.9 superblock
@@ -1464,7 +1369,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru
/* if adding to array with a bitmap, then we can accept an
* older device ... but not too old.
*/
- if (ev1 < mddev->bitmap->events_cleared)
+ if (ev1 < md_bitmap_events_cleared(mddev))
return 0;
if (ev1 < mddev->events)
set_bit(Bitmap_sync, &rdev->flags);
@@ -1991,7 +1896,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc
/* If adding to array with a bitmap, then we can accept an
* older device, but not too old.
*/
- if (ev1 < mddev->bitmap->events_cleared)
+ if (ev1 < md_bitmap_events_cleared(mddev))
return 0;
if (ev1 < mddev->events)
set_bit(Bitmap_sync, &rdev->flags);
@@ -2323,7 +2228,6 @@ super_1_allow_new_offset(struct md_rdev *rdev,
unsigned long long new_offset)
{
/* All necessary checks on new >= old have been done */
- struct bitmap *bitmap;
if (new_offset >= rdev->data_offset)
return 1;
@@ -2340,11 +2244,18 @@ super_1_allow_new_offset(struct md_rdev *rdev,
*/
if (rdev->sb_start + (32+4)*2 > new_offset)
return 0;
- bitmap = rdev->mddev->bitmap;
- if (bitmap && !rdev->mddev->bitmap_info.file &&
- rdev->sb_start + rdev->mddev->bitmap_info.offset +
- bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
- return 0;
+
+ if (!rdev->mddev->bitmap_info.file) {
+ struct mddev *mddev = rdev->mddev;
+ struct md_bitmap_stats stats;
+ int err;
+
+ err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
+ if (!err && rdev->sb_start + mddev->bitmap_info.offset +
+ stats.file_pages * (PAGE_SIZE >> 9) > new_offset)
+ return 0;
+ }
+
if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
return 0;
@@ -2820,7 +2731,7 @@ repeat:
mddev_add_trace_msg(mddev, "md md_update_sb");
rewrite:
- md_bitmap_update_sb(mddev->bitmap);
+ mddev->bitmap_ops->update_sb(mddev->bitmap);
rdev_for_each(rdev, mddev) {
if (rdev->sb_loaded != 1)
continue; /* no noise on spare devices */
@@ -4142,6 +4053,34 @@ static struct md_sysfs_entry md_level =
__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
static ssize_t
+new_level_show(struct mddev *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->new_level);
+}
+
+static ssize_t
+new_level_store(struct mddev *mddev, const char *buf, size_t len)
+{
+ unsigned int n;
+ int err;
+
+ err = kstrtouint(buf, 10, &n);
+ if (err < 0)
+ return err;
+ err = mddev_lock(mddev);
+ if (err)
+ return err;
+
+ mddev->new_level = n;
+ md_update_sb(mddev, 1);
+
+ mddev_unlock(mddev);
+ return len;
+}
+static struct md_sysfs_entry md_new_level =
+__ATTR(new_level, 0664, new_level_show, new_level_store);
+
+static ssize_t
layout_show(struct mddev *mddev, char *page)
{
/* just a number, not meaningful for all levels */
@@ -4680,17 +4619,23 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len)
/* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
while (*buf) {
chunk = end_chunk = simple_strtoul(buf, &end, 0);
- if (buf == end) break;
+ if (buf == end)
+ break;
+
if (*end == '-') { /* range */
buf = end + 1;
end_chunk = simple_strtoul(buf, &end, 0);
- if (buf == end) break;
+ if (buf == end)
+ break;
}
- if (*end && !isspace(*end)) break;
- md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
+
+ if (*end && !isspace(*end))
+ break;
+
+ mddev->bitmap_ops->dirty_bits(mddev, chunk, end_chunk);
buf = skip_spaces(end);
}
- md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
+ mddev->bitmap_ops->unplug(mddev, true); /* flush the bits to disk */
out:
mddev_unlock(mddev);
return len;
@@ -5666,6 +5611,7 @@ __ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
static struct attribute *md_default_attrs[] = {
&md_level.attr,
+ &md_new_level.attr,
&md_layout.attr,
&md_raid_disks.attr,
&md_uuid.attr,
@@ -6206,16 +6152,10 @@ int md_run(struct mddev *mddev)
}
if (err == 0 && pers->sync_request &&
(mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
- struct bitmap *bitmap;
-
- bitmap = md_bitmap_create(mddev, -1);
- if (IS_ERR(bitmap)) {
- err = PTR_ERR(bitmap);
+ err = mddev->bitmap_ops->create(mddev, -1);
+ if (err)
pr_warn("%s: failed to create bitmap (%d)\n",
mdname(mddev), err);
- } else
- mddev->bitmap = bitmap;
-
}
if (err)
goto bitmap_abort;
@@ -6285,7 +6225,7 @@ bitmap_abort:
pers->free(mddev, mddev->private);
mddev->private = NULL;
module_put(pers->owner);
- md_bitmap_destroy(mddev);
+ mddev->bitmap_ops->destroy(mddev);
abort:
bioset_exit(&mddev->io_clone_set);
exit_sync_set:
@@ -6304,9 +6244,10 @@ int do_md_run(struct mddev *mddev)
err = md_run(mddev);
if (err)
goto out;
- err = md_bitmap_load(mddev);
+
+ err = mddev->bitmap_ops->load(mddev);
if (err) {
- md_bitmap_destroy(mddev);
+ mddev->bitmap_ops->destroy(mddev);
goto out;
}
@@ -6450,7 +6391,8 @@ static void __md_stop_writes(struct mddev *mddev)
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}
- md_bitmap_flush(mddev);
+
+ mddev->bitmap_ops->flush(mddev);
if (md_is_rdwr(mddev) &&
((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
@@ -6477,7 +6419,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
static void mddev_detach(struct mddev *mddev)
{
- md_bitmap_wait_behind_writes(mddev);
+ mddev->bitmap_ops->wait_behind_writes(mddev);
if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
@@ -6492,7 +6434,8 @@ static void mddev_detach(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
- md_bitmap_destroy(mddev);
+
+ mddev->bitmap_ops->destroy(mddev);
mddev_detach(mddev);
spin_lock(&mddev->lock);
mddev->pers = NULL;
@@ -7270,22 +7213,19 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
err = 0;
if (mddev->pers) {
if (fd >= 0) {
- struct bitmap *bitmap;
+ err = mddev->bitmap_ops->create(mddev, -1);
+ if (!err)
+ err = mddev->bitmap_ops->load(mddev);
- bitmap = md_bitmap_create(mddev, -1);
- if (!IS_ERR(bitmap)) {
- mddev->bitmap = bitmap;
- err = md_bitmap_load(mddev);
- } else
- err = PTR_ERR(bitmap);
if (err) {
- md_bitmap_destroy(mddev);
+ mddev->bitmap_ops->destroy(mddev);
fd = -1;
}
} else if (fd < 0) {
- md_bitmap_destroy(mddev);
+ mddev->bitmap_ops->destroy(mddev);
}
}
+
if (fd < 0) {
struct file *f = mddev->bitmap_info.file;
if (f) {
@@ -7554,7 +7494,6 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
goto err;
}
if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
- struct bitmap *bitmap;
/* add the bitmap */
if (mddev->bitmap) {
rv = -EEXIST;
@@ -7568,24 +7507,24 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.default_offset;
mddev->bitmap_info.space =
mddev->bitmap_info.default_space;
- bitmap = md_bitmap_create(mddev, -1);
- if (!IS_ERR(bitmap)) {
- mddev->bitmap = bitmap;
- rv = md_bitmap_load(mddev);
- } else
- rv = PTR_ERR(bitmap);
+ rv = mddev->bitmap_ops->create(mddev, -1);
+ if (!rv)
+ rv = mddev->bitmap_ops->load(mddev);
+
if (rv)
- md_bitmap_destroy(mddev);
+ mddev->bitmap_ops->destroy(mddev);
} else {
- /* remove the bitmap */
- if (!mddev->bitmap) {
- rv = -ENOENT;
+ struct md_bitmap_stats stats;
+
+ rv = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
+ if (rv)
goto err;
- }
- if (mddev->bitmap->storage.file) {
+
+ if (stats.file) {
rv = -EINVAL;
goto err;
}
+
if (mddev->bitmap_info.nodes) {
/* hold PW on all the bitmap lock */
if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
@@ -7600,7 +7539,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
module_put(md_cluster_mod);
mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
}
- md_bitmap_destroy(mddev);
+ mddev->bitmap_ops->destroy(mddev);
mddev->bitmap_info.offset = 0;
}
}
@@ -8370,6 +8309,33 @@ static void md_seq_stop(struct seq_file *seq, void *v)
spin_unlock(&all_mddevs_lock);
}
+static void md_bitmap_status(struct seq_file *seq, struct mddev *mddev)
+{
+ struct md_bitmap_stats stats;
+ unsigned long used_pages;
+ unsigned long chunk_kb;
+ int err;
+
+ err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
+ if (err)
+ return;
+
+ chunk_kb = mddev->bitmap_info.chunksize >> 10;
+ used_pages = stats.pages - stats.missing_pages;
+
+ seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], %lu%s chunk",
+ used_pages, stats.pages, used_pages << (PAGE_SHIFT - 10),
+ chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
+ chunk_kb ? "KB" : "B");
+
+ if (stats.file) {
+ seq_puts(seq, ", file: ");
+ seq_file_path(seq, stats.file, " \t\n");
+ }
+
+ seq_putc(seq, '\n');
+}
+
static int md_seq_show(struct seq_file *seq, void *v)
{
struct mddev *mddev;
@@ -8390,14 +8356,19 @@ static int md_seq_show(struct seq_file *seq, void *v)
spin_unlock(&all_mddevs_lock);
spin_lock(&mddev->lock);
if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
- seq_printf(seq, "%s : %sactive", mdname(mddev),
- mddev->pers ? "" : "in");
+ seq_printf(seq, "%s : ", mdname(mddev));
if (mddev->pers) {
+ if (test_bit(MD_BROKEN, &mddev->flags))
+ seq_printf(seq, "broken");
+ else
+ seq_printf(seq, "active");
if (mddev->ro == MD_RDONLY)
seq_printf(seq, " (read-only)");
if (mddev->ro == MD_AUTO_READ)
seq_printf(seq, " (auto-read-only)");
seq_printf(seq, " %s", mddev->pers->name);
+ } else {
+ seq_printf(seq, "inactive");
}
sectors = 0;
@@ -8453,7 +8424,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
} else
seq_printf(seq, "\n ");
- md_bitmap_status(seq, mddev->bitmap);
+ md_bitmap_status(seq, mddev);
seq_printf(seq, "\n");
}
@@ -8668,7 +8639,6 @@ void md_write_start(struct mddev *mddev, struct bio *bi)
BUG_ON(mddev->ro == MD_RDONLY);
if (mddev->ro == MD_AUTO_READ) {
/* need to switch to read/write */
- flush_work(&mddev->sync_work);
mddev->ro = MD_RDWR;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
@@ -9506,7 +9476,7 @@ static void md_start_sync(struct work_struct *ws)
* stored on all devices. So make sure all bitmap pages get written.
*/
if (spares)
- md_bitmap_write_all(mddev->bitmap);
+ mddev->bitmap_ops->write_all(mddev);
name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
"reshape" : "resync";
@@ -9594,7 +9564,7 @@ static void unregister_sync_thread(struct mddev *mddev)
void md_check_recovery(struct mddev *mddev)
{
if (mddev->bitmap)
- md_bitmap_daemon_work(mddev);
+ mddev->bitmap_ops->daemon_work(mddev);
if (signal_pending(current)) {
if (mddev->pers->sync_request && !mddev->external) {
@@ -9965,7 +9935,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
if (ret)
pr_info("md-cluster: resize failed\n");
else
- md_bitmap_update_sb(mddev->bitmap);
+ mddev->bitmap_ops->update_sb(mddev->bitmap);
}
/* Check for change of roles in the active devices */