diff options
Diffstat (limited to 'drivers/md')
29 files changed, 568 insertions, 417 deletions
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 99499d1f6e66..9f32901fdad1 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -160,7 +160,7 @@ static void read_moving(struct cache_set *c) moving_init(io); bio = &io->bio.bio; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; bio->bi_end_io = read_moving_endio; if (bch_bio_alloc_pages(bio, GFP_KERNEL)) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 3427555b0cca..67a2e29e0b40 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -244,7 +244,7 @@ static void bch_data_insert_start(struct closure *cl) trace_bcache_cache_insert(k); bch_keylist_push(&op->insert_keys); - bio_set_op_attrs(n, REQ_OP_WRITE, 0); + n->bi_opf = REQ_OP_WRITE; bch_submit_bbio(n, op->c, k, 0); } while (n != bio); @@ -401,7 +401,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) } if (bypass_torture_test(dc)) { - if (prandom_u32_max(4) == 3) + if (get_random_u32_below(4) == 3) goto skip; else goto rescale; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 0285b676e983..d4a5fc0650bb 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -434,7 +434,7 @@ static void write_dirty(struct closure *cl) */ if (KEY_DIRTY(&w->key)) { dirty_init(w); - bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0); + io->bio.bi_opf = REQ_OP_WRITE; io->bio.bi_iter.bi_sector = KEY_START(&w->key); bio_set_dev(&io->bio, io->dc->bdev); io->bio.bi_end_io = dirty_endio; @@ -547,7 +547,7 @@ static void read_dirty(struct cached_dev *dc) io->sequence = sequence++; dirty_init(w); - bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); + io->bio.bi_opf = REQ_OP_READ; io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0); bio_set_dev(&io->bio, dc->disk.c->cache->bdev); io->bio.bi_end_io = read_dirty_endio; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 09c7ed2650ca..bb786c39545e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -795,7 +795,8 @@ static void __make_buffer_clean(struct dm_buffer *b) { BUG_ON(b->hold_count); - if (!b->state) /* fast case */ + /* smp_load_acquire() pairs with read_endio()'s smp_mb__before_atomic() */ + if (!smp_load_acquire(&b->state)) /* fast case */ return; wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); @@ -816,7 +817,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c) BUG_ON(test_bit(B_DIRTY, &b->state)); if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep && - unlikely(test_bit(B_READING, &b->state))) + unlikely(test_bit_acquire(B_READING, &b->state))) continue; if (!b->hold_count) { @@ -1058,7 +1059,7 @@ found_buffer: * If the user called both dm_bufio_prefetch and dm_bufio_get on * the same buffer, it would deadlock if we waited. */ - if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state))) + if (nf == NF_GET && unlikely(test_bit_acquire(B_READING, &b->state))) return NULL; b->hold_count++; @@ -1218,7 +1219,7 @@ void dm_bufio_release(struct dm_buffer *b) * invalid buffer. */ if ((b->read_error || b->write_error) && - !test_bit(B_READING, &b->state) && + !test_bit_acquire(B_READING, &b->state) && !test_bit(B_WRITING, &b->state) && !test_bit(B_DIRTY, &b->state)) { __unlink_buffer(b); @@ -1479,7 +1480,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_release_move); static void forget_buffer_locked(struct dm_buffer *b) { - if (likely(!b->hold_count) && likely(!b->state)) { + if (likely(!b->hold_count) && likely(!smp_load_acquire(&b->state))) { __unlink_buffer(b); __free_buffer_wake(b); } @@ -1639,7 +1640,7 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) { if (!(gfp & __GFP_FS) || (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) { - if (test_bit(B_READING, &b->state) || + if (test_bit_acquire(B_READING, &b->state) || test_bit(B_WRITING, &b->state) || test_bit(B_DIRTY, &b->state)) return false; @@ -1857,6 +1858,8 @@ bad: dm_io_client_destroy(c->dm_io); bad_dm_io: mutex_destroy(&c->lock); + if (c->no_sleep) + static_branch_dec(&no_sleep_enabled); kfree(c); bad_client: return ERR_PTR(r); diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index ab13b7380265..83a5975bcc72 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -551,11 +551,13 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, return r; } -static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd) +static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd, + bool destroy_bm) { dm_sm_destroy(cmd->metadata_sm); dm_tm_destroy(cmd->tm); - dm_block_manager_destroy(cmd->bm); + if (destroy_bm) + dm_block_manager_destroy(cmd->bm); } typedef unsigned long (*flags_mutator)(unsigned long); @@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, cmd2 = lookup(bdev); if (cmd2) { mutex_unlock(&table_lock); - __destroy_persistent_data_objects(cmd); + __destroy_persistent_data_objects(cmd, true); kfree(cmd); return cmd2; } @@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd) mutex_unlock(&table_lock); if (!cmd->fail_io) - __destroy_persistent_data_objects(cmd); + __destroy_persistent_data_objects(cmd, true); kfree(cmd); } } @@ -1807,14 +1809,52 @@ int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result) int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) { - int r; + int r = -EINVAL; + struct dm_block_manager *old_bm = NULL, *new_bm = NULL; + + /* fail_io is double-checked with cmd->root_lock held below */ + if (unlikely(cmd->fail_io)) + return r; + + /* + * Replacement block manager (new_bm) is created and old_bm destroyed outside of + * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of + * shrinker associated with the block manager's bufio client vs cmd root_lock). + * - must take shrinker_rwsem without holding cmd->root_lock + */ + new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, + CACHE_MAX_CONCURRENT_LOCKS); WRITE_LOCK(cmd); - __destroy_persistent_data_objects(cmd); - r = __create_persistent_data_objects(cmd, false); + if (cmd->fail_io) { + WRITE_UNLOCK(cmd); + goto out; + } + + __destroy_persistent_data_objects(cmd, false); + old_bm = cmd->bm; + if (IS_ERR(new_bm)) { + DMERR("could not create block manager during abort"); + cmd->bm = NULL; + r = PTR_ERR(new_bm); + goto out_unlock; + } + + cmd->bm = new_bm; + r = __open_or_format_metadata(cmd, false); + if (r) { + cmd->bm = NULL; + goto out_unlock; + } + new_bm = NULL; +out_unlock: if (r) cmd->fail_io = true; WRITE_UNLOCK(cmd); + dm_block_manager_destroy(old_bm); +out: + if (new_bm && !IS_ERR(new_bm)) + dm_block_manager_destroy(new_bm); return r; } diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index c05fc3436cef..06eb31af626f 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -166,7 +166,7 @@ struct dm_cache_policy_type { struct dm_cache_policy_type *real; /* - * Policies may store a hint for each each cache block. + * Policies may store a hint for each cache block. * Currently the size of this hint must be 0 or 4 bytes but we * expect to relax this in future. */ diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 54a8d5c9a44e..5e92fac90b67 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -907,16 +907,16 @@ static void abort_transaction(struct cache *cache) if (get_cache_mode(cache) >= CM_READ_ONLY) return; - if (dm_cache_metadata_set_needs_check(cache->cmd)) { - DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); - set_cache_mode(cache, CM_FAIL); - } - DMERR_LIMIT("%s: aborting current metadata transaction", dev_name); if (dm_cache_metadata_abort(cache->cmd)) { DMERR("%s: failed to abort metadata transaction", dev_name); set_cache_mode(cache, CM_FAIL); } + + if (dm_cache_metadata_set_needs_check(cache->cmd)) { + DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); + set_cache_mode(cache, CM_FAIL); + } } static void metadata_operation_failed(struct cache *cache, const char *op, int r) @@ -1887,6 +1887,7 @@ static void destroy(struct cache *cache) if (cache->prison) dm_bio_prison_destroy_v2(cache->prison); + cancel_delayed_work_sync(&cache->waker); if (cache->wq) destroy_workqueue(cache->wq); diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index 811b0a5379d0..29e0b85eeaf0 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -1958,6 +1958,7 @@ static void clone_dtr(struct dm_target *ti) mempool_exit(&clone->hydration_pool); dm_kcopyd_client_destroy(clone->kcopyd_client); + cancel_delayed_work_sync(&clone->waker); destroy_workqueue(clone->wq); hash_table_exit(clone); dm_clone_metadata_close(clone->cmd); @@ -2035,7 +2036,7 @@ static void disable_passdown_if_not_supported(struct clone *clone) reason = "max discard sectors smaller than a region"; if (reason) { - DMWARN("Destination device (%pd) %s: Disabling discard passdown.", + DMWARN("Destination device (%pg) %s: Disabling discard passdown.", dest_dev, reason); clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags); } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 159c6806c19b..2653516bcdef 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3630,6 +3630,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->physical_block_size = max_t(unsigned, limits->physical_block_size, cc->sector_size); limits->io_min = max_t(unsigned, limits->io_min, cc->sector_size); + limits->dma_alignment = limits->logical_block_size - 1; } static struct target_type crypt_target = { diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index b0c45c6ebe0b..dc4381d68313 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -8,6 +8,7 @@ */ #include <linux/ctype.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/device-mapper.h> #include <linux/init.h> @@ -18,12 +19,17 @@ #define DM_MAX_DEVICES 256 #define DM_MAX_TARGETS 256 #define DM_MAX_STR_SIZE 4096 +#define DM_MAX_WAITFOR 256 static char *create; +static char *waitfor[DM_MAX_WAITFOR]; + /* * Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+] * Table format: <start_sector> <num_sectors> <target_type> <target_args> + * Block devices to wait for to become available before setting up tables: + * dm-mod.waitfor=<device1>[,..,<deviceN>] * * See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format * details. @@ -266,7 +272,7 @@ static int __init dm_init_init(void) struct dm_device *dev; LIST_HEAD(devices); char *str; - int r; + int i, r; if (!create) return 0; @@ -286,6 +292,17 @@ static int __init dm_init_init(void) DMINFO("waiting for all devices to be available before creating mapped devices"); wait_for_device_probe(); + for (i = 0; i < ARRAY_SIZE(waitfor); i++) { + if (waitfor[i]) { + DMINFO("waiting for device %s ...", waitfor[i]); + while (!dm_get_dev_t(waitfor[i])) + msleep(5); + } + } + + if (waitfor[0]) + DMINFO("all devices available"); + list_for_each_entry(dev, &devices, list) { if (dm_early_create(&dev->dmi, dev->table, dev->target_args_array)) @@ -301,3 +318,6 @@ late_initcall(dm_init_init); module_param(create, charp, 0); MODULE_PARM_DESC(create, "Create a mapped device in early boot"); + +module_param_array(waitfor, charp, NULL, 0); +MODULE_PARM_DESC(waitfor, "Devices to wait for before setting up tables"); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index aaf2472df6e5..1388ee35571e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -263,6 +263,7 @@ struct dm_integrity_c { struct completion crypto_backoff; + bool wrote_to_journal; bool journal_uptodate; bool just_formatted; bool recalculate_flag; @@ -2375,6 +2376,8 @@ static void integrity_commit(struct work_struct *w) if (!commit_sections) goto release_flush_bios; + ic->wrote_to_journal = true; + i = commit_start; for (n = 0; n < commit_sections; n++) { for (j = 0; j < ic->journal_section_entries; j++) { @@ -2591,10 +2594,6 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; - /* the following test is not needed, but it tests the replay code */ - if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev) - return; - spin_lock_irq(&ic->endio_wait.lock); write_start = ic->committed_section; write_sections = ic->n_committed_sections; @@ -3101,10 +3100,17 @@ static void dm_integrity_postsuspend(struct dm_target *ti) drain_workqueue(ic->commit_wq); if (ic->mode == 'J') { - if (ic->meta_dev) - queue_work(ic->writer_wq, &ic->writer_work); + queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); dm_integrity_flush_buffers(ic, true); + if (ic->wrote_to_journal) { + init_journal(ic, ic->free_section, + ic->journal_sections - ic->free_section, ic->commit_seq); + if (ic->free_section) { + init_journal(ic, 0, ic->free_section, + next_commit_seq(ic->commit_seq)); + } + } } if (ic->mode == 'B') { @@ -3132,6 +3138,8 @@ static void dm_integrity_resume(struct dm_target *ti) DEBUG_print("resume\n"); + ic->wrote_to_journal = false; + if (ic->provided_data_sectors != old_provided_data_sectors) { if (ic->provided_data_sectors > old_provided_data_sectors && ic->mode == 'B' && @@ -3370,6 +3378,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); + limits->dma_alignment = limits->logical_block_size - 1; } } @@ -4549,6 +4558,8 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); + if (ic->mode == 'B') + cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); if (ic->wait_wq) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 98976aaa9db9..36fc6ae4737a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -434,10 +434,10 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, hc = __get_name_cell(new); if (hc) { - DMWARN("Unable to change %s on mapped device %s to one that " - "already exists: %s", - change_uuid ? "uuid" : "name", - param->name, new); + DMERR("Unable to change %s on mapped device %s to one that " + "already exists: %s", + change_uuid ? "uuid" : "name", + param->name, new); dm_put(hc->md); up_write(&_hash_lock); kfree(new_data); @@ -449,8 +449,8 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, */ hc = __get_name_cell(param->name); if (!hc) { - DMWARN("Unable to rename non-existent device, %s to %s%s", - param->name, change_uuid ? "uuid " : "", new); + DMERR("Unable to rename non-existent device, %s to %s%s", + param->name, change_uuid ? "uuid " : "", new); up_write(&_hash_lock); kfree(new_data); return ERR_PTR(-ENXIO); @@ -460,9 +460,9 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, * Does this device already have a uuid? */ if (change_uuid && hc->uuid) { - DMWARN("Unable to change uuid of mapped device %s to %s " - "because uuid is already set to %s", - param->name, new, hc->uuid); + DMERR("Unable to change uuid of mapped device %s to %s " + "because uuid is already set to %s", + param->name, new, hc->uuid); dm_put(hc->md); up_write(&_hash_lock); kfree(new_data); @@ -655,7 +655,7 @@ static void list_version_get_needed(struct target_type *tt, void *needed_param) size_t *needed = needed_param; *needed += sizeof(struct dm_target_versions); - *needed += strlen(tt->name); + *needed += strlen(tt->name) + 1; *needed += ALIGN_MASK; } @@ -681,7 +681,7 @@ static void list_version_get_info(struct target_type *tt, void *param) strcpy(info->vers->name, tt->name); info->old_vers = info->vers; - info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1); + info->vers = align_ptr((void *)(info->vers + 1) + strlen(tt->name) + 1); } static int __list_versions(struct dm_ioctl *param, size_t param_size, const char *name) @@ -720,7 +720,7 @@ static int __list_versions(struct dm_ioctl *param, size_t param_size, const char iter_info.old_vers = NULL; iter_info.vers = vers; iter_info.flags = 0; - iter_info.end = (char *)vers+len; + iter_info.end = (char *)vers + needed; /* * Now loop through filling out the names & versions. @@ -750,7 +750,7 @@ static int get_target_version(struct file *filp, struct dm_ioctl *param, size_t static int check_name(const char *name) { if (strchr(name, '/')) { - DMWARN("invalid device name"); + DMERR("invalid device name"); return -EINVAL; } @@ -773,7 +773,7 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *src down_read(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { - DMWARN("device has been removed from the dev hash table."); + DMERR("device has been removed from the dev hash table."); goto out; } @@ -1026,7 +1026,7 @@ static int dev_rename(struct file *filp, struct dm_ioctl *param, size_t param_si if (new_data < param->data || invalid_str(new_data, (void *) param + param_size) || !*new_data || strlen(new_data) > (change_uuid ? DM_UUID_LEN - 1 : DM_NAME_LEN - 1)) { - DMWARN("Invalid new mapped device name or uuid string supplied."); + DMERR("Invalid new mapped device name or uuid string supplied."); return -EINVAL; } @@ -1061,7 +1061,7 @@ static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t pa if (geostr < param->data || invalid_str(geostr, (void *) param + param_size)) { - DMWARN("Invalid geometry supplied."); + DMERR("Invalid geometry supplied."); goto out; } @@ -1069,13 +1069,13 @@ static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t pa indata + 1, indata + 2, indata + 3, &dummy); if (x != 4) { - DMWARN("Unable to interpret geometry settings."); + DMERR("Unable to interpret geometry settings."); goto out; } if (indata[0] > 65535 || indata[1] > 255 || indata[2] > 255 || indata[3] > ULONG_MAX) { - DMWARN("Geometry exceeds range limits."); + DMERR("Geometry exceeds range limits."); goto out; } @@ -1387,7 +1387,7 @@ static int populate_table(struct dm_table *table, char *target_params; if (!param->target_count) { - DMWARN("populate_table: no targets specified"); + DMERR("populate_table: no targets specified"); return -EINVAL; } @@ -1395,7 +1395,7 @@ static int populate_table(struct dm_table *table, r = next_target(spec, next, end, &spec, &target_params); if (r) { - DMWARN("unable to find target"); + DMERR("unable to find target"); return r; } @@ -1404,7 +1404,7 @@ static int populate_table(struct dm_table *table, (sector_t) spec->length, target_params); if (r) { - DMWARN("error adding target to table"); + DMERR("error adding target to table"); return r; } @@ -1451,8 +1451,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si if (immutable_target_type && (immutable_target_type != dm_table_get_immutable_target_type(t)) && !dm_table_get_wildcard_target(t)) { - DMWARN("can't replace immutable target type %s", - immutable_target_type->name); + DMERR("can't replace immutable target type %s", + immutable_target_type->name); r = -EINVAL; goto err_unlock_md_type; } @@ -1461,12 +1461,12 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si /* setup md->queue to reflect md's type (may block) */ r = dm_setup_md_queue(md, t); if (r) { - DMWARN("unable to set up device queue for new table."); + DMERR("unable to set up device queue for new table."); goto err_unlock_md_type; } } else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) { - DMWARN("can't change device type (old=%u vs new=%u) after initial table load.", - dm_get_md_type(md), dm_table_get_type(t)); + DMERR("can't change device type (old=%u vs new=%u) after initial table load.", + dm_get_md_type(md), dm_table_get_type(t)); r = -EINVAL; goto err_unlock_md_type; } @@ -1477,7 +1477,7 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si down_write(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { - DMWARN("device has been removed from the dev hash table."); + DMERR("device has been removed from the dev hash table."); up_write(&_hash_lock); r = -ENXIO; goto err_destroy_table; @@ -1686,19 +1686,19 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para if (tmsg < (struct dm_target_msg *) param->data || invalid_str(tmsg->message, (void *) param + param_size)) { - DMWARN("Invalid target message parameters."); + DMERR("Invalid target message parameters."); r = -EINVAL; goto out; } r = dm_split_args(&argc, &argv, tmsg->message); if (r) { - DMWARN("Failed to split target message parameters"); + DMERR("Failed to split target message parameters"); goto out; } if (!argc) { - DMWARN("Empty message received."); + DMERR("Empty message received."); r = -EINVAL; goto out_argv; } @@ -1718,12 +1718,12 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para ti = dm_table_find_target(table, tmsg->sector); if (!ti) { - DMWARN("Target message sector outside device."); + DMERR("Target message sector outside device."); r = -EINVAL; } else if (ti->type->message) r = ti->type->message(ti, argc, argv, result, maxlen); else { - DMWARN("Target type does not support messages"); + DMERR("Target type does not support messages"); r = -EINVAL; } @@ -1788,8 +1788,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) {DM_TARGET_MSG_CMD, 0, target_message}, {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry}, - {DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll}, - {DM_GET_TARGET_VERSION, 0, get_target_version}, + {DM_DEV_ARM_POLL_CMD, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll}, + {DM_GET_TARGET_VERSION_CMD, 0, get_target_version}, }; if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) @@ -1814,11 +1814,11 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) if ((DM_VERSION_MAJOR != version[0]) || (DM_VERSION_MINOR < version[1])) { - DMWARN("ioctl interface mismatch: " - "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", - DM_VERSION_MAJOR, DM_VERSION_MINOR, - DM_VERSION_PATCHLEVEL, - version[0], version[1], version[2], cmd); + DMERR("ioctl interface mismatch: " + "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", + DM_VERSION_MAJOR, DM_VERSION_MINOR, + DM_VERSION_PATCHLEVEL, + version[0], version[1], version[2], cmd); r = -EINVAL; } @@ -1927,11 +1927,11 @@ static int validate_params(uint cmd, struct dm_ioctl *param) if (cmd == DM_DEV_CREATE_CMD) { if (!*param->name) { - DMWARN("name not supplied when creating device"); + DMERR("name not supplied when creating device"); return -EINVAL; } } else if (*param->uuid && *param->name) { - DMWARN("only supply one of name or uuid, cmd(%u)", cmd); + DMERR("only supply one of name or uuid, cmd(%u)", cmd); return -EINVAL; } @@ -1978,7 +1978,7 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us fn = lookup_ioctl(cmd, &ioctl_flags); if (!fn) { - DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); + DMERR("dm_ctl_ioctl: unknown command 0x%x", command); return -ENOTTY; } @@ -2203,7 +2203,7 @@ int __init dm_early_create(struct dm_ioctl *dmi, (sector_t) spec_array[i]->length, target_params_array[i]); if (r) { - DMWARN("error adding target to table"); + DMERR("error adding target to table"); goto err_destroy_table; } } @@ -2216,7 +2216,7 @@ int __init dm_early_create(struct dm_ioctl *dmi, /* setup md->queue to reflect md's type (may block) */ r = dm_setup_md_queue(md, t); if (r) { - DMWARN("unable to set up device queue for new table."); + DMERR("unable to set up device queue for new table."); goto err_destroy_table; } diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 20fd688f72e7..178e13a5b059 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -875,6 +875,7 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev); limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev); limits->io_min = limits->physical_block_size; + limits->dma_alignment = limits->logical_block_size - 1; } #if IS_ENABLED(CONFIG_FS_DAX) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c640be453313..54263679a7b1 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2529,7 +2529,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) * of the "sync" directive. * * With reshaping capability added, we must ensure that - * that the "sync" directive is disallowed during the reshape. + * the "sync" directive is disallowed during the reshape. */ if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) continue; @@ -2590,7 +2590,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) /* * Adjust data_offset and new_data_offset on all disk members of @rs - * for out of place reshaping if requested by contructor + * for out of place reshaping if requested by constructor * * We need free space at the beginning of each raid disk for forward * and at the end for backward reshapes which userspace has to provide diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 3001b10a3fbf..a41209a43506 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -238,7 +238,7 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped) dm_requeue_original_request(tio, true); break; default: - DMWARN("unimplemented target endio return value: %d", r); + DMCRIT("unimplemented target endio return value: %d", r); BUG(); } } @@ -409,7 +409,7 @@ static int map_request(struct dm_rq_target_io *tio) dm_kill_unmapped_request(rq, BLK_STS_IOERR); break; default: - DMWARN("unimplemented target map return value: %d", r); + DMCRIT("unimplemented target map return value: %d", r); BUG(); } diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 8326f9fe0e91..f105a71915ab 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -1220,7 +1220,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv, return 2; /* this wasn't a stats message */ if (r == -EINVAL) - DMWARN("Invalid parameters for message %s", argv[0]); + DMCRIT("Invalid parameters for message %s", argv[0]); return r; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d8034ff0cb24..8541d5688f3a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -234,12 +234,12 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, return 0; if ((start >= dev_size) || (start + len > dev_size)) { - DMWARN("%s: %pg too small for target: " - "start=%llu, len=%llu, dev_size=%llu", - dm_device_name(ti->table->md), bdev, - (unsigned long long)start, - (unsigned long long)len, - (unsigned long long)dev_size); + DMERR("%s: %pg too small for target: " + "start=%llu, len=%llu, dev_size=%llu", + dm_device_name(ti->table->md), bdev, + (unsigned long long)start, + (unsigned long long)len, + (unsigned long long)dev_size); return 1; } @@ -251,10 +251,10 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, unsigned int zone_sectors = bdev_zone_sectors(bdev); if (start & (zone_sectors - 1)) { - DMWARN("%s: start=%llu not aligned to h/w zone size %u of %pg", - dm_device_name(ti->table->md), - (unsigned long long)start, - zone_sectors, bdev); + DMERR("%s: start=%llu not aligned to h/w zone size %u of %pg", + dm_device_name(ti->table->md), + (unsigned long long)start, + zone_sectors, bdev); return 1; } @@ -268,10 +268,10 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, * the sector range. */ if (len & (zone_sectors - 1)) { - DMWARN("%s: len=%llu not aligned to h/w zone size %u of %pg", - dm_device_name(ti->table->md), - (unsigned long long)len, - zone_sectors, bdev); + DMERR("%s: len=%llu not aligned to h/w zone size %u of %pg", + dm_device_name(ti->table->md), + (unsigned long long)len, + zone_sectors, bdev); return 1; } } @@ -280,20 +280,20 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, return 0; if (start & (logical_block_size_sectors - 1)) { - DMWARN("%s: start=%llu not aligned to h/w " - "logical block size %u of %pg", - dm_device_name(ti->table->md), - (unsigned long long)start, - limits->logical_block_size, bdev); + DMERR("%s: start=%llu not aligned to h/w " + "logical block size %u of %pg", + dm_device_name(ti->table->md), + (unsigned long long)start, + limits->logical_block_size, bdev); return 1; } if (len & (logical_block_size_sectors - 1)) { - DMWARN("%s: len=%llu not aligned to h/w " - "logical block size %u of %pg", - dm_device_name(ti->table->md), - (unsigned long long)len, - limits->logical_block_size, bdev); + DMERR("%s: len=%llu not aligned to h/w " + "logical block size %u of %pg", + dm_device_name(ti->table->md), + (unsigned long long)len, + limits->logical_block_size, bdev); return 1; } @@ -434,8 +434,8 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) } } if (!found) { - DMWARN("%s: device %s not in table devices list", - dm_device_name(ti->table->md), d->name); + DMERR("%s: device %s not in table devices list", + dm_device_name(ti->table->md), d->name); return; } if (refcount_dec_and_test(&dd->count)) { @@ -618,12 +618,12 @@ static int validate_hardware_logical_block_alignment(struct dm_table *t, } if (remaining) { - DMWARN("%s: table line %u (start sect %llu len %llu) " - "not aligned to h/w logical block size %u", - dm_device_name(t->md), i, - (unsigned long long) ti->begin, - (unsigned long long) ti->len, - limits->logical_block_size); + DMERR("%s: table line %u (start sect %llu len %llu) " + "not aligned to h/w logical block size %u", + dm_device_name(t->md), i, + (unsigned long long) ti->begin, + (unsigned long long) ti->len, + limits->logical_block_size); return -EINVAL; } @@ -1008,7 +1008,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * struct dm_md_mempools *pools; if (unlikely(type == DM_TYPE_NONE)) { - DMWARN("no table type is set, can't allocate mempools"); + DMERR("no table type is set, can't allocate mempools"); return -EINVAL; } @@ -1112,7 +1112,7 @@ static bool integrity_profile_exists(struct gendisk *disk) * Get a disk whose integrity profile reflects the table's profile. * Returns NULL if integrity support was inconsistent or unavailable. */ -static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t) +static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t) { struct list_head *devices = dm_table_get_devices(t); struct dm_dev_internal *dd = NULL; @@ -1185,10 +1185,10 @@ static int dm_table_register_integrity(struct dm_table *t) * profile the new profile should not conflict. */ if (blk_integrity_compare(dm_disk(md), template_disk) < 0) { - DMWARN("%s: conflict with existing integrity profile: " - "%s profile mismatch", - dm_device_name(t->md), - template_disk->disk_name); + DMERR("%s: conflict with existing integrity profile: " + "%s profile mismatch", + dm_device_name(t->md), + template_disk->disk_name); return 1; } @@ -1215,7 +1215,7 @@ static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev, struct dm_keyslot_evict_args *args = data; int err; - err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key); + err = blk_crypto_evict_key(dev->bdev, args->key); if (!args->err) args->err = err; /* Always try to evict the key from all devices. */ @@ -1327,7 +1327,7 @@ static int dm_table_construct_crypto_profile(struct dm_table *t) if (t->md->queue && !blk_crypto_has_capabilities(profile, t->md->queue->crypto_profile)) { - DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!"); + DMERR("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!"); dm_destroy_crypto_profile(profile); return -EINVAL; } diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index a27395c8621f..6bcc4c4786d8 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -724,6 +724,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) goto bad_cleanup_data_sm; } + /* + * For pool metadata opening process, root setting is redundant + * because it will be set again in __begin_transaction(). But dm + * pool aborting process really needs to get last transaction's + * root to avoid accessing broken btree. + */ + pmd->root = le64_to_cpu(disk_super->data_mapping_root); + pmd->details_root = le64_to_cpu(disk_super->device_details_root); + __setup_btree_details(pmd); dm_bm_unlock(sblock); @@ -776,13 +785,15 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f return r; } -static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) +static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd, + bool destroy_bm) { dm_sm_destroy(pmd->data_sm); dm_sm_destroy(pmd->metadata_sm); dm_tm_destroy(pmd->nb_tm); dm_tm_destroy(pmd->tm); - dm_block_manager_destroy(pmd->bm); + if (destroy_bm) + dm_block_manager_destroy(pmd->bm); } static int __begin_transaction(struct dm_pool_metadata *pmd) @@ -989,7 +1000,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) } pmd_write_unlock(pmd); if (!pmd->fail_io) - __destroy_persistent_data_objects(pmd); + __destroy_persistent_data_objects(pmd, true); kfree(pmd); return 0; @@ -1860,19 +1871,52 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) { int r = -EINVAL; + struct dm_block_manager *old_bm = NULL, *new_bm = NULL; + + /* fail_io is double-checked with pmd->root_lock held below */ + if (unlikely(pmd->fail_io)) + return r; + + /* + * Replacement block manager (new_bm) is created and old_bm destroyed outside of + * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of + * shrinker associated with the block manager's bufio client vs pmd root_lock). + * - must take shrinker_rwsem without holding pmd->root_lock + */ + new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, + THIN_MAX_CONCURRENT_LOCKS); pmd_write_lock(pmd); - if (pmd->fail_io) + if (pmd->fail_io) { + pmd_write_unlock(pmd); goto out; + } __set_abort_with_changes_flags(pmd); - __destroy_persistent_data_objects(pmd); - r = __create_persistent_data_objects(pmd, false); + __destroy_persistent_data_objects(pmd, false); + old_bm = pmd->bm; + if (IS_ERR(new_bm)) { + DMERR("could not create block manager during abort"); + pmd->bm = NULL; + r = PTR_ERR(new_bm); + goto out_unlock; + } + + pmd->bm = new_bm; + r = __open_or_format_metadata(pmd, false); + if (r) { + pmd->bm = NULL; + goto out_unlock; + } + new_bm = NULL; +out_unlock: if (r) pmd->fail_io = true; - -out: pmd_write_unlock(pmd); + dm_block_manager_destroy(old_bm); +out: + if (new_bm && !IS_ERR(new_bm)) + dm_block_manager_destroy(new_bm); return r; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e76c96c760a9..64cfcf46881d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -410,7 +410,7 @@ static void end_discard(struct discard_op *op, int r) * need to wait for the chain to complete. */ bio_chain(op->bio, op->parent_bio); - bio_set_op_attrs(op->bio, REQ_OP_DISCARD, 0); + op->bio->bi_opf = REQ_OP_DISCARD; submit_bio(op->bio); } @@ -2889,6 +2889,8 @@ static void __pool_destroy(struct pool *pool) dm_bio_prison_destroy(pool->prison); dm_kcopyd_client_destroy(pool->copier); + cancel_delayed_work_sync(&pool->waker); + cancel_delayed_work_sync(&pool->no_space_timeout); if (pool->wq) destroy_workqueue(pool->wq); @@ -3540,20 +3542,28 @@ static int pool_preresume(struct dm_target *ti) */ r = bind_control_target(pool, ti); if (r) - return r; + goto out; r = maybe_resize_data_dev(ti, &need_commit1); if (r) - return r; + goto out; r = maybe_resize_metadata_dev(ti, &need_commit2); if (r) - return r; + goto out; if (need_commit1 || need_commit2) (void) commit(pool); +out: + /* + * When a thin-pool is PM_FAIL, it cannot be rebuilt if + * bio is in deferred list. Therefore need to return 0 + * to allow pool_resume() to flush IO. + */ + if (r && get_pool_mode(pool) == PM_FAIL) + r = 0; - return 0; + return r; } static void pool_suspend_active_thins(struct pool *pool) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 8a00cc42e498..ccf5b852fbf7 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1401,14 +1401,16 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) /* WQ_UNBOUND greatly improves performance when running on ramdisk */ wq_flags = WQ_MEM_RECLAIM | WQ_UNBOUND; - if (v->use_tasklet) { - /* - * Allow verify_wq to preempt softirq since verification in - * tasklet will fall-back to using it for error handling - * (or if the bufio cache doesn't have required hashes). - */ - wq_flags |= WQ_HIGHPRI; - } + /* + * Using WQ_HIGHPRI improves throughput and completion latency by + * reducing wait times when reading from a dm-verity device. + * + * Also as required for the "try_verify_in_tasklet" feature: WQ_HIGHPRI + * allows verify_wq to preempt softirq since verification in tasklet + * will fall-back to using it for error handling (or if the bufio cache + * doesn't have required hashes). + */ + wq_flags |= WQ_HIGHPRI; v->verify_wq = alloc_workqueue("kverityd", wq_flags, num_online_cpus()); if (!v->verify_wq) { ti->error = "Cannot allocate workqueue"; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 60549b65c799..e1ea3a7bd9d9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -732,28 +732,48 @@ static char *_dm_claim_ptr = "I belong to device-mapper"; /* * Open a table device so we can use it as a map destination. */ -static int open_table_device(struct table_device *td, dev_t dev, - struct mapped_device *md) +static struct table_device *open_table_device(struct mapped_device *md, + dev_t dev, fmode_t mode) { + struct table_device *td; struct block_device *bdev; u64 part_off; int r; - BUG_ON(td->dm_dev.bdev); + td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id); + if (!td) + return ERR_PTR(-ENOMEM); + refcount_set(&td->count, 1); - bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); + bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr); + if (IS_ERR(bdev)) { + r = PTR_ERR(bdev); + goto out_free_td; + } - r = bd_link_disk_holder(bdev, dm_disk(md)); - if (r) { - blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL); - return r; + /* + * We can be called before the dm disk is added. In that case we can't + * register the holder relation here. It will be done once add_disk was + * called. + */ + if (md->disk->slave_dir) { + r = bd_link_disk_holder(bdev, md->disk); + if (r) + goto out_blkdev_put; } + td->dm_dev.mode = mode; td->dm_dev.bdev = bdev; td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL); - return 0; + format_dev_t(td->dm_dev.name, dev); + list_add(&td->list, &md->table_devices); + return td; + +out_blkdev_put: + blkdev_put(bdev, mode | FMODE_EXCL); +out_free_td: + kfree(td); + return ERR_PTR(r); } /* @@ -761,14 +781,12 @@ static int open_table_device(struct table_device *td, dev_t dev, */ static void close_table_device(struct table_device *td, struct mapped_device *md) { - if (!td->dm_dev.bdev) - return; - - bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); + if (md->disk->slave_dir) + bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); put_dax(td->dm_dev.dax_dev); - td->dm_dev.bdev = NULL; - td->dm_dev.dax_dev = NULL; + list_del(&td->list); + kfree(td); } static struct table_device *find_table_device(struct list_head *l, dev_t dev, @@ -786,31 +804,16 @@ static struct table_device *find_table_device(struct list_head *l, dev_t dev, int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, struct dm_dev **result) { - int r; struct table_device *td; mutex_lock(&md->table_devices_lock); td = find_table_device(&md->table_devices, dev, mode); if (!td) { - td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id); - if (!td) { + td = open_table_device(md, dev, mode); + if (IS_ERR(td)) { mutex_unlock(&md->table_devices_lock); - return -ENOMEM; + return PTR_ERR(td); } - - td->dm_dev.mode = mode; - td->dm_dev.bdev = NULL; - - if ((r = open_table_device(td, dev, md))) { - mutex_unlock(&md->table_devices_lock); - kfree(td); - return r; - } - - format_dev_t(td->dm_dev.name, dev); - - refcount_set(&td->count, 1); - list_add(&td->list, &md->table_devices); } else { refcount_inc(&td->count); } @@ -825,27 +828,11 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) struct table_device *td = container_of(d, struct table_device, dm_dev); mutex_lock(&md->table_devices_lock); - if (refcount_dec_and_test(&td->count)) { + if (refcount_dec_and_test(&td->count)) close_table_device(td, md); - list_del(&td->list); - kfree(td); - } mutex_unlock(&md->table_devices_lock); } -static void free_table_devices(struct list_head *devices) -{ - struct list_head *tmp, *next; - - list_for_each_safe(tmp, next, devices) { - struct table_device *td = list_entry(tmp, struct table_device, list); - - DMWARN("dm_destroy: %s still exists with %d references", - td->dm_dev.name, refcount_read(&td->count)); - kfree(td); - } -} - /* * Get the geometry associated with a dm device */ @@ -864,7 +851,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; if (geo->start > sz) { - DMWARN("Start sector is beyond the geometry limits."); + DMERR("Start sector is beyond the geometry limits."); return -EINVAL; } @@ -1149,7 +1136,7 @@ static void clone_endio(struct bio *bio) /* The target will handle the io */ return; default: - DMWARN("unimplemented target endio return value: %d", r); + DMCRIT("unimplemented target endio return value: %d", r); BUG(); } } @@ -1455,7 +1442,7 @@ static void __map_bio(struct bio *clone) dm_io_dec_pending(io, BLK_STS_DM_REQUEUE); break; default: - DMWARN("unimplemented target map return value: %d", r); + DMCRIT("unimplemented target map return value: %d", r); BUG(); } } @@ -1972,8 +1959,21 @@ static void cleanup_mapped_device(struct mapped_device *md) md->disk->private_data = NULL; spin_unlock(&_minor_lock); if (dm_get_md_type(md) != DM_TYPE_NONE) { + struct table_device *td; + dm_sysfs_exit(md); + list_for_each_entry(td, &md->table_devices, list) { + bd_unlink_disk_holder(td->dm_dev.bdev, + md->disk); + } + + /* + * Hold lock to make sure del_gendisk() won't concurrent + * with open/close_table_device(). + */ + mutex_lock(&md->table_devices_lock); del_gendisk(md->disk); + mutex_unlock(&md->table_devices_lock); } dm_queue_destroy_crypto_profile(md->queue); put_disk(md->disk); @@ -2005,7 +2005,7 @@ static struct mapped_device *alloc_dev(int minor) md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id); if (!md) { - DMWARN("unable to allocate device, out of memory."); + DMERR("unable to allocate device, out of memory."); return NULL; } @@ -2065,7 +2065,6 @@ static struct mapped_device *alloc_dev(int minor) md->disk->minors = 1; md->disk->flags |= GENHD_FL_NO_PART; md->disk->fops = &dm_blk_dops; - md->disk->queue = md->queue; md->disk->private_data = md; sprintf(md->disk->disk_name, "dm-%d", minor); @@ -2123,7 +2122,7 @@ static void free_dev(struct mapped_device *md) cleanup_mapped_device(md); - free_table_devices(&md->table_devices); + WARN_ON_ONCE(!list_empty(&md->table_devices)); dm_stats_cleanup(&md->stats); free_minor(minor); @@ -2306,6 +2305,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) { enum dm_queue_mode type = dm_table_get_type(t); struct queue_limits limits; + struct table_device *td; int r; switch (type) { @@ -2334,17 +2334,40 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) if (r) return r; + /* + * Hold lock to make sure add_disk() and del_gendisk() won't concurrent + * with open_table_device() and close_table_device(). + */ + mutex_lock(&md->table_devices_lock); r = add_disk(md->disk); + mutex_unlock(&md->table_devices_lock); if (r) return r; - r = dm_sysfs_init(md); - if (r) { - del_gendisk(md->disk); - return r; + /* + * Register the holder relationship for devices added before the disk + * was live. + */ + list_for_each_entry(td, &md->table_devices, list) { + r = bd_link_disk_holder(td->dm_dev.bdev, md->disk); + if (r) + goto out_undo_holders; } + + r = dm_sysfs_init(md); + if (r) + goto out_undo_holders; + md->type = type; return 0; + +out_undo_holders: + list_for_each_entry_continue_reverse(td, &md->table_devices, list) + bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); + mutex_lock(&md->table_devices_lock); + del_gendisk(md->disk); + mutex_unlock(&md->table_devices_lock); + return r; } struct mapped_device *dm_get_md(dev_t dev) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index bf6dffadbe6f..e7cc6ba1b657 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -486,7 +486,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap) sb = kmap_atomic(bitmap->storage.sb_page); pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); - pr_debug(" version: %d\n", le32_to_cpu(sb->version)); + pr_debug(" version: %u\n", le32_to_cpu(sb->version)); pr_debug(" uuid: %08x.%08x.%08x.%08x\n", le32_to_cpu(*(__le32 *)(sb->uuid+0)), le32_to_cpu(*(__le32 *)(sb->uuid+4)), @@ -497,11 +497,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap) pr_debug("events cleared: %llu\n", (unsigned long long) le64_to_cpu(sb->events_cleared)); pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); - pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize)); - pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); + pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); + pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); pr_debug(" sync size: %llu KB\n", (unsigned long long)le64_to_cpu(sb->sync_size)/2); - pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind)); + pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); kunmap_atomic(sb); } @@ -2105,7 +2105,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bytes = DIV_ROUND_UP(chunks, 8); if (!bitmap->mddev->bitmap_info.external) bytes += sizeof(bitmap_super_t); - } while (bytes > (space << 9)); + } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < + (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); } else chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; @@ -2150,7 +2151,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->counts.missing_pages = pages; bitmap->counts.chunkshift = chunkshift; bitmap->counts.chunks = chunks; - bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift + + bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + BITMAP_BLOCK_SHIFT); blocks = min(old_counts.chunks << old_counts.chunkshift, @@ -2176,8 +2177,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->counts.missing_pages = old_counts.pages; bitmap->counts.chunkshift = old_counts.chunkshift; bitmap->counts.chunks = old_counts.chunks; - bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift + - BITMAP_BLOCK_SHIFT); + bitmap->mddev->bitmap_info.chunksize = + 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); blocks = old_counts.chunks << old_counts.chunkshift; pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); break; @@ -2195,20 +2196,23 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (set) { bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); - if (*bmc_new == 0) { - /* need to set on-disk bits too. */ - sector_t end = block + new_blocks; - sector_t start = block >> chunkshift; - start <<= chunkshift; - while (start < end) { - md_bitmap_file_set_bit(bitmap, block); - start += 1 << chunkshift; + if (bmc_new) { + if (*bmc_new == 0) { + /* need to set on-disk bits too. */ + sector_t end = block + new_blocks; + sector_t start = block >> chunkshift; + + start <<= chunkshift; + while (start < end) { + md_bitmap_file_set_bit(bitmap, block); + start += 1 << chunkshift; + } + *bmc_new = 2; + md_bitmap_count_page(&bitmap->counts, block, 1); + md_bitmap_set_pending(&bitmap->counts, block); } - *bmc_new = 2; - md_bitmap_count_page(&bitmap->counts, block, 1); - md_bitmap_set_pending(&bitmap->counts, block); + *bmc_new |= NEEDED_MASK; } - *bmc_new |= NEEDED_MASK; if (new_blocks < old_blocks) old_blocks = new_blocks; } @@ -2534,6 +2538,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len) if (csize < 512 || !is_power_of_2(csize)) return -EINVAL; + if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * + sizeof(((bitmap_super_t *)0)->chunksize)))) + return -EOVERFLOW; mddev->bitmap_info.chunksize = csize; return len; } diff --git a/drivers/md/md.c b/drivers/md/md.c index a467b492d4ad..775f1dde190a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -93,6 +93,18 @@ static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); static void mddev_detach(struct mddev *mddev); +enum md_ro_state { + MD_RDWR, + MD_RDONLY, + MD_AUTO_READ, + MD_MAX_STATE +}; + +static bool md_is_rdwr(struct mddev *mddev) +{ + return (mddev->ro == MD_RDWR); +} + /* * Default number of read corrections we'll attempt on an rdev * before ejecting it from the array. We divide the read error @@ -444,7 +456,7 @@ static void md_submit_bio(struct bio *bio) bio = bio_split_to_limits(bio); - if (mddev->ro == 1 && unlikely(rw == WRITE)) { + if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) bio->bi_status = BLK_STS_IOERR; bio_endio(bio); @@ -509,13 +521,14 @@ static void md_end_flush(struct bio *bio) struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; + bio_put(bio); + rdev_dec_pending(rdev, mddev); if (atomic_dec_and_test(&mddev->flush_pending)) { /* The pre-request flush has finished */ queue_work(md_wq, &mddev->flush_work); } - bio_put(bio); } static void md_submit_flush_data(struct work_struct *ws); @@ -913,10 +926,12 @@ static void super_written(struct bio *bio) } else clear_bit(LastDev, &rdev->flags); + bio_put(bio); + + rdev_dec_pending(rdev, mddev); + if (atomic_dec_and_test(&mddev->pending_writes)) wake_up(&mddev->sb_wait); - rdev_dec_pending(rdev, mddev); - bio_put(bio); } void md_super_write(struct mddev *mddev, struct md_rdev *rdev, @@ -2453,7 +2468,22 @@ static void rdev_delayed_delete(struct work_struct *ws) kobject_put(&rdev->kobj); } -static void unbind_rdev_from_array(struct md_rdev *rdev) +void md_autodetect_dev(dev_t dev); + +static void export_rdev(struct md_rdev *rdev) +{ + pr_debug("md: export_rdev(%pg)\n", rdev->bdev); + md_rdev_clear(rdev); +#ifndef MODULE + if (test_bit(AutoDetected, &rdev->flags)) + md_autodetect_dev(rdev->bdev->bd_dev); +#endif + blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + rdev->bdev = NULL; + kobject_put(&rdev->kobj); +} + +static void md_kick_rdev_from_array(struct md_rdev *rdev) { bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); list_del_rcu(&rdev->same_set); @@ -2476,56 +2506,8 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) INIT_WORK(&rdev->del_work, rdev_delayed_delete); kobject_get(&rdev->kobj); queue_work(md_rdev_misc_wq, &rdev->del_work); -} - -/* - * prevent the device from being mounted, repartitioned or - * otherwise reused by a RAID array (or any other kernel - * subsystem), by bd_claiming the device. - */ -static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared) -{ - int err = 0; - struct block_device *bdev; - - bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - shared ? (struct md_rdev *)lock_rdev : rdev); - if (IS_ERR(bdev)) { - pr_warn("md: could not open device unknown-block(%u,%u).\n", - MAJOR(dev), MINOR(dev)); - return PTR_ERR(bdev); - } - rdev->bdev = bdev; - return err; -} - -static void unlock_rdev(struct md_rdev *rdev) -{ - struct block_device *bdev = rdev->bdev; - rdev->bdev = NULL; - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - -void md_autodetect_dev(dev_t dev); - -static void export_rdev(struct md_rdev *rdev) -{ - pr_debug("md: export_rdev(%pg)\n", rdev->bdev); - md_rdev_clear(rdev); -#ifndef MODULE - if (test_bit(AutoDetected, &rdev->flags)) - md_autodetect_dev(rdev->bdev->bd_dev); -#endif - unlock_rdev(rdev); - kobject_put(&rdev->kobj); -} - -void md_kick_rdev_from_array(struct md_rdev *rdev) -{ - unbind_rdev_from_array(rdev); export_rdev(rdev); } -EXPORT_SYMBOL_GPL(md_kick_rdev_from_array); static void export_array(struct mddev *mddev) { @@ -2639,7 +2621,7 @@ void md_update_sb(struct mddev *mddev, int force_change) int any_badblocks_changed = 0; int ret = -1; - if (mddev->ro) { + if (!md_is_rdwr(mddev)) { if (force_change) set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); return; @@ -3660,9 +3642,10 @@ EXPORT_SYMBOL_GPL(md_rdev_init); */ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) { - int err; + static struct md_rdev *claim_rdev; /* just for claiming the bdev */ struct md_rdev *rdev; sector_t size; + int err; rdev = kzalloc(sizeof(*rdev), GFP_KERNEL); if (!rdev) @@ -3670,14 +3653,20 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe err = md_rdev_init(rdev); if (err) - goto abort_free; + goto out_free_rdev; err = alloc_disk_sb(rdev); if (err) - goto abort_free; + goto out_clear_rdev; - err = lock_rdev(rdev, newdev, super_format == -2); - if (err) - goto abort_free; + rdev->bdev = blkdev_get_by_dev(newdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, + super_format == -2 ? claim_rdev : rdev); + if (IS_ERR(rdev->bdev)) { + pr_warn("md: could not open device unknown-block(%u,%u).\n", + MAJOR(newdev), MINOR(newdev)); + err = PTR_ERR(rdev->bdev); + goto out_clear_rdev; + } kobject_init(&rdev->kobj, &rdev_ktype); @@ -3686,7 +3675,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe pr_warn("md: %pg has zero or unknown size, marking faulty!\n", rdev->bdev); err = -EINVAL; - goto abort_free; + goto out_blkdev_put; } if (super_format >= 0) { @@ -3696,21 +3685,22 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe pr_warn("md: %pg does not have a valid v%d.%d superblock, not importing!\n", rdev->bdev, super_format, super_minor); - goto abort_free; + goto out_blkdev_put; } if (err < 0) { pr_warn("md: could not read %pg's sb, not importing!\n", rdev->bdev); - goto abort_free; + goto out_blkdev_put; } } return rdev; -abort_free: - if (rdev->bdev) - unlock_rdev(rdev); +out_blkdev_put: + blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); +out_clear_rdev: md_rdev_clear(rdev); +out_free_rdev: kfree(rdev); return ERR_PTR(err); } @@ -3901,7 +3891,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) goto out_unlock; } rv = -EROFS; - if (mddev->ro) + if (!md_is_rdwr(mddev)) goto out_unlock; /* request to change the personality. Need to ensure: @@ -4107,7 +4097,7 @@ layout_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) { if (mddev->pers->check_reshape == NULL) err = -EBUSY; - else if (mddev->ro) + else if (!md_is_rdwr(mddev)) err = -EROFS; else { mddev->new_layout = n; @@ -4216,7 +4206,7 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) { if (mddev->pers->check_reshape == NULL) err = -EBUSY; - else if (mddev->ro) + else if (!md_is_rdwr(mddev)) err = -EROFS; else { mddev->new_chunk_sectors = n >> 9; @@ -4339,13 +4329,13 @@ array_state_show(struct mddev *mddev, char *page) if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) { switch(mddev->ro) { - case 1: + case MD_RDONLY: st = readonly; break; - case 2: + case MD_AUTO_READ: st = read_auto; break; - case 0: + case MD_RDWR: spin_lock(&mddev->lock); if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) st = write_pending; @@ -4381,7 +4371,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) int err = 0; enum array_state st = match_word(buf, array_states); - if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) { + if (mddev->pers && (st == active || st == clean) && + mddev->ro != MD_RDONLY) { /* don't take reconfig_mutex when toggling between * clean and active */ @@ -4425,23 +4416,23 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) err = md_set_readonly(mddev, NULL); else { - mddev->ro = 1; + mddev->ro = MD_RDONLY; set_disk_ro(mddev->gendisk, 1); err = do_md_run(mddev); } break; case read_auto: if (mddev->pers) { - if (mddev->ro == 0) + if (md_is_rdwr(mddev)) err = md_set_readonly(mddev, NULL); - else if (mddev->ro == 1) + else if (mddev->ro == MD_RDONLY) err = restart_array(mddev); if (err == 0) { - mddev->ro = 2; + mddev->ro = MD_AUTO_READ; set_disk_ro(mddev->gendisk, 0); } } else { - mddev->ro = 2; + mddev->ro = MD_AUTO_READ; err = do_md_run(mddev); } break; @@ -4466,7 +4457,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) wake_up(&mddev->sb_wait); err = 0; } else { - mddev->ro = 0; + mddev->ro = MD_RDWR; set_disk_ro(mddev->gendisk, 0); err = do_md_run(mddev); } @@ -4765,7 +4756,7 @@ action_show(struct mddev *mddev, char *page) if (test_bit(MD_RECOVERY_FROZEN, &recovery)) type = "frozen"; else if (test_bit(MD_RECOVERY_RUNNING, &recovery) || - (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) { + (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) { if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) type = "reshape"; else if (test_bit(MD_RECOVERY_SYNC, &recovery)) { @@ -4851,11 +4842,11 @@ action_store(struct mddev *mddev, const char *page, size_t len) set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); } - if (mddev->ro == 2) { + if (mddev->ro == MD_AUTO_READ) { /* A write to sync_action is enough to justify * canceling read-auto mode */ - mddev->ro = 0; + mddev->ro = MD_RDWR; md_wakeup_thread(mddev->sync_thread); } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -5083,8 +5074,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len) goto out_unlock; err = -EBUSY; - if (max < mddev->resync_max && - mddev->ro == 0 && + if (max < mddev->resync_max && md_is_rdwr(mddev) && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) goto out_unlock; @@ -5813,8 +5803,8 @@ int md_run(struct mddev *mddev) continue; sync_blockdev(rdev->bdev); invalidate_bdev(rdev->bdev); - if (mddev->ro != 1 && rdev_read_only(rdev)) { - mddev->ro = 1; + if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) { + mddev->ro = MD_RDONLY; if (mddev->gendisk) set_disk_ro(mddev->gendisk, 1); } @@ -5917,8 +5907,8 @@ int md_run(struct mddev *mddev) mddev->ok_start_degraded = start_dirty_degraded; - if (start_readonly && mddev->ro == 0) - mddev->ro = 2; /* read-only, but switch on first write */ + if (start_readonly && md_is_rdwr(mddev)) + mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */ err = pers->run(mddev); if (err) @@ -5996,8 +5986,8 @@ int md_run(struct mddev *mddev) mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed"); mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded"); - } else if (mddev->ro == 2) /* auto-readonly not meaningful */ - mddev->ro = 0; + } else if (mddev->ro == MD_AUTO_READ) + mddev->ro = MD_RDWR; atomic_set(&mddev->max_corr_read_errors, MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); @@ -6015,7 +6005,7 @@ int md_run(struct mddev *mddev) if (rdev->raid_disk >= 0) sysfs_link_rdev(mddev, rdev); /* failure here is OK */ - if (mddev->degraded && !mddev->ro) + if (mddev->degraded && md_is_rdwr(mddev)) /* This ensures that recovering status is reported immediately * via sysfs - until a lack of spares is confirmed. */ @@ -6105,7 +6095,7 @@ static int restart_array(struct mddev *mddev) return -ENXIO; if (!mddev->pers) return -EINVAL; - if (!mddev->ro) + if (md_is_rdwr(mddev)) return -EBUSY; rcu_read_lock(); @@ -6124,7 +6114,7 @@ static int restart_array(struct mddev *mddev) return -EROFS; mddev->safemode = 0; - mddev->ro = 0; + mddev->ro = MD_RDWR; set_disk_ro(disk, 0); pr_debug("md: %s switched to read-write mode.\n", mdname(mddev)); /* Kick recovery or resync if necessary */ @@ -6151,7 +6141,7 @@ static void md_clean(struct mddev *mddev) mddev->clevel[0] = 0; mddev->flags = 0; mddev->sb_flags = 0; - mddev->ro = 0; + mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; mddev->chunk_sectors = 0; mddev->ctime = mddev->utime = 0; @@ -6203,7 +6193,7 @@ static void __md_stop_writes(struct mddev *mddev) } md_bitmap_flush(mddev); - if (mddev->ro == 0 && + if (md_is_rdwr(mddev) && ((!mddev->in_sync && !mddev_is_clustered(mddev)) || mddev->sb_flags)) { /* mark array as shutdown cleanly */ @@ -6312,9 +6302,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) __md_stop_writes(mddev); err = -ENXIO; - if (mddev->ro==1) + if (mddev->ro == MD_RDONLY) goto out; - mddev->ro = 1; + mddev->ro = MD_RDONLY; set_disk_ro(mddev->gendisk, 1); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -6371,7 +6361,7 @@ static int do_md_stop(struct mddev *mddev, int mode, return -EBUSY; } if (mddev->pers) { - if (mddev->ro) + if (!md_is_rdwr(mddev)) set_disk_ro(disk, 0); __md_stop_writes(mddev); @@ -6388,8 +6378,8 @@ static int do_md_stop(struct mddev *mddev, int mode, mutex_unlock(&mddev->open_mutex); mddev->changed = 1; - if (mddev->ro) - mddev->ro = 0; + if (!md_is_rdwr(mddev)) + mddev->ro = MD_RDWR; } else mutex_unlock(&mddev->open_mutex); /* @@ -7204,7 +7194,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || mddev->sync_thread) return -EBUSY; - if (mddev->ro) + if (!md_is_rdwr(mddev)) return -EROFS; rdev_for_each(rdev, mddev) { @@ -7234,7 +7224,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) /* change the number of raid disks */ if (mddev->pers->check_reshape == NULL) return -EINVAL; - if (mddev->ro) + if (!md_is_rdwr(mddev)) return -EROFS; if (raid_disks <= 0 || (mddev->max_disks && raid_disks >= mddev->max_disks)) @@ -7464,6 +7454,40 @@ static inline bool md_ioctl_valid(unsigned int cmd) } } +static int __md_set_array_info(struct mddev *mddev, void __user *argp) +{ + mdu_array_info_t info; + int err; + + if (!argp) + memset(&info, 0, sizeof(info)); + else if (copy_from_user(&info, argp, sizeof(info))) + return -EFAULT; + + if (mddev->pers) { + err = update_array_info(mddev, &info); + if (err) + pr_warn("md: couldn't update array info. %d\n", err); + return err; + } + + if (!list_empty(&mddev->disks)) { + pr_warn("md: array %s already has disks!\n", mdname(mddev)); + return -EBUSY; + } + + if (mddev->raid_disks) { + pr_warn("md: array %s already initialised!\n", mdname(mddev)); + return -EBUSY; + } + + err = md_set_array_info(mddev, &info); + if (err) + pr_warn("md: couldn't set array info. %d\n", err); + + return err; +} + static int md_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -7569,36 +7593,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, } if (cmd == SET_ARRAY_INFO) { - mdu_array_info_t info; - if (!arg) - memset(&info, 0, sizeof(info)); - else if (copy_from_user(&info, argp, sizeof(info))) { - err = -EFAULT; - goto unlock; - } - if (mddev->pers) { - err = update_array_info(mddev, &info); - if (err) { - pr_warn("md: couldn't update array info. %d\n", err); - goto unlock; - } - goto unlock; - } - if (!list_empty(&mddev->disks)) { - pr_warn("md: array %s already has disks!\n", mdname(mddev)); - err = -EBUSY; - goto unlock; - } - if (mddev->raid_disks) { - pr_warn("md: array %s already initialised!\n", mdname(mddev)); - err = -EBUSY; - goto unlock; - } - err = md_set_array_info(mddev, &info); - if (err) { - pr_warn("md: couldn't set array info. %d\n", err); - goto unlock; - } + err = __md_set_array_info(mddev, argp); goto unlock; } @@ -7658,26 +7653,25 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, * The remaining ioctls are changing the state of the * superblock, so we do not allow them on read-only arrays. */ - if (mddev->ro && mddev->pers) { - if (mddev->ro == 2) { - mddev->ro = 0; - sysfs_notify_dirent_safe(mddev->sysfs_state); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - /* mddev_unlock will wake thread */ - /* If a device failed while we were read-only, we - * need to make sure the metadata is updated now. - */ - if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) { - mddev_unlock(mddev); - wait_event(mddev->sb_wait, - !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) && - !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); - mddev_lock_nointr(mddev); - } - } else { + if (!md_is_rdwr(mddev) && mddev->pers) { + if (mddev->ro != MD_AUTO_READ) { err = -EROFS; goto unlock; } + mddev->ro = MD_RDWR; + sysfs_notify_dirent_safe(mddev->sysfs_state); + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + /* mddev_unlock will wake thread */ + /* If a device failed while we were read-only, we + * need to make sure the metadata is updated now. + */ + if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) { + mddev_unlock(mddev); + wait_event(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) && + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); + mddev_lock_nointr(mddev); + } } switch (cmd) { @@ -7763,11 +7757,11 @@ static int md_set_read_only(struct block_device *bdev, bool ro) * Transitioning to read-auto need only happen for arrays that call * md_write_start and which are not ready for writes yet. */ - if (!ro && mddev->ro == 1 && mddev->pers) { + if (!ro && mddev->ro == MD_RDONLY && mddev->pers) { err = restart_array(mddev); if (err) goto out_unlock; - mddev->ro = 2; + mddev->ro = MD_AUTO_READ; } out_unlock: @@ -8241,9 +8235,9 @@ static int md_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%s : %sactive", mdname(mddev), mddev->pers ? "" : "in"); if (mddev->pers) { - if (mddev->ro==1) + if (mddev->ro == MD_RDONLY) seq_printf(seq, " (read-only)"); - if (mddev->ro==2) + if (mddev->ro == MD_AUTO_READ) seq_printf(seq, " (auto-read-only)"); seq_printf(seq, " %s", mddev->pers->name); } @@ -8502,10 +8496,10 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) if (bio_data_dir(bi) != WRITE) return true; - BUG_ON(mddev->ro == 1); - if (mddev->ro == 2) { + BUG_ON(mddev->ro == MD_RDONLY); + if (mddev->ro == MD_AUTO_READ) { /* need to switch to read/write */ - mddev->ro = 0; + mddev->ro = MD_RDWR; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); @@ -8556,7 +8550,7 @@ void md_write_inc(struct mddev *mddev, struct bio *bi) { if (bio_data_dir(bi) != WRITE) return; - WARN_ON_ONCE(mddev->in_sync || mddev->ro); + WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev)); percpu_ref_get(&mddev->writes_pending); } EXPORT_SYMBOL(md_write_inc); @@ -8661,7 +8655,7 @@ void md_allow_write(struct mddev *mddev) { if (!mddev->pers) return; - if (mddev->ro) + if (!md_is_rdwr(mddev)) return; if (!mddev->pers->sync_request) return; @@ -8709,7 +8703,7 @@ void md_do_sync(struct md_thread *thread) if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) || test_bit(MD_RECOVERY_WAIT, &mddev->recovery)) return; - if (mddev->ro) {/* never try to sync a read-only array */ + if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); return; } @@ -9178,9 +9172,9 @@ static int remove_and_add_spares(struct mddev *mddev, if (test_bit(Faulty, &rdev->flags)) continue; if (!test_bit(Journal, &rdev->flags)) { - if (mddev->ro && - ! (rdev->saved_raid_disk >= 0 && - !test_bit(Bitmap_sync, &rdev->flags))) + if (!md_is_rdwr(mddev) && + !(rdev->saved_raid_disk >= 0 && + !test_bit(Bitmap_sync, &rdev->flags))) continue; rdev->recovery_offset = 0; @@ -9278,7 +9272,8 @@ void md_check_recovery(struct mddev *mddev) flush_signals(current); } - if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + if (!md_is_rdwr(mddev) && + !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return; if ( ! ( (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) || @@ -9297,7 +9292,7 @@ void md_check_recovery(struct mddev *mddev) if (!mddev->external && mddev->safemode == 1) mddev->safemode = 0; - if (mddev->ro) { + if (!md_is_rdwr(mddev)) { struct md_rdev *rdev; if (!mddev->external && mddev->in_sync) /* 'Blocked' flag not needed as failed devices diff --git a/drivers/md/md.h b/drivers/md/md.h index b4e2d8b87b61..554a9026669a 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -782,7 +782,6 @@ extern void mddev_resume(struct mddev *mddev); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force); -extern void md_kick_rdev_from_array(struct md_rdev * rdev); extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev, bool is_suspend); extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev, diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 857c49399c28..b536befd8898 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -398,7 +398,6 @@ static int raid0_run(struct mddev *mddev) blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); - blk_queue_max_discard_sectors(mddev->queue, UINT_MAX); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); blk_queue_io_opt(mddev->queue, diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 05d8438cfec8..68a9e2d9985b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1321,7 +1321,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_iter.bi_sector = r1_bio->sector + mirror->rdev->data_offset; read_bio->bi_end_io = raid1_end_read_request; - bio_set_op_attrs(read_bio, op, do_sync); + read_bio->bi_opf = op | do_sync; if (test_bit(FailFast, &mirror->rdev->flags) && test_bit(R1BIO_FailFast, &r1_bio->state)) read_bio->bi_opf |= MD_FAILFAST; @@ -2254,7 +2254,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) continue; } - bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); + wbio->bi_opf = REQ_OP_WRITE; if (test_bit(FailFast, &conf->mirrors[i].rdev->flags)) wbio->bi_opf |= MD_FAILFAST; @@ -2419,7 +2419,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) GFP_NOIO, &mddev->bio_set); } - bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); + wbio->bi_opf = REQ_OP_WRITE; wbio->bi_iter.bi_sector = r1_bio->sector; wbio->bi_iter.bi_size = r1_bio->sectors << 9; @@ -2770,7 +2770,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (i < conf->raid_disks) still_degraded = 1; } else if (!test_bit(In_sync, &rdev->flags)) { - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; bio->bi_end_io = end_sync_write; write_targets ++; } else { @@ -2797,7 +2797,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (disk < 0) disk = i; } - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; bio->bi_end_io = end_sync_read; read_targets++; } else if (!test_bit(WriteErrorSeen, &rdev->flags) && @@ -2809,7 +2809,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * if we are doing resync or repair. Otherwise, leave * this device alone for this sync request. */ - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; bio->bi_end_io = end_sync_write; write_targets++; } @@ -3159,6 +3159,7 @@ static int raid1_run(struct mddev *mddev) * RAID1 needs at least one disk in active */ if (conf->raid_disks - mddev->degraded < 1) { + md_unregister_thread(&conf->thread); ret = -EINVAL; goto abort; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3aa8b6e11d58..6c66357f92f5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1254,7 +1254,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + choose_data_offset(r10_bio, rdev); read_bio->bi_end_io = raid10_end_read_request; - bio_set_op_attrs(read_bio, op, do_sync); + read_bio->bi_opf = op | do_sync; if (test_bit(FailFast, &rdev->flags) && test_bit(R10BIO_FailFast, &r10_bio->state)) read_bio->bi_opf |= MD_FAILFAST; @@ -1301,7 +1301,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + choose_data_offset(r10_bio, rdev)); mbio->bi_end_io = raid10_end_write_request; - bio_set_op_attrs(mbio, op, do_sync | do_fua); + mbio->bi_opf = op | do_sync | do_fua; if (!replacement && test_bit(FailFast, &conf->mirrors[devnum].rdev->flags) && enough(conf, devnum)) @@ -2933,7 +2933,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); wbio->bi_iter.bi_sector = wsector + choose_data_offset(r10_bio, rdev); - bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); + wbio->bi_opf = REQ_OP_WRITE; if (submit_bio_wait(wbio) < 0) /* Failure! */ @@ -3542,7 +3542,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_read; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; from_addr = r10_bio->devs[j].addr; @@ -3567,7 +3567,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; bio->bi_iter.bi_sector = to_addr + mrdev->data_offset; bio_set_dev(bio, mrdev->bdev); @@ -3588,7 +3588,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; bio->bi_iter.bi_sector = to_addr + mreplace->data_offset; bio_set_dev(bio, mreplace->bdev); @@ -3742,7 +3742,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_read; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; @@ -3764,7 +3764,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; @@ -4145,8 +4145,6 @@ static int raid10_run(struct mddev *mddev) conf->thread = NULL; if (mddev->queue) { - blk_queue_max_discard_sectors(mddev->queue, - UINT_MAX); blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); raid10_set_io_opt(conf); @@ -4972,7 +4970,7 @@ read_more: b->bi_iter.bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; b->bi_end_io = end_reshape_write; - bio_set_op_attrs(b, REQ_OP_WRITE, 0); + b->bi_opf = REQ_OP_WRITE; b->bi_next = blist; blist = b; } diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 832d8566e165..46182b955aef 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1565,11 +1565,12 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space) if (!log) return; + + target = READ_ONCE(log->reclaim_target); do { - target = log->reclaim_target; if (new < target) return; - } while (cmpxchg(&log->reclaim_target, target, new) != target); + } while (!try_cmpxchg(&log->reclaim_target, &target, new)); md_wakeup_thread(log->reclaim_thread); } @@ -3061,7 +3062,6 @@ void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev) int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { - struct request_queue *q = bdev_get_queue(rdev->bdev); struct r5l_log *log; int ret; @@ -3090,9 +3090,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) if (!log) return -ENOMEM; log->rdev = rdev; - - log->need_cache_flush = test_bit(QUEUE_FLAG_WC, &q->queue_flags) != 0; - + log->need_cache_flush = bdev_write_cache(rdev->bdev); log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid, sizeof(rdev->mddev->uuid)); diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 31b9157bc9ae..e495939bb3e0 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -1301,8 +1301,6 @@ static int ppl_validate_rdev(struct md_rdev *rdev) static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) { - struct request_queue *q; - if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE + PPL_HEADER_SIZE) * 2) { log->use_multippl = true; @@ -1316,8 +1314,7 @@ static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) } log->next_io_sector = rdev->ppl.sector; - q = bdev_get_queue(rdev->bdev); - if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + if (bdev_write_cache(rdev->bdev)) log->wb_cache_on = true; } |