aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--drivers/md/dm.c458
1 files changed, 364 insertions, 94 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3086da5664f3..2edbcc2d7d3f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -12,11 +12,14 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/sched/signal.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
+#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/idr.h>
+#include <linux/uio.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
@@ -56,12 +59,15 @@ static DECLARE_WORK(deferred_remove_work, do_deferred_remove);
static struct workqueue_struct *deferred_remove_workqueue;
+atomic_t dm_global_event_nr = ATOMIC_INIT(0);
+DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
+
/*
* One of these is allocated per bio.
*/
struct dm_io {
struct mapped_device *md;
- int error;
+ blk_status_t status;
atomic_t io_count;
struct bio *bio;
unsigned long start_time;
@@ -91,7 +97,6 @@ static int dm_numa_node = DM_NUMA_NODE;
*/
struct dm_md_mempools {
mempool_t *io_pool;
- mempool_t *rq_pool;
struct bio_set *bs;
};
@@ -466,13 +471,16 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
if (r > 0) {
/*
- * Target determined this ioctl is being issued against
- * a logical partition of the parent bdev; so extra
- * validation is needed.
+ * Target determined this ioctl is being issued against a
+ * subset of the parent bdev; require extra privileges.
*/
- r = scsi_verify_blk_ioctl(NULL, cmd);
- if (r)
+ if (!capable(CAP_SYS_RAWIO)) {
+ DMWARN_LIMIT(
+ "%s: sending ioctl %x to DM device without required privilege.",
+ current->comm, cmd);
+ r = -ENOIOCTLCMD;
goto out;
+ }
}
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
@@ -626,6 +634,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
}
td->dm_dev.bdev = bdev;
+ td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
return 0;
}
@@ -639,7 +648,9 @@ static void close_table_device(struct table_device *td, struct mapped_device *md
bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
+ put_dax(td->dm_dev.dax_dev);
td->dm_dev.bdev = NULL;
+ td->dm_dev.dax_dev = NULL;
}
static struct table_device *find_table_device(struct list_head *l, dev_t dev,
@@ -761,23 +772,24 @@ static int __noflush_suspending(struct mapped_device *md)
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
*/
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
{
unsigned long flags;
- int io_error;
+ blk_status_t io_error;
struct bio *bio;
struct mapped_device *md = io->md;
/* Push-back supersedes any I/O errors */
if (unlikely(error)) {
spin_lock_irqsave(&io->endio_lock, flags);
- if (!(io->error > 0 && __noflush_suspending(md)))
- io->error = error;
+ if (!(io->status == BLK_STS_DM_REQUEUE &&
+ __noflush_suspending(md)))
+ io->status = error;
spin_unlock_irqrestore(&io->endio_lock, flags);
}
if (atomic_dec_and_test(&io->io_count)) {
- if (io->error == DM_ENDIO_REQUEUE) {
+ if (io->status == BLK_STS_DM_REQUEUE) {
/*
* Target requested pushing back the I/O.
*/
@@ -786,16 +798,16 @@ static void dec_pending(struct dm_io *io, int error)
bio_list_add_head(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
- io->error = -EIO;
+ io->status = BLK_STS_IOERR;
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
- io_error = io->error;
+ io_error = io->status;
bio = io->bio;
end_io_acct(io);
free_io(md, io);
- if (io_error == DM_ENDIO_REQUEUE)
+ if (io_error == BLK_STS_DM_REQUEUE)
return;
if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -807,8 +819,7 @@ static void dec_pending(struct dm_io *io, int error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- trace_block_bio_complete(md->queue, bio, io_error);
- bio->bi_error = io_error;
+ bio->bi_status = io_error;
bio_endio(bio);
}
}
@@ -822,36 +833,48 @@ void disable_write_same(struct mapped_device *md)
limits->max_write_same_sectors = 0;
}
+void disable_write_zeroes(struct mapped_device *md)
+{
+ struct queue_limits *limits = dm_get_queue_limits(md);
+
+ /* device doesn't really support WRITE ZEROES, disable it */
+ limits->max_write_zeroes_sectors = 0;
+}
+
static void clone_endio(struct bio *bio)
{
- int error = bio->bi_error;
- int r = error;
+ blk_status_t error = bio->bi_status;
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
+ if (unlikely(error == BLK_STS_TARGET)) {
+ if (bio_op(bio) == REQ_OP_WRITE_SAME &&
+ !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
+ disable_write_same(md);
+ if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+ !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+ disable_write_zeroes(md);
+ }
+
if (endio) {
- r = endio(tio->ti, bio, error);
- if (r < 0 || r == DM_ENDIO_REQUEUE)
- /*
- * error and requeue request are handled
- * in dec_pending().
- */
- error = r;
- else if (r == DM_ENDIO_INCOMPLETE)
+ int r = endio(tio->ti, bio, &error);
+ switch (r) {
+ case DM_ENDIO_REQUEUE:
+ error = BLK_STS_DM_REQUEUE;
+ /*FALLTHRU*/
+ case DM_ENDIO_DONE:
+ break;
+ case DM_ENDIO_INCOMPLETE:
/* The target will handle the io */
return;
- else if (r) {
+ default:
DMWARN("unimplemented target endio return value: %d", r);
BUG();
}
}
- if (unlikely(r == -EREMOTEIO && (bio_op(bio) == REQ_OP_WRITE_SAME) &&
- !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
- disable_write_same(md);
-
free_tio(tio);
dec_pending(io, error);
}
@@ -905,31 +928,91 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
-static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
- void **kaddr, pfn_t *pfn, long size)
+static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
+ sector_t sector, int *srcu_idx)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
struct dm_table *map;
struct dm_target *ti;
- int srcu_idx;
- long len, ret = -EIO;
- map = dm_get_live_table(md, &srcu_idx);
+ map = dm_get_live_table(md, srcu_idx);
if (!map)
- goto out;
+ return NULL;
ti = dm_table_find_target(map, sector);
if (!dm_target_is_valid(ti))
- goto out;
+ return NULL;
+
+ return ti;
+}
+
+static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+ long nr_pages, void **kaddr, pfn_t *pfn)
+{
+ struct mapped_device *md = dax_get_private(dax_dev);
+ sector_t sector = pgoff * PAGE_SECTORS;
+ struct dm_target *ti;
+ long len, ret = -EIO;
+ int srcu_idx;
- len = max_io_len(sector, ti) << SECTOR_SHIFT;
- size = min(len, size);
+ ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+ if (!ti)
+ goto out;
+ if (!ti->type->direct_access)
+ goto out;
+ len = max_io_len(sector, ti) / PAGE_SECTORS;
+ if (len < 1)
+ goto out;
+ nr_pages = min(len, nr_pages);
if (ti->type->direct_access)
- ret = ti->type->direct_access(ti, sector, kaddr, pfn, size);
-out:
+ ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
+
+ out:
+ dm_put_live_table(md, srcu_idx);
+
+ return ret;
+}
+
+static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ struct mapped_device *md = dax_get_private(dax_dev);
+ sector_t sector = pgoff * PAGE_SECTORS;
+ struct dm_target *ti;
+ long ret = 0;
+ int srcu_idx;
+
+ ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+ if (!ti)
+ goto out;
+ if (!ti->type->dax_copy_from_iter) {
+ ret = copy_from_iter(addr, bytes, i);
+ goto out;
+ }
+ ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
+ out:
+ dm_put_live_table(md, srcu_idx);
+
+ return ret;
+}
+
+static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+ size_t size)
+{
+ struct mapped_device *md = dax_get_private(dax_dev);
+ sector_t sector = pgoff * PAGE_SECTORS;
+ struct dm_target *ti;
+ int srcu_idx;
+
+ ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+ if (!ti)
+ goto out;
+ if (ti->type->dax_flush)
+ ti->type->dax_flush(ti, pgoff, addr, size);
+ out:
dm_put_live_table(md, srcu_idx);
- return min(ret, size);
}
/*
@@ -972,10 +1055,144 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+/*
+ * The zone descriptors obtained with a zone report indicate
+ * zone positions within the target device. The zone descriptors
+ * must be remapped to match their position within the dm device.
+ * A target may call dm_remap_zone_report after completion of a
+ * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained
+ * from the target device mapping to the dm device.
+ */
+void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+ struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+ struct bio *report_bio = tio->io->bio;
+ struct blk_zone_report_hdr *hdr = NULL;
+ struct blk_zone *zone;
+ unsigned int nr_rep = 0;
+ unsigned int ofst;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+ void *addr;
+
+ if (bio->bi_status)
+ return;
+
+ /*
+ * Remap the start sector of the reported zones. For sequential zones,
+ * also remap the write pointer position.
+ */
+ bio_for_each_segment(bvec, report_bio, iter) {
+ addr = kmap_atomic(bvec.bv_page);
+
+ /* Remember the report header in the first page */
+ if (!hdr) {
+ hdr = addr;
+ ofst = sizeof(struct blk_zone_report_hdr);
+ } else
+ ofst = 0;
+
+ /* Set zones start sector */
+ while (hdr->nr_zones && ofst < bvec.bv_len) {
+ zone = addr + ofst;
+ if (zone->start >= start + ti->len) {
+ hdr->nr_zones = 0;
+ break;
+ }
+ zone->start = zone->start + ti->begin - start;
+ if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
+ if (zone->cond == BLK_ZONE_COND_FULL)
+ zone->wp = zone->start + zone->len;
+ else if (zone->cond == BLK_ZONE_COND_EMPTY)
+ zone->wp = zone->start;
+ else
+ zone->wp = zone->wp + ti->begin - start;
+ }
+ ofst += sizeof(struct blk_zone);
+ hdr->nr_zones--;
+ nr_rep++;
+ }
+
+ if (addr != hdr)
+ kunmap_atomic(addr);
+
+ if (!hdr->nr_zones)
+ break;
+ }
+
+ if (hdr) {
+ hdr->nr_zones = nr_rep;
+ kunmap_atomic(hdr);
+ }
+
+ bio_advance(report_bio, report_bio->bi_iter.bi_size);
+
+#else /* !CONFIG_BLK_DEV_ZONED */
+ bio->bi_status = BLK_STS_NOTSUPP;
+#endif
+}
+EXPORT_SYMBOL_GPL(dm_remap_zone_report);
+
+/*
+ * Flush current->bio_list when the target map method blocks.
+ * This fixes deadlocks in snapshot and possibly in other targets.
+ */
+struct dm_offload {
+ struct blk_plug plug;
+ struct blk_plug_cb cb;
+};
+
+static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
+{
+ struct dm_offload *o = container_of(cb, struct dm_offload, cb);
+ struct bio_list list;
+ struct bio *bio;
+ int i;
+
+ INIT_LIST_HEAD(&o->cb.list);
+
+ if (unlikely(!current->bio_list))
+ return;
+
+ for (i = 0; i < 2; i++) {
+ list = current->bio_list[i];
+ bio_list_init(&current->bio_list[i]);
+
+ while ((bio = bio_list_pop(&list))) {
+ struct bio_set *bs = bio->bi_pool;
+ if (unlikely(!bs) || bs == fs_bio_set ||
+ !bs->rescue_workqueue) {
+ bio_list_add(&current->bio_list[i], bio);
+ continue;
+ }
+
+ spin_lock(&bs->rescue_lock);
+ bio_list_add(&bs->rescue_list, bio);
+ queue_work(bs->rescue_workqueue, &bs->rescue_work);
+ spin_unlock(&bs->rescue_lock);
+ }
+ }
+}
+
+static void dm_offload_start(struct dm_offload *o)
+{
+ blk_start_plug(&o->plug);
+ o->cb.callback = flush_current_bio_list;
+ list_add(&o->cb.list, &current->plug->cb_list);
+}
+
+static void dm_offload_end(struct dm_offload *o)
+{
+ list_del(&o->cb.list);
+ blk_finish_plug(&o->plug);
+}
+
static void __map_bio(struct dm_target_io *tio)
{
int r;
sector_t sector;
+ struct dm_offload o;
struct bio *clone = &tio->clone;
struct dm_target *ti = tio->ti;
@@ -988,19 +1205,29 @@ static void __map_bio(struct dm_target_io *tio)
*/
atomic_inc(&tio->io->io_count);
sector = clone->bi_iter.bi_sector;
+
+ dm_offload_start(&o);
r = ti->type->map(ti, clone);
- if (r == DM_MAPIO_REMAPPED) {
- /* the bio has been remapped so dispatch it */
+ dm_offload_end(&o);
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+ break;
+ case DM_MAPIO_REMAPPED:
+ /* the bio has been remapped so dispatch it */
trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
tio->io->bio->bi_bdev->bd_dev, sector);
-
generic_make_request(clone);
- } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
- /* error the io and bail out, or requeue it if needed */
- dec_pending(tio->io, r);
+ break;
+ case DM_MAPIO_KILL:
+ dec_pending(tio->io, BLK_STS_IOERR);
free_tio(tio);
- } else if (r != DM_MAPIO_SUBMITTED) {
+ break;
+ case DM_MAPIO_REQUEUE:
+ dec_pending(tio->io, BLK_STS_DM_REQUEUE);
+ free_tio(tio);
+ break;
+ default:
DMWARN("unimplemented target map return value: %d", r);
BUG();
}
@@ -1031,17 +1258,28 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
__bio_clone_fast(clone, bio);
- if (bio_integrity(bio)) {
- int r = bio_integrity_clone(clone, bio, GFP_NOIO);
+ if (unlikely(bio_integrity(bio) != NULL)) {
+ int r;
+
+ if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
+ !dm_target_passes_integrity(tio->ti->type))) {
+ DMWARN("%s: the target %s doesn't support integrity data.",
+ dm_device_name(tio->io->md),
+ tio->ti->type->name);
+ return -EIO;
+ }
+
+ r = bio_integrity_clone(clone, bio, GFP_NOIO);
if (r < 0)
return r;
}
- bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
+ if (bio_op(bio) != REQ_OP_ZONE_REPORT)
+ bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
clone->bi_iter.bi_size = to_bytes(len);
- if (bio_integrity(bio))
- bio_integrity_trim(clone, 0, len);
+ if (unlikely(bio_integrity(bio) != NULL))
+ bio_integrity_trim(clone);
return 0;
}
@@ -1141,6 +1379,11 @@ static unsigned get_num_write_same_bios(struct dm_target *ti)
return ti->num_write_same_bios;
}
+static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
+{
+ return ti->num_write_zeroes_bios;
+}
+
typedef bool (*is_split_required_fn)(struct dm_target *ti);
static bool is_split_required_for_discard(struct dm_target *ti)
@@ -1195,6 +1438,11 @@ static int __send_write_same(struct clone_info *ci)
return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
}
+static int __send_write_zeroes(struct clone_info *ci)
+{
+ return __send_changing_extent_only(ci, get_num_write_zeroes_bios, NULL);
+}
+
/*
* Select the correct strategy for processing a non-flush bio.
*/
@@ -1209,12 +1457,18 @@ static int __split_and_process_non_flush(struct clone_info *ci)
return __send_discard(ci);
else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
return __send_write_same(ci);
+ else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
+ return __send_write_zeroes(ci);
ti = dm_table_find_target(ci->map, ci->sector);
if (!dm_target_is_valid(ti))
return -EIO;
- len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
+ if (bio_op(bio) == REQ_OP_ZONE_REPORT)
+ len = ci->sector_count;
+ else
+ len = min_t(sector_t, max_io_len(ci->sector, ti),
+ ci->sector_count);
r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
if (r < 0)
@@ -1243,7 +1497,7 @@ static void __split_and_process_bio(struct mapped_device *md,
ci.map = map;
ci.md = md;
ci.io = alloc_io(md);
- ci.io->error = 0;
+ ci.io->status = 0;
atomic_set(&ci.io->io_count, 1);
ci.io->bio = bio;
ci.io->md = md;
@@ -1257,6 +1511,10 @@ static void __split_and_process_bio(struct mapped_device *md,
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
+ } else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
+ ci.bio = bio;
+ ci.sector_count = 0;
+ error = __split_and_process_non_flush(&ci);
} else {
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
@@ -1314,7 +1572,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
* With request-based DM we only need to check the
* top-level queue for congestion.
*/
- r = md->queue->backing_dev_info.wb.state & bdi_bits;
+ r = md->queue->backing_dev_info->wb.state & bdi_bits;
} else {
map = dm_get_live_table_fast(md);
if (map)
@@ -1376,6 +1634,7 @@ static int next_free_minor(int *minor)
}
static const struct block_device_operations dm_blk_dops;
+static const struct dax_operations dm_dax_ops;
static void dm_wq_work(struct work_struct *work);
@@ -1397,7 +1656,7 @@ void dm_init_md_queue(struct mapped_device *md)
* - must do so here (in alloc_dev callchain) before queue is used
*/
md->queue->queuedata = md;
- md->queue->backing_dev_info.congested_data = md;
+ md->queue->backing_dev_info->congested_data = md;
}
void dm_init_normal_md_queue(struct mapped_device *md)
@@ -1408,8 +1667,7 @@ void dm_init_normal_md_queue(struct mapped_device *md)
/*
* Initialize aspects of queue that aren't relevant for blk-mq
*/
- md->queue->backing_dev_info.congested_fn = dm_any_congested;
- blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
+ md->queue->backing_dev_info->congested_fn = dm_any_congested;
}
static void cleanup_mapped_device(struct mapped_device *md)
@@ -1419,10 +1677,15 @@ static void cleanup_mapped_device(struct mapped_device *md)
if (md->kworker_task)
kthread_stop(md->kworker_task);
mempool_destroy(md->io_pool);
- mempool_destroy(md->rq_pool);
if (md->bs)
bioset_free(md->bs);
+ if (md->dax_dev) {
+ kill_dax(md->dax_dev);
+ put_dax(md->dax_dev);
+ md->dax_dev = NULL;
+ }
+
if (md->disk) {
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
@@ -1450,6 +1713,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
static struct mapped_device *alloc_dev(int minor)
{
int r, numa_node_id = dm_get_numa_node();
+ struct dax_device *dax_dev;
struct mapped_device *md;
void *old_md;
@@ -1514,6 +1778,12 @@ static struct mapped_device *alloc_dev(int minor)
md->disk->queue = md->queue;
md->disk->private_data = md;
sprintf(md->disk->disk_name, "dm-%d", minor);
+
+ dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+ if (!dax_dev)
+ goto bad;
+ md->dax_dev = dax_dev;
+
add_disk(md->disk);
format_dev_t(md->name, MKDEV(_major, minor));
@@ -1527,7 +1797,7 @@ static struct mapped_device *alloc_dev(int minor)
bio_init(&md->flush_bio, NULL, 0);
md->flush_bio.bi_bdev = md->bdev;
- md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+ md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
dm_stats_init(&md->stats);
@@ -1595,12 +1865,10 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
goto out;
}
- BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
+ BUG_ON(!p || md->io_pool || md->bs);
md->io_pool = p->io_pool;
p->io_pool = NULL;
- md->rq_pool = p->rq_pool;
- p->rq_pool = NULL;
md->bs = p->bs;
p->bs = NULL;
@@ -1625,7 +1893,9 @@ static void event_callback(void *context)
dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
atomic_inc(&md->event_nr);
+ atomic_inc(&dm_global_event_nr);
wake_up(&md->eventq);
+ wake_up(&dm_global_eventq);
}
/*
@@ -1633,6 +1903,8 @@ static void event_callback(void *context)
*/
static void __set_size(struct mapped_device *md, sector_t size)
{
+ lockdep_assert_held(&md->suspend_lock);
+
set_capacity(md->disk, size);
i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
@@ -1740,13 +2012,13 @@ void dm_unlock_md_type(struct mapped_device *md)
mutex_unlock(&md->type_lock);
}
-void dm_set_md_type(struct mapped_device *md, unsigned type)
+void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
{
BUG_ON(!mutex_is_locked(&md->type_lock));
md->type = type;
}
-unsigned dm_get_md_type(struct mapped_device *md)
+enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
{
return md->type;
}
@@ -1773,11 +2045,11 @@ EXPORT_SYMBOL_GPL(dm_get_queue_limits);
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
int r;
- unsigned type = dm_get_md_type(md);
+ enum dm_queue_mode type = dm_get_md_type(md);
switch (type) {
case DM_TYPE_REQUEST_BASED:
- r = dm_old_init_request_queue(md);
+ r = dm_old_init_request_queue(md, t);
if (r) {
DMERR("Cannot initialize queue for request-based mapped device");
return r;
@@ -1804,6 +2076,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
if (type == DM_TYPE_DAX_BIO_BASED)
queue_flag_set_unlocked(QUEUE_FLAG_DAX, md->queue);
break;
+ case DM_TYPE_NONE:
+ WARN_ON_ONCE(true);
+ break;
}
return 0;
@@ -2082,8 +2357,6 @@ static void unlock_fs(struct mapped_device *md)
* If __dm_suspend returns 0, the device is completely quiescent
* now. There is no request-processing activity. All new requests
* are being added to md->deferred list.
- *
- * Caller must hold md->suspend_lock
*/
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
unsigned suspend_flags, long task_state,
@@ -2101,6 +2374,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
*/
if (noflush)
set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+ else
+ pr_debug("%s: suspending with flush\n", dm_device_name(md));
/*
* This gets reverted if there's an error later and the targets
@@ -2299,6 +2574,8 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
{
struct dm_table *map = NULL;
+ lockdep_assert_held(&md->suspend_lock);
+
if (md->internal_suspend_count++)
return; /* nested internal suspend */
@@ -2489,11 +2766,10 @@ int dm_noflush_suspending(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
+struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
unsigned integrity, unsigned per_io_data_size)
{
struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
- struct kmem_cache *cachep = NULL;
unsigned int pool_size = 0;
unsigned int front_pad;
@@ -2503,20 +2779,16 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
switch (type) {
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
- cachep = _io_cache;
pool_size = dm_get_reserved_bio_based_ios();
front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
+
+ pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
+ if (!pools->io_pool)
+ goto out;
break;
case DM_TYPE_REQUEST_BASED:
- cachep = _rq_tio_cache;
- pool_size = dm_get_reserved_rq_based_ios();
- pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
- if (!pools->rq_pool)
- goto out;
- /* fall through to setup remaining rq-based pools */
case DM_TYPE_MQ_REQUEST_BASED:
- if (!pool_size)
- pool_size = dm_get_reserved_rq_based_ios();
+ pool_size = dm_get_reserved_rq_based_ios();
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
/* per_io_data_size is used for blk-mq pdu at queue allocation */
break;
@@ -2524,13 +2796,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
BUG();
}
- if (cachep) {
- pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
- if (!pools->io_pool)
- goto out;
- }
-
- pools->bs = bioset_create_nobvec(pool_size, front_pad);
+ pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
if (!pools->bs)
goto out;
@@ -2551,7 +2817,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
return;
mempool_destroy(pools->io_pool);
- mempool_destroy(pools->rq_pool);
if (pools->bs)
bioset_free(pools->bs);
@@ -2729,12 +2994,17 @@ static const struct block_device_operations dm_blk_dops = {
.open = dm_blk_open,
.release = dm_blk_close,
.ioctl = dm_blk_ioctl,
- .direct_access = dm_blk_direct_access,
.getgeo = dm_blk_getgeo,
.pr_ops = &dm_pr_ops,
.owner = THIS_MODULE
};
+static const struct dax_operations dm_dax_ops = {
+ .direct_access = dm_dax_direct_access,
+ .copy_from_iter = dm_dax_copy_from_iter,
+ .flush = dm_dax_flush,
+};
+
/*
* module hooks
*/