-rw-r--r--  block/bio-integrity.c      218
-rw-r--r--  block/bio.c                 46
-rw-r--r--  block/blk-mq.c               3
-rw-r--r--  block/blk-rq-qos.h           2
-rw-r--r--  drivers/nvme/host/ioctl.c  197
-rw-r--r--  include/linux/bio.h          9
-rw-r--r--  include/linux/io_uring.h     9
-rw-r--r--  io_uring/uring_cmd.c         1
8 files changed, 283 insertions, 202 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index ec8ac8cf6e1b..feef615e2c9c 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -69,15 +69,15 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
memset(bip, 0, sizeof(*bip));
+ /* always report as many vecs as asked explicitly, not inline vecs */
+ bip->bip_max_vcnt = nr_vecs;
if (nr_vecs > inline_vecs) {
- bip->bip_max_vcnt = nr_vecs;
bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
&bip->bip_max_vcnt, gfp_mask);
if (!bip->bip_vec)
goto err;
} else {
bip->bip_vec = bip->bip_inline_vecs;
- bip->bip_max_vcnt = inline_vecs;
}
bip->bip_bio = bio;
@@ -91,6 +91,47 @@ err:
}
EXPORT_SYMBOL(bio_integrity_alloc);
+static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
+ bool dirty)
+{
+ int i;
+
+ for (i = 0; i < nr_vecs; i++) {
+ if (dirty && !PageCompound(bv[i].bv_page))
+ set_page_dirty_lock(bv[i].bv_page);
+ unpin_user_page(bv[i].bv_page);
+ }
+}
+
+static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
+{
+ unsigned short nr_vecs = bip->bip_max_vcnt - 1;
+ struct bio_vec *copy = &bip->bip_vec[1];
+ size_t bytes = bip->bip_iter.bi_size;
+ struct iov_iter iter;
+ int ret;
+
+ iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
+ ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
+ WARN_ON_ONCE(ret != bytes);
+
+ bio_integrity_unpin_bvec(copy, nr_vecs, true);
+}
+
+static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
+{
+ bool dirty = bio_data_dir(bip->bip_bio) == READ;
+
+ if (bip->bip_flags & BIP_COPY_USER) {
+ if (dirty)
+ bio_integrity_uncopy_user(bip);
+ kfree(bvec_virt(bip->bip_vec));
+ return;
+ }
+
+ bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, dirty);
+}
+
/**
* bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed
@@ -105,6 +146,8 @@ void bio_integrity_free(struct bio *bio)
if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
kfree(bvec_virt(bip->bip_vec));
+ else if (bip->bip_flags & BIP_INTEGRITY_USER)
+ bio_integrity_unmap_user(bip);
__bio_integrity_free(bs, bip);
bio->bi_integrity = NULL;
@@ -160,6 +203,177 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_integrity_add_page);
+static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
+ int nr_vecs, unsigned int len,
+ unsigned int direction, u32 seed)
+{
+ bool write = direction == ITER_SOURCE;
+ struct bio_integrity_payload *bip;
+ struct iov_iter iter;
+ void *buf;
+ int ret;
+
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (write) {
+ iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
+ if (!copy_from_iter_full(buf, len, &iter)) {
+ ret = -EFAULT;
+ goto free_buf;
+ }
+
+ bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+ } else {
+ memset(buf, 0, len);
+
+ /*
+ * We need to preserve the original bvec and the number of vecs
+ * in it for completion handling
+ */
+ bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
+ }
+
+ if (IS_ERR(bip)) {
+ ret = PTR_ERR(bip);
+ goto free_buf;
+ }
+
+ if (write)
+ bio_integrity_unpin_bvec(bvec, nr_vecs, false);
+ else
+ memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));
+
+ ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
+ offset_in_page(buf));
+ if (ret != len) {
+ ret = -ENOMEM;
+ goto free_bip;
+ }
+
+ bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
+ bip->bip_iter.bi_sector = seed;
+ return 0;
+free_bip:
+ bio_integrity_free(bio);
+free_buf:
+ kfree(buf);
+ return ret;
+}
+
+static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
+ int nr_vecs, unsigned int len, u32 seed)
+{
+ struct bio_integrity_payload *bip;
+
+ bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);
+ if (IS_ERR(bip))
+ return PTR_ERR(bip);
+
+ memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
+ bip->bip_flags |= BIP_INTEGRITY_USER;
+ bip->bip_iter.bi_sector = seed;
+ bip->bip_iter.bi_size = len;
+ return 0;
+}
+
+static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
+ int nr_vecs, ssize_t bytes, ssize_t offset)
+{
+ unsigned int nr_bvecs = 0;
+ int i, j;
+
+ for (i = 0; i < nr_vecs; i = j) {
+ size_t size = min_t(size_t, bytes, PAGE_SIZE - offset);
+ struct folio *folio = page_folio(pages[i]);
+
+ bytes -= size;
+ for (j = i + 1; j < nr_vecs; j++) {
+ size_t next = min_t(size_t, PAGE_SIZE, bytes);
+
+ if (page_folio(pages[j]) != folio ||
+ pages[j] != pages[j - 1] + 1)
+ break;
+ unpin_user_page(pages[j]);
+ size += next;
+ bytes -= next;
+ }
+
+ bvec_set_page(&bvec[nr_bvecs], pages[i], size, offset);
+ offset = 0;
+ nr_bvecs++;
+ }
+
+ return nr_bvecs;
+}
+
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
+ u32 seed)
+{
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
+ struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
+ struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
+ unsigned int direction, nr_bvecs;
+ struct iov_iter iter;
+ int ret, nr_vecs;
+ size_t offset;
+ bool copy;
+
+ if (bio_integrity(bio))
+ return -EINVAL;
+ if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
+ return -E2BIG;
+
+ if (bio_data_dir(bio) == READ)
+ direction = ITER_DEST;
+ else
+ direction = ITER_SOURCE;
+
+ iov_iter_ubuf(&iter, direction, ubuf, bytes);
+ nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
+ if (nr_vecs > BIO_MAX_VECS)
+ return -E2BIG;
+ if (nr_vecs > UIO_FASTIOV) {
+ bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
+ if (!bvec)
+ return -ENOMEM;
+ pages = NULL;
+ }
+
+ copy = !iov_iter_is_aligned(&iter, align, align);
+ ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
+ if (unlikely(ret < 0))
+ goto free_bvec;
+
+ nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
+ if (pages != stack_pages)
+ kvfree(pages);
+ if (nr_bvecs > queue_max_integrity_segments(q))
+ copy = true;
+
+ if (copy)
+ ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
+ direction, seed);
+ else
+ ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);
+ if (ret)
+ goto release_pages;
+ if (bvec != stack_vec)
+ kfree(bvec);
+
+ return 0;
+
+release_pages:
+ bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
+free_bvec:
+ if (bvec != stack_vec)
+ kfree(bvec);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bio_integrity_map_user);
+
/**
* bio_integrity_process - Process integrity metadata for a bio
* @bio: bio to generate/verify integrity metadata for
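bio_integrity_map_user() above is the interface drivers are expected to call; the nvme hunks below show the real call site. As a minimal, hypothetical sketch of the intended usage (the wrapper name and its layout are illustrative, not from this patch):

	/*
	 * Sketch: attach user-space integrity metadata to an already
	 * mapped passthrough bio. On success, mark the request so the
	 * block layer treats the payload as integrity data.
	 */
	static int attach_user_metadata(struct request *req,
					void __user *meta_ubuf,
					unsigned int meta_len, u32 seed)
	{
		int ret;

		ret = bio_integrity_map_user(req->bio, meta_ubuf, meta_len, seed);
		if (ret)
			return ret;
		req->cmd_flags |= REQ_INTEGRITY;
		return 0;
	}

Teardown is symmetric and automatic: bio_integrity_free() sees BIP_INTEGRITY_USER and calls bio_integrity_unmap_user(), which either unpins the pages or, for the BIP_COPY_USER bounce path on reads, copies the kernel buffer back to userspace before freeing it.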
diff --git a/block/bio.c b/block/bio.c
index 816d412c06e9..5eba53ca953b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1145,13 +1145,22 @@ EXPORT_SYMBOL(bio_add_folio);
void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
- struct bvec_iter_all iter_all;
- struct bio_vec *bvec;
+ struct folio_iter fi;
+
+ bio_for_each_folio_all(fi, bio) {
+ struct page *page;
+ size_t done = 0;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (mark_dirty && !PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
- bio_release_page(bio, bvec->bv_page);
+ if (mark_dirty) {
+ folio_lock(fi.folio);
+ folio_mark_dirty(fi.folio);
+ folio_unlock(fi.folio);
+ }
+ page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
+ do {
+ bio_release_page(bio, page++);
+ done += PAGE_SIZE;
+ } while (done < fi.length);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1439,18 +1448,12 @@ EXPORT_SYMBOL(bio_free_pages);
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
*
- * The problem is that we cannot run set_page_dirty() from interrupt context
+ * The problem is that we cannot run folio_mark_dirty() from interrupt context
* because the required locks are not interrupt-safe. So what we can do is to
* mark the pages dirty _before_ performing IO. And in interrupt context,
* check that the pages are still dirty. If so, fine. If not, redirty them
* in process context.
*
- * We special-case compound pages here: normally this means reads into hugetlb
- * pages. The logic in here doesn't really work right for compound pages
- * because the VM does not uniformly chase down the head page in all cases.
- * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
- * handle them at all. So we skip compound pages here at an early stage.
- *
* Note that this code is very hard to test under normal circumstances because
* direct-io pins the pages with get_user_pages(). This makes
* is_page_cache_freeable return false, and the VM will not clean the pages.
@@ -1466,12 +1469,12 @@ EXPORT_SYMBOL(bio_free_pages);
*/
void bio_set_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
+ bio_for_each_folio_all(fi, bio) {
+ folio_lock(fi.folio);
+ folio_mark_dirty(fi.folio);
+ folio_unlock(fi.folio);
}
}
EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
@@ -1515,12 +1518,11 @@ static void bio_dirty_fn(struct work_struct *work)
void bio_check_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
+ struct folio_iter fi;
unsigned long flags;
- struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
+ bio_for_each_folio_all(fi, bio) {
+ if (!folio_test_dirty(fi.folio))
goto defer;
}
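The comment block retained above describes the protocol these two helpers implement; a hypothetical direct-IO user (not part of this patch) would pair them like this:

	/* Process context, before submission: dirty the pages up front. */
	bio_set_pages_dirty(bio);
	submit_bio(bio);

	/*
	 * Completion, possibly in interrupt context: if the VM cleaned a
	 * page in the meantime, bio_check_pages_dirty() defers redirtying
	 * to a workqueue rather than taking non-IRQ-safe locks here.
	 */
	bio_check_pages_dirty(bio);

The switch to bio_for_each_folio_all() is also why the compound-page special case and its long explanatory comment go away: folio_mark_dirty() handles large folios uniformly, so the PageCompound() skips are no longer needed.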
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 900c1be1fee1..fb29ff5cc281 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1248,7 +1248,8 @@ void blk_mq_start_request(struct request *rq)
trace_block_rq_issue(rq);
- if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
+ if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) &&
+ !blk_rq_is_passthrough(rq)) {
rq->io_start_time_ns = ktime_get_ns();
rq->stats_sectors = blk_rq_sectors(rq);
rq->rq_flags |= RQF_STATS;
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index f48ee150d667..37245c97ee61 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -118,7 +118,7 @@ static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio)
static inline void rq_qos_done(struct request_queue *q, struct request *rq)
{
- if (q->rq_qos)
+ if (q->rq_qos && !blk_rq_is_passthrough(rq))
__rq_qos_done(q->rq_qos, rq);
}
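This hunk and the blk-mq.c one above key off the same predicate. For reference, blk_rq_is_passthrough() is roughly equivalent to the following (a paraphrase of the existing helper, not part of this diff):

	/* Passthrough means a driver-private op, e.g. from an ioctl path. */
	static inline bool blk_rq_is_passthrough(struct request *rq)
	{
		return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
	}

so user passthrough commands now skip both I/O statistics sampling and rq-qos completion accounting.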
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 529b9954d2b8..32c9bcf491a3 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -96,58 +96,6 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
return (void __user *)ptrval;
}
-static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
- unsigned len, u32 seed)
-{
- struct bio_integrity_payload *bip;
- int ret = -ENOMEM;
- void *buf;
- struct bio *bio = req->bio;
-
- buf = kmalloc(len, GFP_KERNEL);
- if (!buf)
- goto out;
-
- if (req_op(req) == REQ_OP_DRV_OUT) {
- ret = -EFAULT;
- if (copy_from_user(buf, ubuf, len))
- goto out_free_meta;
- } else {
- memset(buf, 0, len);
- }
-
- bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
- if (IS_ERR(bip)) {
- ret = PTR_ERR(bip);
- goto out_free_meta;
- }
-
- bip->bip_iter.bi_sector = seed;
- ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
- offset_in_page(buf));
- if (ret != len) {
- ret = -ENOMEM;
- goto out_free_meta;
- }
-
- req->cmd_flags |= REQ_INTEGRITY;
- return buf;
-out_free_meta:
- kfree(buf);
-out:
- return ERR_PTR(ret);
-}
-
-static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
- void *meta, unsigned len, int ret)
-{
- if (!ret && req_op(req) == REQ_OP_DRV_IN &&
- copy_to_user(ubuf, meta, len))
- ret = -EFAULT;
- kfree(meta);
- return ret;
-}
-
static struct request *nvme_alloc_user_request(struct request_queue *q,
struct nvme_command *cmd, blk_opf_t rq_flags,
blk_mq_req_flags_t blk_flags)
@@ -164,14 +112,12 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd,
- unsigned int flags)
+ u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
struct block_device *bdev = ns ? ns->disk->part0 : NULL;
struct bio *bio = NULL;
- void *meta = NULL;
int ret;
if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
@@ -193,18 +139,17 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
if (ret)
goto out;
+
bio = req->bio;
- if (bdev)
+ if (bdev) {
bio_set_dev(bio, bdev);
-
- if (bdev && meta_buffer && meta_len) {
- meta = nvme_add_user_metadata(req, meta_buffer, meta_len,
- meta_seed);
- if (IS_ERR(meta)) {
- ret = PTR_ERR(meta);
- goto out_unmap;
+ if (meta_buffer && meta_len) {
+ ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
+ meta_seed);
+ if (ret)
+ goto out_unmap;
+ req->cmd_flags |= REQ_INTEGRITY;
}
- *metap = meta;
}
return ret;
@@ -225,7 +170,6 @@ static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_ns *ns = q->queuedata;
struct nvme_ctrl *ctrl;
struct request *req;
- void *meta = NULL;
struct bio *bio;
u32 effects;
int ret;
@@ -237,7 +181,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
- meta_len, meta_seed, &meta, NULL, flags);
+ meta_len, meta_seed, NULL, flags);
if (ret)
return ret;
}
@@ -249,9 +193,6 @@ static int nvme_submit_user_cmd(struct request_queue *q,
ret = nvme_execute_rq(req, false);
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
- if (meta)
- ret = nvme_finish_user_metadata(req, meta_buffer, meta,
- meta_len, ret);
if (bio)
blk_rq_unmap_user(bio);
blk_mq_free_request(req);
@@ -446,19 +387,10 @@ struct nvme_uring_data {
* Expect build errors if this grows larger than that.
*/
struct nvme_uring_cmd_pdu {
- union {
- struct bio *bio;
- struct request *req;
- };
- u32 meta_len;
- u32 nvme_status;
- union {
- struct {
- void *meta; /* kernel-resident buffer */
- void __user *meta_buffer;
- };
- u64 result;
- } u;
+ struct request *req;
+ struct bio *bio;
+ u64 result;
+ int status;
};
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
@@ -467,31 +399,6 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
}
-static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd,
- unsigned issue_flags)
-{
- struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
- struct request *req = pdu->req;
- int status;
- u64 result;
-
- if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
- status = -EINTR;
- else
- status = nvme_req(req)->status;
-
- result = le64_to_cpu(nvme_req(req)->result.u64);
-
- if (pdu->meta_len)
- status = nvme_finish_user_metadata(req, pdu->u.meta_buffer,
- pdu->u.meta, pdu->meta_len, status);
- if (req->bio)
- blk_rq_unmap_user(req->bio);
- blk_mq_free_request(req);
-
- io_uring_cmd_done(ioucmd, status, result, issue_flags);
-}
-
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
unsigned issue_flags)
{
@@ -499,8 +406,7 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
if (pdu->bio)
blk_rq_unmap_user(pdu->bio);
-
- io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags);
+ io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
}
static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
@@ -509,53 +415,24 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
- req->bio = pdu->bio;
- if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
- pdu->nvme_status = -EINTR;
- } else {
- pdu->nvme_status = nvme_req(req)->status;
- if (!pdu->nvme_status)
- pdu->nvme_status = blk_status_to_errno(err);
- }
- pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64);
+ if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+ pdu->status = -EINTR;
+ else
+ pdu->status = nvme_req(req)->status;
+ pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
/*
* For iopoll, complete it directly.
* Otherwise, move the completion to task work.
*/
- if (blk_rq_is_poll(req)) {
- WRITE_ONCE(ioucmd->cookie, NULL);
+ if (blk_rq_is_poll(req))
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
- } else {
+ else
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
- }
return RQ_END_IO_FREE;
}
-static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
- blk_status_t err)
-{
- struct io_uring_cmd *ioucmd = req->end_io_data;
- struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-
- req->bio = pdu->bio;
- pdu->req = req;
-
- /*
- * For iopoll, complete it directly.
- * Otherwise, move the completion to task work.
- */
- if (blk_rq_is_poll(req)) {
- WRITE_ONCE(ioucmd->cookie, NULL);
- nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
- } else {
- io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
- }
-
- return RQ_END_IO_NONE;
-}
-
static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
@@ -567,7 +444,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct request *req;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
- void *meta = NULL;
int ret;
c.common.opcode = READ_ONCE(cmd->opcode);
@@ -615,27 +491,16 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
if (d.addr && d.data_len) {
ret = nvme_map_user_request(req, d.addr,
d.data_len, nvme_to_user_ptr(d.metadata),
- d.metadata_len, 0, &meta, ioucmd, vec);
+ d.metadata_len, 0, ioucmd, vec);
if (ret)
return ret;
}
- if (blk_rq_is_poll(req)) {
- ioucmd->flags |= IORING_URING_CMD_POLLED;
- WRITE_ONCE(ioucmd->cookie, req);
- }
-
/* to free bio on completion, as req->bio will be null at that time */
pdu->bio = req->bio;
- pdu->meta_len = d.metadata_len;
+ pdu->req = req;
req->end_io_data = ioucmd;
- if (pdu->meta_len) {
- pdu->u.meta = meta;
- pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata);
- req->end_io = nvme_uring_cmd_end_io_meta;
- } else {
- req->end_io = nvme_uring_cmd_end_io;
- }
+ req->end_io = nvme_uring_cmd_end_io;
blk_execute_rq_nowait(req, false);
return -EIOCBQUEUED;
}
@@ -786,16 +651,12 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
struct io_comp_batch *iob,
unsigned int poll_flags)
{
- struct request *req;
- int ret = 0;
-
- if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
- return 0;
+ struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+ struct request *req = pdu->req;
- req = READ_ONCE(ioucmd->cookie);
if (req && blk_rq_is_poll(req))
- ret = blk_rq_poll(req, iob, poll_flags);
- return ret;
+ return blk_rq_poll(req, iob, poll_flags);
+ return 0;
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
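For context, the metadata pointer that now flows into bio_integrity_map_user() originates in the user-visible passthrough structures. A hypothetical userspace setup (field names are from include/uapi/linux/nvme_ioctl.h; the values are illustrative):

	struct nvme_uring_cmd c = {
		.opcode       = 0x02,				/* NVMe Read */
		.nsid         = nsid,
		.addr         = (__u64)(uintptr_t)data_buf,
		.data_len     = data_len,
		.metadata     = (__u64)(uintptr_t)meta_buf,	/* PI buffer */
		.metadata_len = meta_len,
	};

Previously that metadata buffer was always bounced through a kmalloc() in nvme_add_user_metadata(); after this series it is pinned and mapped directly, falling back to the copy path only when alignment or integrity-segment limits require it.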
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 41d417ee1349..ec4db73e5f4e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -324,6 +324,8 @@ enum bip_flags {
BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
+ BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */
+ BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */
};
/*
@@ -718,6 +720,7 @@ static inline bool bioset_initialized(struct bio_set *bs)
for_each_bio(_bio) \
bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
extern bool bio_integrity_prep(struct bio *);
@@ -789,6 +792,12 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
return 0;
}
+static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
+ ssize_t len, u32 seed)
+{
+ return -EINVAL;
+}
+
#endif /* CONFIG_BLK_DEV_INTEGRITY */
/*
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index aefb73eeeebf..9e6ce6d4ab51 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -28,17 +28,12 @@ enum io_uring_cmd_flags {
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
#define IORING_URING_CMD_CANCELABLE (1U << 30)
-#define IORING_URING_CMD_POLLED (1U << 31)
struct io_uring_cmd {
struct file *file;
const struct io_uring_sqe *sqe;
- union {
- /* callback to defer completions to task context */
- void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
- /* used for polled completion */
- void *cookie;
- };
+ /* callback to defer completions to task context */
+ void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
u32 cmd_op;
u32 flags;
u8 pdu[32]; /* available inline for free use */
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index acbc2924ecd2..b39ec25c36bc 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -182,7 +182,6 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
return -EOPNOTSUPP;
issue_flags |= IO_URING_F_IOPOLL;
req->iopoll_completed = 0;
- WRITE_ONCE(ioucmd->cookie, NULL);
}
ret = file->f_op->uring_cmd(ioucmd, issue_flags);
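With the cookie gone, iopoll no longer needs a union in struct io_uring_cmd: nvme finds the request through its own pdu (see nvme_ns_chr_uring_cmd_iopoll() above), and task_work_cb stays valid for the life of the command. The remaining deferral pattern for a uring_cmd driver looks roughly like this (a sketch, not from the patch; struct my_pdu and its fields are hypothetical):

	/* Runs in task context after the IRQ-time completion deferred it. */
	static void my_uring_task_cb(struct io_uring_cmd *ioucmd,
				     unsigned issue_flags)
	{
		struct my_pdu *pdu = (struct my_pdu *)ioucmd->pdu;

		/* unmap/unpin buffers here, then complete the command */
		io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
	}

	/* From the driver's end_io handler: */
	io_uring_cmd_do_in_task_lazy(ioucmd, my_uring_task_cb);

The status and result fields stand in for whatever the driver recorded at completion time, as nvme now does with pdu->status and pdu->result.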