From 8e29da69feade64ec7fe9e1a2824b967c5183a21 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2022 10:44:00 -0600 Subject: io_uring: add support for IORING_ASYNC_CANCEL_ALL The current cancelation will lookup and cancel the first request it finds based on the key passed in. Add a flag that allows to cancel any request that matches they key. It completes with the number of requests found and canceled, or res < 0 if an error occured. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20220418164402.75259-4-axboe@kernel.dk --- include/uapi/linux/io_uring.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1845cf7c80ba..476e58a2837f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -187,6 +187,13 @@ enum { #define IORING_POLL_UPDATE_EVENTS (1U << 1) #define IORING_POLL_UPDATE_USER_DATA (1U << 2) +/* + * ASYNC_CANCEL flags. + * + * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key + */ +#define IORING_ASYNC_CANCEL_ALL (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit From 4bf94615b8886305199ed5755cb72fea88258d15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2022 10:44:01 -0600 Subject: io_uring: allow IORING_OP_ASYNC_CANCEL with 'fd' key Currently sqe->addr must contain the user_data of the request being canceled. Introduce the IORING_ASYNC_CANCEL_FD flag, which tells the kernel that we're keying off the file fd instead for cancelation. This allows canceling any request that a) uses a file, and b) was assigned the file based on the value being passed in. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20220418164402.75259-5-axboe@kernel.dk --- fs/io_uring.c | 67 ++++++++++++++++++++++++++++++++++++++----- include/uapi/linux/io_uring.h | 3 ++ 2 files changed, 63 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/io_uring.c b/fs/io_uring.c index b43cdf1a4555..cf0d5437b77d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -587,6 +587,7 @@ struct io_cancel { struct file *file; u64 addr; u32 flags; + s32 fd; }; struct io_timeout { @@ -992,7 +993,10 @@ struct io_defer_entry { struct io_cancel_data { struct io_ring_ctx *ctx; - u64 data; + union { + u64 data; + struct file *file; + }; u32 flags; int seq; }; @@ -6332,6 +6336,29 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, return NULL; } +static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, + struct io_cancel_data *cd) + __must_hold(&ctx->completion_lock) +{ + struct io_kiocb *req; + int i; + + for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { + struct hlist_head *list; + + list = &ctx->cancel_hash[i]; + hlist_for_each_entry(req, list, hash_node) { + if (req->file != cd->file) + continue; + if (cd->seq == req->work.cancel_seq) + continue; + req->work.cancel_seq = cd->seq; + return req; + } + } + return NULL; +} + static bool io_poll_disarm(struct io_kiocb *req) __must_hold(&ctx->completion_lock) { @@ -6345,8 +6372,12 @@ static bool io_poll_disarm(struct io_kiocb *req) static int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) __must_hold(&ctx->completion_lock) { - struct io_kiocb *req = io_poll_find(ctx, false, cd); + struct io_kiocb *req; + if (cd->flags & IORING_ASYNC_CANCEL_FD) + req = io_poll_file_find(ctx, cd); + else + req = io_poll_find(ctx, false, cd); if (!req) return -ENOENT; io_poll_cancel_req(req); @@ -6796,8 +6827,13 @@ static bool io_cancel_cb(struct io_wq_work *work, void *data) if (req->ctx != cd->ctx) return false; - if (req->cqe.user_data != cd->data) - return false; + if (cd->flags & IORING_ASYNC_CANCEL_FD) { + if (req->file != cd->file) + return false; + } else { + if (req->cqe.user_data != cd->data) + return false; + } if (cd->flags & IORING_ASYNC_CANCEL_ALL) { if (cd->seq == req->work.cancel_seq) return false; @@ -6851,7 +6887,8 @@ static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd) ret = io_poll_cancel(ctx, cd); if (ret != -ENOENT) goto out; - ret = io_timeout_cancel(ctx, cd); + if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) + ret = io_timeout_cancel(ctx, cd); out: spin_unlock(&ctx->completion_lock); return ret; @@ -6862,15 +6899,17 @@ static int io_async_cancel_prep(struct io_kiocb *req, { if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + if (unlikely(req->flags & REQ_F_BUFFER_SELECT)) return -EINVAL; if (sqe->ioprio || sqe->off || sqe->len || sqe->splice_fd_in) return -EINVAL; req->cancel.addr = READ_ONCE(sqe->addr); req->cancel.flags = READ_ONCE(sqe->cancel_flags); - if (req->cancel.flags & ~IORING_ASYNC_CANCEL_ALL) + if (req->cancel.flags & ~(IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_FD)) return -EINVAL; + if (req->cancel.flags & IORING_ASYNC_CANCEL_FD) + req->cancel.fd = READ_ONCE(sqe->fd); return 0; } @@ -6919,7 +6958,21 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) }; int ret; + if (cd.flags & IORING_ASYNC_CANCEL_FD) { + if (req->flags & REQ_F_FIXED_FILE) + req->file = io_file_get_fixed(req, req->cancel.fd, + issue_flags); + else + req->file = io_file_get_normal(req, req->cancel.fd); + if (!req->file) { + ret = -EBADF; + goto done; + } + cd.file = req->file; + } + ret = __io_async_cancel(&cd, req, issue_flags); +done: if (ret < 0) req_set_fail(req); io_req_complete_post(req, ret, 0); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 476e58a2837f..cc7fe82a1798 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -191,8 +191,11 @@ enum { * ASYNC_CANCEL flags. * * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key + * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the + * request 'user_data' */ #define IORING_ASYNC_CANCEL_ALL (1U << 0) +#define IORING_ASYNC_CANCEL_FD (1U << 1) /* * IO completion data structure (Completion Queue Entry) -- cgit From 970f256edb8c1259c8ed48d52b38215135396126 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2022 10:44:02 -0600 Subject: io_uring: add support for IORING_ASYNC_CANCEL_ANY Rather than match on a specific key, be it user_data or file, allow canceling any request that we can lookup. Works like IORING_ASYNC_CANCEL_ALL in that it cancels multiple requests, but it doesn't key off user_data or the file. Can't be set with IORING_ASYNC_CANCEL_FD, as that's a key selector. Only one may be used at the time. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20220418164402.75259-6-axboe@kernel.dk Signed-off-by: Jens Axboe --- fs/io_uring.c | 39 +++++++++++++++++++++++++-------------- include/uapi/linux/io_uring.h | 2 ++ 2 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/io_uring.c b/fs/io_uring.c index cf0d5437b77d..03134ec070a3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6348,7 +6348,8 @@ static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, list = &ctx->cancel_hash[i]; hlist_for_each_entry(req, list, hash_node) { - if (req->file != cd->file) + if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && + req->file != cd->file) continue; if (cd->seq == req->work.cancel_seq) continue; @@ -6374,7 +6375,7 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) { struct io_kiocb *req; - if (cd->flags & IORING_ASYNC_CANCEL_FD) + if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY)) req = io_poll_file_find(ctx, cd); else req = io_poll_find(ctx, false, cd); @@ -6543,9 +6544,10 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, bool found = false; list_for_each_entry(req, &ctx->timeout_list, timeout.list) { - if (cd->data != req->cqe.user_data) + if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && + cd->data != req->cqe.user_data) continue; - if (cd->flags & IORING_ASYNC_CANCEL_ALL) { + if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { if (cd->seq == req->work.cancel_seq) continue; req->work.cancel_seq = cd->seq; @@ -6827,14 +6829,16 @@ static bool io_cancel_cb(struct io_wq_work *work, void *data) if (req->ctx != cd->ctx) return false; - if (cd->flags & IORING_ASYNC_CANCEL_FD) { + if (cd->flags & IORING_ASYNC_CANCEL_ANY) { + ; + } else if (cd->flags & IORING_ASYNC_CANCEL_FD) { if (req->file != cd->file) return false; } else { if (req->cqe.user_data != cd->data) return false; } - if (cd->flags & IORING_ASYNC_CANCEL_ALL) { + if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { if (cd->seq == req->work.cancel_seq) return false; req->work.cancel_seq = cd->seq; @@ -6847,12 +6851,13 @@ static int io_async_cancel_one(struct io_uring_task *tctx, { enum io_wq_cancel cancel_ret; int ret = 0; + bool all; if (!tctx || !tctx->io_wq) return -ENOENT; - cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, - cd->flags & IORING_ASYNC_CANCEL_ALL); + all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); + cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all); switch (cancel_ret) { case IO_WQ_CANCEL_OK: ret = 0; @@ -6894,6 +6899,9 @@ out: return ret; } +#define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \ + IORING_ASYNC_CANCEL_ANY) + static int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -6906,10 +6914,13 @@ static int io_async_cancel_prep(struct io_kiocb *req, req->cancel.addr = READ_ONCE(sqe->addr); req->cancel.flags = READ_ONCE(sqe->cancel_flags); - if (req->cancel.flags & ~(IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_FD)) + if (req->cancel.flags & ~CANCEL_FLAGS) return -EINVAL; - if (req->cancel.flags & IORING_ASYNC_CANCEL_FD) + if (req->cancel.flags & IORING_ASYNC_CANCEL_FD) { + if (req->cancel.flags & IORING_ASYNC_CANCEL_ANY) + return -EINVAL; req->cancel.fd = READ_ONCE(sqe->fd); + } return 0; } @@ -6917,7 +6928,7 @@ static int io_async_cancel_prep(struct io_kiocb *req, static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req, unsigned int issue_flags) { - bool cancel_all = cd->flags & IORING_ASYNC_CANCEL_ALL; + bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); struct io_ring_ctx *ctx = cd->ctx; struct io_tctx_node *node; int ret, nr = 0; @@ -6926,7 +6937,7 @@ static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req, ret = io_try_cancel(req, cd); if (ret == -ENOENT) break; - if (!cancel_all) + if (!all) return ret; nr++; } while (1); @@ -6939,13 +6950,13 @@ static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req, ret = io_async_cancel_one(tctx, cd); if (ret != -ENOENT) { - if (!cancel_all) + if (!all) break; nr++; } } io_ring_submit_unlock(ctx, issue_flags); - return cancel_all ? nr : ret; + return all ? nr : ret; } static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index cc7fe82a1798..980d82eb196e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -193,9 +193,11 @@ enum { * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the * request 'user_data' + * IORING_ASYNC_CANCEL_ANY Match any request */ #define IORING_ASYNC_CANCEL_ALL (1U << 0) #define IORING_ASYNC_CANCEL_FD (1U << 1) +#define IORING_ASYNC_CANCEL_ANY (1U << 2) /* * IO completion data structure (Completion Queue Entry) -- cgit From e1169f06d5bbdbc2b22ae4e3083a4bf75ae5ecee Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Apr 2022 19:49:03 -0600 Subject: io_uring: use TWA_SIGNAL_NO_IPI if IORING_SETUP_COOP_TASKRUN is used If this is set, io_uring will never use an IPI to deliver a task_work notification. This can be used in the common case where a single task or thread communicates with the ring, and doesn't rely on io_uring_cqe_peek(). This provides a noticeable win in performance, both from eliminating the IPI itself, but also from avoiding interrupting the submitting task unnecessarily. Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20220426014904.60384-6-axboe@kernel.dk Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 +++++++++++++---- include/uapi/linux/io_uring.h | 8 ++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/io_uring.c b/fs/io_uring.c index 3c669d8f5e57..0b9ae3615911 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -11327,12 +11327,20 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, ctx->user = get_uid(current_user()); /* - * For SQPOLL, we just need a wakeup, always. + * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if + * COOP_TASKRUN is set, then IPIs are never needed by the app. */ - if (ctx->flags & IORING_SETUP_SQPOLL) + ret = -EINVAL; + if (ctx->flags & IORING_SETUP_SQPOLL) { + /* IPI related flags don't make sense with SQPOLL */ + if (ctx->flags & IORING_SETUP_COOP_TASKRUN) + goto err; ctx->notify_method = TWA_SIGNAL_NO_IPI; - else + } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) { + ctx->notify_method = TWA_SIGNAL_NO_IPI; + } else { ctx->notify_method = TWA_SIGNAL; + } /* * This is just grabbed for accounting purposes. When a process exits, @@ -11431,7 +11439,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | - IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL)) + IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL | + IORING_SETUP_COOP_TASKRUN)) return -EINVAL; return io_uring_create(entries, &p, params); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 980d82eb196e..a84f29d657c3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -102,6 +102,14 @@ enum { #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ #define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ +/* + * Cooperative task running. When requests complete, they often require + * forcing the submitter to transition to the kernel to complete. If this + * flag is set, work will be done when the task transitions anyway, rather + * than force an inter-processor interrupt reschedule. This avoids interrupting + * a task running in userspace, and saves an IPI. + */ +#define IORING_SETUP_COOP_TASKRUN (1U << 8) enum { IORING_OP_NOP, -- cgit From ef060ea9e4fd3b763e7060a3af0a258d2d5d7c0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Apr 2022 19:49:04 -0600 Subject: io_uring: add IORING_SETUP_TASKRUN_FLAG If IORING_SETUP_COOP_TASKRUN is set to use cooperative scheduling for running task_work, then IORING_SETUP_TASKRUN_FLAG can be set so the application can tell if task_work is pending in the kernel for this ring. This allows use cases like io_uring_peek_cqe() to still function appropriately, or for the task to know when it would be useful to call io_uring_wait_cqe() to run pending events. Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20220426014904.60384-7-axboe@kernel.dk Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 +++++++++++--- include/uapi/linux/io_uring.h | 7 +++++++ 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/io_uring.c b/fs/io_uring.c index 0b9ae3615911..72cb2d50125c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2506,6 +2506,8 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) { if (!ctx) return; + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); if (*locked) { io_submit_flush_completions(ctx); mutex_unlock(&ctx->uring_lock); @@ -2646,6 +2648,9 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority) if (running) return; + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); + if (likely(!task_work_add(tsk, &tctx->task_work, ctx->notify_method))) return; @@ -11333,12 +11338,15 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, ret = -EINVAL; if (ctx->flags & IORING_SETUP_SQPOLL) { /* IPI related flags don't make sense with SQPOLL */ - if (ctx->flags & IORING_SETUP_COOP_TASKRUN) + if (ctx->flags & (IORING_SETUP_COOP_TASKRUN | + IORING_SETUP_TASKRUN_FLAG)) goto err; ctx->notify_method = TWA_SIGNAL_NO_IPI; } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) { ctx->notify_method = TWA_SIGNAL_NO_IPI; } else { + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + goto err; ctx->notify_method = TWA_SIGNAL; } @@ -11440,10 +11448,10 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL | - IORING_SETUP_COOP_TASKRUN)) + IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG)) return -EINVAL; - return io_uring_create(entries, &p, params); + return io_uring_create(entries, &p, params); } SYSCALL_DEFINE2(io_uring_setup, u32, entries, diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a84f29d657c3..fad63564678a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -110,6 +110,12 @@ enum { * a task running in userspace, and saves an IPI. */ #define IORING_SETUP_COOP_TASKRUN (1U << 8) +/* + * If COOP_TASKRUN is set, get notified if task work is available for + * running and a kernel transition would be needed to run it. This sets + * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + */ +#define IORING_SETUP_TASKRUN_FLAG (1U << 9) enum { IORING_OP_NOP, @@ -256,6 +262,7 @@ struct io_sqring_offsets { */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ #define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ +#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */ struct io_cqring_offsets { __u32 head; -- cgit From 0455d4ccec548b0fb51db39a4d3350a7a80a0222 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 26 Apr 2022 12:11:33 -0600 Subject: io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg If IORING_RECVSEND_POLL_FIRST is set for recv/recvmsg or send/sendmsg, then we arm poll first rather than attempt a receive or send upfront. This can be useful if we expect there to be no data (or space) available for the request, as we can then avoid wasting time on the initial issue attempt. Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io_uring.c | 27 +++++++++++++++++++++++++-- include/uapi/linux/io_uring.h | 10 ++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/io_uring.c b/fs/io_uring.c index efe4e92ad8ad..6db9ab8d4d15 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -637,6 +637,7 @@ struct io_sr_msg { int bgid; size_t len; size_t done_io; + unsigned int flags; }; struct io_open { @@ -5272,11 +5273,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *sr = &req->sr_msg; - if (unlikely(sqe->addr2 || sqe->file_index)) + if (unlikely(sqe->file_index)) return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); + sr->flags = READ_ONCE(sqe->addr2); + if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) + return -EINVAL; sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; if (sr->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; @@ -5311,6 +5315,10 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) kmsg = &iomsg; } + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return io_setup_async_msg(req, kmsg); + flags = req->sr_msg.msg_flags; if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; @@ -5353,6 +5361,10 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) int min_ret = 0; int ret; + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return -EAGAIN; + sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; @@ -5505,11 +5517,14 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *sr = &req->sr_msg; - if (unlikely(sqe->addr2 || sqe->file_index)) + if (unlikely(sqe->file_index)) return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); + sr->flags = READ_ONCE(sqe->addr2); + if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) + return -EINVAL; sr->bgid = READ_ONCE(sqe->buf_group); sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; if (sr->msg_flags & MSG_DONTWAIT) @@ -5546,6 +5561,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) kmsg = &iomsg; } + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return io_setup_async_msg(req, kmsg); + if (req->flags & REQ_F_BUFFER_SELECT) { kbuf = io_recv_buffer_select(req, issue_flags); if (IS_ERR(kbuf)) @@ -5603,6 +5622,10 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) int ret, min_ret = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return -EAGAIN; + sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index fad63564678a..06621a278cb6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -213,6 +213,16 @@ enum { #define IORING_ASYNC_CANCEL_FD (1U << 1) #define IORING_ASYNC_CANCEL_ANY (1U << 2) +/* + * send/sendmsg and recv/recvmsg flags (sqe->addr2) + * + * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send + * or receive and arm poll if that yields an + * -EAGAIN result, arm poll upfront and skip + * the initial transfer attempt. + */ +#define IORING_RECVSEND_POLL_FIRST (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit