diff options
Diffstat (limited to 'io_uring')
| -rw-r--r-- | io_uring/cancel.c | 9 | ||||
| -rw-r--r-- | io_uring/io_uring.c | 70 | ||||
| -rw-r--r-- | io_uring/io_uring.h | 14 | ||||
| -rw-r--r-- | io_uring/net.c | 3 | ||||
| -rw-r--r-- | io_uring/rw.c | 2 | ||||
| -rw-r--r-- | io_uring/timeout.c | 14 |
6 files changed, 66 insertions, 46 deletions
diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 2291a53cdabd..b4f5dfacc0c3 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -288,24 +288,23 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); + mutex_unlock(&ctx->uring_lock); if (ret != -EALREADY) break; - mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(ctx); - if (ret < 0) { - mutex_lock(&ctx->uring_lock); + if (ret < 0) break; - } ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS); - mutex_lock(&ctx->uring_lock); if (!ret) { ret = -ETIME; break; } + mutex_lock(&ctx->uring_lock); } while (1); finish_wait(&ctx->cq_wait, &wait); + mutex_lock(&ctx->uring_lock); if (ret == -ENOENT || ret > 0) ret = 0; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index b521186efa5c..58ac13b69dc8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -538,7 +538,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx) } else { atomic_inc(&ev_fd->refs); if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) - call_rcu(&ev_fd->rcu, io_eventfd_ops); + call_rcu_hurry(&ev_fd->rcu, io_eventfd_ops); else atomic_dec(&ev_fd->refs); } @@ -572,12 +572,11 @@ static void io_eventfd_flush_signal(struct io_ring_ctx *ctx) void __io_commit_cqring_flush(struct io_ring_ctx *ctx) { - if (ctx->off_timeout_used || ctx->drain_active) { + if (ctx->off_timeout_used) + io_flush_timeouts(ctx); + if (ctx->drain_active) { spin_lock(&ctx->completion_lock); - if (ctx->off_timeout_used) - io_flush_timeouts(ctx); - if (ctx->drain_active) - io_queue_deferred(ctx); + io_queue_deferred(ctx); spin_unlock(&ctx->completion_lock); } if (ctx->has_evfd) @@ -597,6 +596,18 @@ static inline void __io_cq_unlock(struct io_ring_ctx *ctx) spin_unlock(&ctx->completion_lock); } +static inline void io_cq_lock(struct io_ring_ctx *ctx) + __acquires(ctx->completion_lock) +{ + spin_lock(&ctx->completion_lock); +} + +static inline void io_cq_unlock(struct 
io_ring_ctx *ctx) + __releases(ctx->completion_lock) +{ + spin_unlock(&ctx->completion_lock); +} + /* keep it inlined for io_submit_flush_completions() */ static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx) __releases(ctx->completion_lock) @@ -666,16 +677,20 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx) io_cq_unlock_post(ctx); } +static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx) +{ + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + __io_cqring_overflow_flush(ctx); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); +} + static void io_cqring_overflow_flush(struct io_ring_ctx *ctx) { - if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { - /* iopoll syncs against uring_lock, not completion_lock */ - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_lock(&ctx->uring_lock); - __io_cqring_overflow_flush(ctx); - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_unlock(&ctx->uring_lock); - } + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) + io_cqring_do_overflow_flush(ctx); } void __io_put_task(struct task_struct *task, int nr) @@ -916,7 +931,7 @@ static void __io_req_complete_post(struct io_kiocb *req) io_cq_lock(ctx); if (!(req->flags & REQ_F_CQE_SKIP)) - __io_fill_cqe_req(ctx, req); + io_fill_cqe_req(ctx, req); /* * If we're the last reference to this request, add to our locked @@ -1074,9 +1089,9 @@ static void __io_req_find_next_prep(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - io_cq_lock(ctx); + spin_lock(&ctx->completion_lock); io_disarm_next(req); - io_cq_unlock_post(ctx); + spin_unlock(&ctx->completion_lock); } static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) @@ -2470,7 +2485,14 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, } if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) return -ETIME; - return 1; + + /* + * Run task_work after 
scheduling. If we got woken because of + * task_work being processed, run it now rather than let the caller + * do another wait loop. + */ + ret = io_run_task_work_sig(ctx); + return ret < 0 ? ret : 1; } /* @@ -2531,10 +2553,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, trace_io_uring_cqring_wait(ctx, min_events); do { - io_cqring_overflow_flush(ctx); + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { + finish_wait(&ctx->cq_wait, &iowq.wq); + io_cqring_do_overflow_flush(ctx); + } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); ret = io_cqring_wait_schedule(ctx, &iowq, timeout); + if (__io_cqring_events_user(ctx) >= min_events) + break; cond_resched(); } while (ret > 0); @@ -3993,8 +4020,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, return -EEXIST; if (ctx->restricted) { - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; @@ -4150,6 +4175,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, long ret = -EBADF; struct fd f; + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + f = fdget(fd); if (!f.file) return -EBADF; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 1b2f0b2cc888..e9f0d41ebb99 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -87,17 +87,6 @@ static inline void io_req_task_work_add(struct io_kiocb *req) #define io_for_each_link(pos, head) \ for (pos = (head); pos; pos = pos->link) -static inline void io_cq_lock(struct io_ring_ctx *ctx) - __acquires(ctx->completion_lock) -{ - spin_lock(&ctx->completion_lock); -} - -static inline void io_cq_unlock(struct io_ring_ctx *ctx) -{ - spin_unlock(&ctx->completion_lock); -} - void io_cq_unlock_post(struct io_ring_ctx *ctx); static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx, @@ -277,8 +266,7 @@ static inline 
int io_run_task_work(void) static inline bool io_task_work_pending(struct io_ring_ctx *ctx) { - return test_thread_flag(TIF_NOTIFY_SIGNAL) || - !wq_list_empty(&ctx->work_llist); + return task_work_pending(current) || !wq_list_empty(&ctx->work_llist); } static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx) diff --git a/io_uring/net.c b/io_uring/net.c index 5229976cb582..fbc34a7c2743 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -494,6 +494,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, if (req->flags & REQ_F_BUFFER_SELECT) { compat_ssize_t clen; + iomsg->free_iov = NULL; if (msg.msg_iovlen == 0) { sr->len = 0; } else if (msg.msg_iovlen > 1) { @@ -819,10 +820,10 @@ retry_multishot: goto retry_multishot; if (mshot_finished) { - io_netmsg_recycle(req, issue_flags); /* fast path, check for non-NULL to avoid function call */ if (kmsg->free_iov) kfree(kmsg->free_iov); + io_netmsg_recycle(req, issue_flags); req->flags &= ~REQ_F_NEED_CLEANUP; } diff --git a/io_uring/rw.c b/io_uring/rw.c index b9cac5706e8d..8227af2e1c0f 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -1062,7 +1062,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) continue; req->cqe.flags = io_put_kbuf(req, 0); - __io_fill_cqe_req(req->ctx, req); + io_fill_cqe_req(req->ctx, req); } if (unlikely(!nr_events)) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 5b4bc93fd6e0..826a51bca3e4 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -50,7 +50,6 @@ static inline void io_put_req(struct io_kiocb *req) } static bool io_kill_timeout(struct io_kiocb *req, int status) - __must_hold(&req->ctx->completion_lock) __must_hold(&req->ctx->timeout_lock) { struct io_timeout_data *io = req->async_data; @@ -70,12 +69,13 @@ static bool io_kill_timeout(struct io_kiocb *req, int status) } __cold void io_flush_timeouts(struct io_ring_ctx *ctx) - __must_hold(&ctx->completion_lock) { - u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + u32 seq; 
struct io_timeout *timeout, *tmp; spin_lock_irq(&ctx->timeout_lock); + seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { struct io_kiocb *req = cmd_to_io_kiocb(timeout); u32 events_needed, events_got; @@ -622,7 +622,11 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, struct io_timeout *timeout, *tmp; int canceled = 0; - io_cq_lock(ctx); + /* + * completion_lock is needed for io_match_task(). Take it before + * timeout_lock first to keep locking ordering. + */ + spin_lock(&ctx->completion_lock); spin_lock_irq(&ctx->timeout_lock); list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { struct io_kiocb *req = cmd_to_io_kiocb(timeout); @@ -632,6 +636,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, canceled++; } spin_unlock_irq(&ctx->timeout_lock); - io_cq_unlock_post(ctx); + spin_unlock(&ctx->completion_lock); return canceled != 0; } |