diff options
Diffstat (limited to 'io_uring/msg_ring.c')
 io_uring/msg_ring.c | 153 +++++++++++++++++++++++++++++++++++++++------------------------------
 1 file changed, 103 insertions(+), 50 deletions(-)
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 2d3cd945a531..8803c0979e2a 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -13,6 +13,11 @@  #include "filetable.h"  #include "msg_ring.h" + +/* All valid masks for MSG_RING */ +#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \ +					IORING_MSG_RING_FLAGS_PASS) +  struct io_msg {  	struct file			*file;  	struct file			*src_file; @@ -21,10 +26,35 @@ struct io_msg {  	u32 len;  	u32 cmd;  	u32 src_fd; -	u32 dst_fd; +	union { +		u32 dst_fd; +		u32 cqe_flags; +	};  	u32 flags;  }; +static void io_double_unlock_ctx(struct io_ring_ctx *octx) +{ +	mutex_unlock(&octx->uring_lock); +} + +static int io_double_lock_ctx(struct io_ring_ctx *octx, +			      unsigned int issue_flags) +{ +	/* +	 * To ensure proper ordering between the two ctxs, we can only +	 * attempt a trylock on the target. If that fails and we already have +	 * the source ctx lock, punt to io-wq. +	 */ +	if (!(issue_flags & IO_URING_F_UNLOCKED)) { +		if (!mutex_trylock(&octx->uring_lock)) +			return -EAGAIN; +		return 0; +	} +	mutex_lock(&octx->uring_lock); +	return 0; +} +  void io_msg_ring_cleanup(struct io_kiocb *req)  {  	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); @@ -36,6 +66,29 @@ void io_msg_ring_cleanup(struct io_kiocb *req)  	msg->src_file = NULL;  } +static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx) +{ +	if (!target_ctx->task_complete) +		return false; +	return current != target_ctx->submitter_task; +} + +static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func) +{ +	struct io_ring_ctx *ctx = req->file->private_data; +	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); +	struct task_struct *task = READ_ONCE(ctx->submitter_task); + +	if (unlikely(!task)) +		return -EOWNERDEAD; + +	init_task_work(&msg->tw, func); +	if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL)) +		return -EOWNERDEAD; + +	return IOU_ISSUE_SKIP_COMPLETE; +} +  static void 
io_msg_tw_complete(struct callback_head *head)  {  	struct io_msg *msg = container_of(head, struct io_msg, tw); @@ -43,61 +96,65 @@ static void io_msg_tw_complete(struct callback_head *head)  	struct io_ring_ctx *target_ctx = req->file->private_data;  	int ret = 0; -	if (current->flags & PF_EXITING) +	if (current->flags & PF_EXITING) {  		ret = -EOWNERDEAD; -	else if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) -		ret = -EOVERFLOW; +	} else { +		u32 flags = 0; + +		if (msg->flags & IORING_MSG_RING_FLAGS_PASS) +			flags = msg->cqe_flags; + +		/* +		 * If the target ring is using IOPOLL mode, then we need to be +		 * holding the uring_lock for posting completions. Other ring +		 * types rely on the regular completion locking, which is +		 * handled while posting. +		 */ +		if (target_ctx->flags & IORING_SETUP_IOPOLL) +			mutex_lock(&target_ctx->uring_lock); +		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) +			ret = -EOVERFLOW; +		if (target_ctx->flags & IORING_SETUP_IOPOLL) +			mutex_unlock(&target_ctx->uring_lock); +	}  	if (ret < 0)  		req_set_fail(req);  	io_req_queue_tw_complete(req, ret);  } -static int io_msg_ring_data(struct io_kiocb *req) +static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)  {  	struct io_ring_ctx *target_ctx = req->file->private_data;  	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); +	u32 flags = 0; +	int ret; -	if (msg->src_fd || msg->dst_fd || msg->flags) +	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)  		return -EINVAL; +	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd) +		return -EINVAL; +	if (target_ctx->flags & IORING_SETUP_R_DISABLED) +		return -EBADFD; -	if (target_ctx->task_complete && current != target_ctx->submitter_task) { -		init_task_work(&msg->tw, io_msg_tw_complete); -		if (task_work_add(target_ctx->submitter_task, &msg->tw, -				  TWA_SIGNAL_NO_IPI)) -			return -EOWNERDEAD; - -		atomic_or(IORING_SQ_TASKRUN, 
&target_ctx->rings->sq_flags); -		return IOU_ISSUE_SKIP_COMPLETE; -	} - -	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) -		return 0; - -	return -EOVERFLOW; -} +	if (io_msg_need_remote(target_ctx)) +		return io_msg_exec_remote(req, io_msg_tw_complete); -static void io_double_unlock_ctx(struct io_ring_ctx *octx, -				 unsigned int issue_flags) -{ -	mutex_unlock(&octx->uring_lock); -} +	if (msg->flags & IORING_MSG_RING_FLAGS_PASS) +		flags = msg->cqe_flags; -static int io_double_lock_ctx(struct io_ring_ctx *octx, -			      unsigned int issue_flags) -{ -	/* -	 * To ensure proper ordering between the two ctxs, we can only -	 * attempt a trylock on the target. If that fails and we already have -	 * the source ctx lock, punt to io-wq. -	 */ -	if (!(issue_flags & IO_URING_F_UNLOCKED)) { -		if (!mutex_trylock(&octx->uring_lock)) +	ret = -EOVERFLOW; +	if (target_ctx->flags & IORING_SETUP_IOPOLL) { +		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))  			return -EAGAIN; -		return 0; +		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) +			ret = 0; +		io_double_unlock_ctx(target_ctx); +	} else { +		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) +			ret = 0;  	} -	mutex_lock(&octx->uring_lock); -	return 0; +	return ret;  }  static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags) @@ -148,7 +205,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag  	if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))  		ret = -EOVERFLOW;  out_unlock: -	io_double_unlock_ctx(target_ctx, issue_flags); +	io_double_unlock_ctx(target_ctx);  	return ret;  } @@ -174,6 +231,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)  	if (target_ctx == ctx)  		return -EINVAL; +	if (target_ctx->flags & IORING_SETUP_R_DISABLED) +		return -EBADFD;  	if (!src_file) {  		src_file = io_msg_grab_file(req, issue_flags);  		if (!src_file) @@ -182,14 +241,8 @@ 
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)  		req->flags |= REQ_F_NEED_CLEANUP;  	} -	if (target_ctx->task_complete && current != target_ctx->submitter_task) { -		init_task_work(&msg->tw, io_msg_tw_fd_complete); -		if (task_work_add(target_ctx->submitter_task, &msg->tw, -				  TWA_SIGNAL)) -			return -EOWNERDEAD; - -		return IOU_ISSUE_SKIP_COMPLETE; -	} +	if (io_msg_need_remote(target_ctx)) +		return io_msg_exec_remote(req, io_msg_tw_fd_complete);  	return io_msg_install_complete(req, issue_flags);  } @@ -207,7 +260,7 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  	msg->src_fd = READ_ONCE(sqe->addr3);  	msg->dst_fd = READ_ONCE(sqe->file_index);  	msg->flags = READ_ONCE(sqe->msg_ring_flags); -	if (msg->flags & ~IORING_MSG_RING_CQE_SKIP) +	if (msg->flags & ~IORING_MSG_RING_MASK)  		return -EINVAL;  	return 0; @@ -224,7 +277,7 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)  	switch (msg->cmd) {  	case IORING_MSG_DATA: -		ret = io_msg_ring_data(req); +		ret = io_msg_ring_data(req, issue_flags);  		break;  	case IORING_MSG_SEND_FD:  		ret = io_msg_send_fd(req, issue_flags);  |