diff options
Diffstat (limited to 'drivers/nvme/host/ioctl.c')
| -rw-r--r-- | drivers/nvme/host/ioctl.c | 207 | 
1 files changed, 148 insertions, 59 deletions
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 81f5550b670d..723e7d5b778f 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -8,6 +8,82 @@  #include <linux/io_uring.h>  #include "nvme.h" +enum { +	NVME_IOCTL_VEC		= (1 << 0), +	NVME_IOCTL_PARTITION	= (1 << 1), +}; + +static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, +		unsigned int flags, fmode_t mode) +{ +	u32 effects; + +	if (capable(CAP_SYS_ADMIN)) +		return true; + +	/* +	 * Do not allow unprivileged passthrough on partitions, as that allows an +	 * escape from the containment of the partition. +	 */ +	if (flags & NVME_IOCTL_PARTITION) +		return false; + +	/* +	 * Do not allow unprivileged processes to send vendor specific or fabrics +	 * commands as we can't be sure about their effects. +	 */ +	if (c->common.opcode >= nvme_cmd_vendor_start || +	    c->common.opcode == nvme_fabrics_command) +		return false; + +	/* +	 * Do not allow unprivileged passthrough of admin commands except +	 * for a subset of identify commands that contain information required +	 * to form proper I/O commands in userspace and do not expose any +	 * potentially sensitive information. +	 */ +	if (!ns) { +		if (c->common.opcode == nvme_admin_identify) { +			switch (c->identify.cns) { +			case NVME_ID_CNS_NS: +			case NVME_ID_CNS_CS_NS: +			case NVME_ID_CNS_NS_CS_INDEP: +			case NVME_ID_CNS_CS_CTRL: +			case NVME_ID_CNS_CTRL: +				return true; +			} +		} +		return false; +	} + +	/* +	 * Check if the controller provides a Commands Supported and Effects log +	 * and marks this command as supported.  If not reject unprivileged +	 * passthrough. +	 */ +	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); +	if (!(effects & NVME_CMD_EFFECTS_CSUPP)) +		return false; + +	/* +	 * Don't allow passthrough for command that have intrusive (or unknown) +	 * effects. +	 */ +	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | +			NVME_CMD_EFFECTS_UUID_SEL | +			NVME_CMD_EFFECTS_SCOPE_MASK)) +		return false; + +	/* +	 * Only allow I/O commands that transfer data to the controller or that +	 * change the logical block contents if the file descriptor is open for +	 * writing. +	 */ +	if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) +		return mode & FMODE_WRITE; +	return true; +} +  /*   * Convert integer values from ioctl structures to user pointers, silently   * ignoring the upper bits in the compat case to match behaviour of 32-bit @@ -86,7 +162,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,  static int nvme_map_user_request(struct request *req, u64 ubuffer,  		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,  		u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, -		bool vec) +		unsigned int flags)  {  	struct request_queue *q = req->q;  	struct nvme_ns *ns = q->queuedata; @@ -99,7 +175,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,  		struct iov_iter iter;  		/* fixedbufs is only for non-vectored io */ -		if (WARN_ON_ONCE(vec)) +		if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC))  			return -EINVAL;  		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,  				rq_data_dir(req), &iter, ioucmd); @@ -108,8 +184,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,  		ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);  	} else {  		ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), -				bufflen, GFP_KERNEL, vec, 0, 0, -				rq_data_dir(req)); +				bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, +				0, rq_data_dir(req));  	}  	if (ret) @@ -139,10 +215,11 @@ out:  }  static int nvme_submit_user_cmd(struct request_queue *q, -		struct nvme_command *cmd, u64 ubuffer, -		unsigned bufflen, void __user *meta_buffer, unsigned meta_len, -		u32 meta_seed, u64 *result, unsigned timeout, bool vec) +		struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, +		void __user *meta_buffer, unsigned meta_len, u32 meta_seed, +		u64 *result, unsigned timeout, unsigned int flags)  { +	struct nvme_ns *ns = q->queuedata;  	struct nvme_ctrl *ctrl;  	struct request *req;  	void *meta = NULL; @@ -157,7 +234,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,  	req->timeout = timeout;  	if (ubuffer && bufflen) {  		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, -				meta_len, meta_seed, &meta, NULL, vec); +				meta_len, meta_seed, &meta, NULL, flags);  		if (ret)  			return ret;  	} @@ -165,8 +242,8 @@ static int nvme_submit_user_cmd(struct request_queue *q,  	bio = req->bio;  	ctrl = nvme_req(req)->ctrl; -	ret = nvme_execute_passthru_rq(req, &effects); - +	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); +	ret = nvme_execute_rq(req, false);  	if (result)  		*result = le64_to_cpu(nvme_req(req)->result.u64);  	if (meta) @@ -240,10 +317,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)  	c.rw.apptag = cpu_to_le16(io.apptag);  	c.rw.appmask = cpu_to_le16(io.appmask); -	return nvme_submit_user_cmd(ns->queue, &c, -			io.addr, length, -			metadata, meta_len, lower_32_bits(io.slba), NULL, 0, -			false); +	return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, +			meta_len, lower_32_bits(io.slba), NULL, 0, 0);  }  static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, @@ -261,7 +336,8 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,  }  static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, -			struct nvme_passthru_cmd __user *ucmd) +		struct nvme_passthru_cmd __user *ucmd, unsigned int flags, +		fmode_t mode)  {  	struct nvme_passthru_cmd cmd;  	struct nvme_command c; @@ -269,8 +345,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,  	u64 result;  	int status; -	if (!capable(CAP_SYS_ADMIN)) -		return -EACCES;  	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))  		return -EFAULT;  	if (cmd.flags) @@ -291,13 +365,15 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,  	c.common.cdw14 = cpu_to_le32(cmd.cdw14);  	c.common.cdw15 = cpu_to_le32(cmd.cdw15); +	if (!nvme_cmd_allowed(ns, &c, 0, mode)) +		return -EACCES; +  	if (cmd.timeout_ms)  		timeout = msecs_to_jiffies(cmd.timeout_ms);  	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, -			cmd.addr, cmd.data_len, -			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, -			0, &result, timeout, false); +			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), +			cmd.metadata_len, 0, &result, timeout, 0);  	if (status >= 0) {  		if (put_user(result, &ucmd->result)) @@ -308,15 +384,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,  }  static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, -			struct nvme_passthru_cmd64 __user *ucmd, bool vec) +		struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, +		fmode_t mode)  {  	struct nvme_passthru_cmd64 cmd;  	struct nvme_command c;  	unsigned timeout = 0;  	int status; -	if (!capable(CAP_SYS_ADMIN)) -		return -EACCES;  	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))  		return -EFAULT;  	if (cmd.flags) @@ -337,13 +412,15 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,  	c.common.cdw14 = cpu_to_le32(cmd.cdw14);  	c.common.cdw15 = cpu_to_le32(cmd.cdw15); +	if (!nvme_cmd_allowed(ns, &c, flags, mode)) +		return -EACCES; +  	if (cmd.timeout_ms)  		timeout = msecs_to_jiffies(cmd.timeout_ms);  	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, -			cmd.addr, cmd.data_len, -			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, -			0, &cmd.result, timeout, vec); +			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), +			cmd.metadata_len, 0, &cmd.result, timeout, flags);  	if (status >= 0) {  		if (put_user(cmd.result, &ucmd->result)) @@ -478,14 +555,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,  	struct nvme_uring_data d;  	struct nvme_command c;  	struct request *req; -	blk_opf_t rq_flags = 0; +	blk_opf_t rq_flags = REQ_ALLOC_CACHE;  	blk_mq_req_flags_t blk_flags = 0;  	void *meta = NULL;  	int ret; -	if (!capable(CAP_SYS_ADMIN)) -		return -EACCES; -  	c.common.opcode = READ_ONCE(cmd->opcode);  	c.common.flags = READ_ONCE(cmd->flags);  	if (c.common.flags) @@ -507,6 +581,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,  	c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));  	c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); +	if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode)) +		return -EACCES; +  	d.metadata = READ_ONCE(cmd->metadata);  	d.addr = READ_ONCE(cmd->addr);  	d.data_len = READ_ONCE(cmd->data_len); @@ -514,7 +591,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,  	d.timeout_ms = READ_ONCE(cmd->timeout_ms);  	if (issue_flags & IO_URING_F_NONBLOCK) { -		rq_flags = REQ_NOWAIT; +		rq_flags |= REQ_NOWAIT;  		blk_flags = BLK_MQ_REQ_NOWAIT;  	}  	if (issue_flags & IO_URING_F_IOPOLL) @@ -570,13 +647,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)  }  static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, -		void __user *argp) +		void __user *argp, fmode_t mode)  {  	switch (cmd) {  	case NVME_IOCTL_ADMIN_CMD: -		return nvme_user_cmd(ctrl, NULL, argp); +		return nvme_user_cmd(ctrl, NULL, argp, 0, mode);  	case NVME_IOCTL_ADMIN64_CMD: -		return nvme_user_cmd64(ctrl, NULL, argp, false); +		return nvme_user_cmd64(ctrl, NULL, argp, 0, mode);  	default:  		return sed_ioctl(ctrl->opal_dev, cmd, argp);  	} @@ -601,14 +678,14 @@ struct nvme_user_io32 {  #endif /* COMPAT_FOR_U64_ALIGNMENT */  static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, -		void __user *argp) +		void __user *argp, unsigned int flags, fmode_t mode)  {  	switch (cmd) {  	case NVME_IOCTL_ID:  		force_successful_syscall_return();  		return ns->head->ns_id;  	case NVME_IOCTL_IO_CMD: -		return nvme_user_cmd(ns->ctrl, ns, argp); +		return nvme_user_cmd(ns->ctrl, ns, argp, flags, mode);  	/*  	 * struct nvme_user_io can have different padding on some 32-bit ABIs.  	 * Just accept the compat version as all fields that are used are the @@ -619,36 +696,40 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,  #endif  	case NVME_IOCTL_SUBMIT_IO:  		return nvme_submit_io(ns, argp); -	case NVME_IOCTL_IO64_CMD: -		return nvme_user_cmd64(ns->ctrl, ns, argp, false);  	case NVME_IOCTL_IO64_CMD_VEC: -		return nvme_user_cmd64(ns->ctrl, ns, argp, true); +		flags |= NVME_IOCTL_VEC; +		fallthrough; +	case NVME_IOCTL_IO64_CMD: +		return nvme_user_cmd64(ns->ctrl, ns, argp, flags, mode);  	default:  		return -ENOTTY;  	}  } -static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg) -{ -       if (is_ctrl_ioctl(cmd)) -               return nvme_ctrl_ioctl(ns->ctrl, cmd, arg); -       return nvme_ns_ioctl(ns, cmd, arg); -} -  int nvme_ioctl(struct block_device *bdev, fmode_t mode,  		unsigned int cmd, unsigned long arg)  {  	struct nvme_ns *ns = bdev->bd_disk->private_data; +	void __user *argp = (void __user *)arg; +	unsigned int flags = 0; -	return __nvme_ioctl(ns, cmd, (void __user *)arg); +	if (bdev_is_partition(bdev)) +		flags |= NVME_IOCTL_PARTITION; + +	if (is_ctrl_ioctl(cmd)) +		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); +	return nvme_ns_ioctl(ns, cmd, argp, flags, mode);  }  long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)  {  	struct nvme_ns *ns =  		container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); +	void __user *argp = (void __user *)arg; -	return __nvme_ioctl(ns, cmd, (void __user *)arg); +	if (is_ctrl_ioctl(cmd)) +		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, file->f_mode); +	return nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode);  }  static int nvme_uring_cmd_checks(unsigned int issue_flags) @@ -716,7 +797,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,  }  #ifdef CONFIG_NVME_MULTIPATH  static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, -		void __user *argp, struct nvme_ns_head *head, int srcu_idx) +		void __user *argp, struct nvme_ns_head *head, int srcu_idx, +		fmode_t mode)  	__releases(&head->srcu)  {  	struct nvme_ctrl *ctrl = ns->ctrl; @@ -724,7 +806,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,  	nvme_get_ctrl(ns->ctrl);  	srcu_read_unlock(&head->srcu, srcu_idx); -	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); +	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);  	nvme_put_ctrl(ctrl);  	return ret; @@ -737,6 +819,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,  	void __user *argp = (void __user *)arg;  	struct nvme_ns *ns;  	int srcu_idx, ret = -EWOULDBLOCK; +	unsigned int flags = 0; + +	if (bdev_is_partition(bdev)) +		flags |= NVME_IOCTL_PARTITION;  	srcu_idx = srcu_read_lock(&head->srcu);  	ns = nvme_find_path(head); @@ -749,9 +835,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,  	 * deadlock when deleting namespaces using the passthrough interface.  	 */  	if (is_ctrl_ioctl(cmd)) -		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); +		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, +					mode); -	ret = nvme_ns_ioctl(ns, cmd, argp); +	ret = nvme_ns_ioctl(ns, cmd, argp, flags, mode);  out_unlock:  	srcu_read_unlock(&head->srcu, srcu_idx);  	return ret; @@ -773,9 +860,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,  		goto out_unlock;  	if (is_ctrl_ioctl(cmd)) -		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); +		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, +				file->f_mode); -	ret = nvme_ns_ioctl(ns, cmd, argp); +	ret = nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode);  out_unlock:  	srcu_read_unlock(&head->srcu, srcu_idx);  	return ret; @@ -849,7 +937,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)  	return ret;  } -static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) +static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, +		fmode_t mode)  {  	struct nvme_ns *ns;  	int ret; @@ -873,7 +962,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)  	kref_get(&ns->kref);  	up_read(&ctrl->namespaces_rwsem); -	ret = nvme_user_cmd(ctrl, ns, argp); +	ret = nvme_user_cmd(ctrl, ns, argp, 0, mode);  	nvme_put_ns(ns);  	return ret; @@ -890,11 +979,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,  	switch (cmd) {  	case NVME_IOCTL_ADMIN_CMD: -		return nvme_user_cmd(ctrl, NULL, argp); +		return nvme_user_cmd(ctrl, NULL, argp, 0, file->f_mode);  	case NVME_IOCTL_ADMIN64_CMD: -		return nvme_user_cmd64(ctrl, NULL, argp, false); +		return nvme_user_cmd64(ctrl, NULL, argp, 0, file->f_mode);  	case NVME_IOCTL_IO_CMD: -		return nvme_dev_user_cmd(ctrl, argp); +		return nvme_dev_user_cmd(ctrl, argp, file->f_mode);  	case NVME_IOCTL_RESET:  		if (!capable(CAP_SYS_ADMIN))  			return -EACCES;  |