Diffstat (limited to 'drivers/nvme/host/ioctl.c')
-rw-r--r--  drivers/nvme/host/ioctl.c  207
1 file changed, 148 insertions, 59 deletions
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 81f5550b670d..723e7d5b778f 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -8,6 +8,82 @@
#include <linux/io_uring.h>
#include "nvme.h"
+enum {
+ NVME_IOCTL_VEC = (1 << 0),
+ NVME_IOCTL_PARTITION = (1 << 1),
+};
+
+static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
+ unsigned int flags, fmode_t mode)
+{
+ u32 effects;
+
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+
+ /*
+ * Do not allow unprivileged passthrough on partitions, as that allows an
+ * escape from the containment of the partition.
+ */
+ if (flags & NVME_IOCTL_PARTITION)
+ return false;
+
+ /*
+ * Do not allow unprivileged processes to send vendor specific or fabrics
+ * commands as we can't be sure about their effects.
+ */
+ if (c->common.opcode >= nvme_cmd_vendor_start ||
+ c->common.opcode == nvme_fabrics_command)
+ return false;
+
+ /*
+ * Do not allow unprivileged passthrough of admin commands except
+ * for a subset of identify commands that contain information required
+ * to form proper I/O commands in userspace and do not expose any
+ * potentially sensitive information.
+ */
+ if (!ns) {
+ if (c->common.opcode == nvme_admin_identify) {
+ switch (c->identify.cns) {
+ case NVME_ID_CNS_NS:
+ case NVME_ID_CNS_CS_NS:
+ case NVME_ID_CNS_NS_CS_INDEP:
+ case NVME_ID_CNS_CS_CTRL:
+ case NVME_ID_CNS_CTRL:
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /*
+ * Check if the controller provides a Commands Supported and Effects log
+ * and marks this command as supported. If not, reject unprivileged
+ * passthrough.
+ */
+ effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
+ if (!(effects & NVME_CMD_EFFECTS_CSUPP))
+ return false;
+
+ /*
+ * Don't allow passthrough for commands that have intrusive (or unknown)
+ * effects.
+ */
+ if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
+ NVME_CMD_EFFECTS_UUID_SEL |
+ NVME_CMD_EFFECTS_SCOPE_MASK))
+ return false;
+
+ /*
+ * Only allow I/O commands that transfer data to the controller or that
+ * change the logical block contents if the file descriptor is open for
+ * writing.
+ */
+ if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
+ return mode & FMODE_WRITE;
+ return true;
+}
+
/*
* Convert integer values from ioctl structures to user pointers, silently
* ignoring the upper bits in the compat case to match behaviour of 32-bit
@@ -86,7 +162,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd,
- bool vec)
+ unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
@@ -99,7 +175,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
struct iov_iter iter;
/* fixedbufs is only for non-vectored io */
- if (WARN_ON_ONCE(vec))
+ if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC))
return -EINVAL;
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
rq_data_dir(req), &iter, ioucmd);
@@ -108,8 +184,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
} else {
ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
- bufflen, GFP_KERNEL, vec, 0, 0,
- rq_data_dir(req));
+ bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
+ 0, rq_data_dir(req));
}
if (ret)
@@ -139,10 +215,11 @@ out:
}
static int nvme_submit_user_cmd(struct request_queue *q,
- struct nvme_command *cmd, u64 ubuffer,
- unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- u32 meta_seed, u64 *result, unsigned timeout, bool vec)
+ struct nvme_command *cmd, u64 ubuffer, unsigned bufflen,
+ void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
+ u64 *result, unsigned timeout, unsigned int flags)
{
+ struct nvme_ns *ns = q->queuedata;
struct nvme_ctrl *ctrl;
struct request *req;
void *meta = NULL;
@@ -157,7 +234,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
- meta_len, meta_seed, &meta, NULL, vec);
+ meta_len, meta_seed, &meta, NULL, flags);
if (ret)
return ret;
}
@@ -165,8 +242,8 @@ static int nvme_submit_user_cmd(struct request_queue *q,
bio = req->bio;
ctrl = nvme_req(req)->ctrl;
- ret = nvme_execute_passthru_rq(req, &effects);
-
+ effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
+ ret = nvme_execute_rq(req, false);
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
if (meta)
@@ -240,10 +317,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);
- return nvme_submit_user_cmd(ns->queue, &c,
- io.addr, length,
- metadata, meta_len, lower_32_bits(io.slba), NULL, 0,
- false);
+ return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
+ meta_len, lower_32_bits(io.slba), NULL, 0, 0);
}
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
@@ -261,7 +336,8 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd)
+ struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
+ fmode_t mode)
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
@@ -269,8 +345,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u64 result;
int status;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
@@ -291,13 +365,15 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+ if (!nvme_cmd_allowed(ns, &c, 0, mode))
+ return -EACCES;
+
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
- cmd.addr, cmd.data_len,
- nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &result, timeout, false);
+ cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
+ cmd.metadata_len, 0, &result, timeout, 0);
if (status >= 0) {
if (put_user(result, &ucmd->result))
@@ -308,15 +384,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
}
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd64 __user *ucmd, bool vec)
+ struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
+ fmode_t mode)
{
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
unsigned timeout = 0;
int status;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
@@ -337,13 +412,15 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+ if (!nvme_cmd_allowed(ns, &c, flags, mode))
+ return -EACCES;
+
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
- cmd.addr, cmd.data_len,
- nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &cmd.result, timeout, vec);
+ cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
+ cmd.metadata_len, 0, &cmd.result, timeout, flags);
if (status >= 0) {
if (put_user(cmd.result, &ucmd->result))
@@ -478,14 +555,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_uring_data d;
struct nvme_command c;
struct request *req;
- blk_opf_t rq_flags = 0;
+ blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
void *meta = NULL;
int ret;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
c.common.opcode = READ_ONCE(cmd->opcode);
c.common.flags = READ_ONCE(cmd->flags);
if (c.common.flags)
@@ -507,6 +581,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
+ if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode))
+ return -EACCES;
+
d.metadata = READ_ONCE(cmd->metadata);
d.addr = READ_ONCE(cmd->addr);
d.data_len = READ_ONCE(cmd->data_len);
@@ -514,7 +591,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
d.timeout_ms = READ_ONCE(cmd->timeout_ms);
if (issue_flags & IO_URING_F_NONBLOCK) {
- rq_flags = REQ_NOWAIT;
+ rq_flags |= REQ_NOWAIT;
blk_flags = BLK_MQ_REQ_NOWAIT;
}
if (issue_flags & IO_URING_F_IOPOLL)
@@ -570,13 +647,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
}
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
- void __user *argp)
+ void __user *argp, fmode_t mode)
{
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
+ return nvme_user_cmd(ctrl, NULL, argp, 0, mode);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, false);
+ return nvme_user_cmd64(ctrl, NULL, argp, 0, mode);
default:
return sed_ioctl(ctrl->opal_dev, cmd, argp);
}
@@ -601,14 +678,14 @@ struct nvme_user_io32 {
#endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp)
+ void __user *argp, unsigned int flags, fmode_t mode)
{
switch (cmd) {
case NVME_IOCTL_ID:
force_successful_syscall_return();
return ns->head->ns_id;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, argp);
+ return nvme_user_cmd(ns->ctrl, ns, argp, flags, mode);
/*
* struct nvme_user_io can have different padding on some 32-bit ABIs.
* Just accept the compat version as all fields that are used are the
@@ -619,36 +696,40 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
#endif
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, argp);
- case NVME_IOCTL_IO64_CMD:
- return nvme_user_cmd64(ns->ctrl, ns, argp, false);
case NVME_IOCTL_IO64_CMD_VEC:
- return nvme_user_cmd64(ns->ctrl, ns, argp, true);
+ flags |= NVME_IOCTL_VEC;
+ fallthrough;
+ case NVME_IOCTL_IO64_CMD:
+ return nvme_user_cmd64(ns->ctrl, ns, argp, flags, mode);
default:
return -ENOTTY;
}
}
-static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg)
-{
- if (is_ctrl_ioctl(cmd))
- return nvme_ctrl_ioctl(ns->ctrl, cmd, arg);
- return nvme_ns_ioctl(ns, cmd, arg);
-}
-
int nvme_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct nvme_ns *ns = bdev->bd_disk->private_data;
+ void __user *argp = (void __user *)arg;
+ unsigned int flags = 0;
- return __nvme_ioctl(ns, cmd, (void __user *)arg);
+ if (bdev_is_partition(bdev))
+ flags |= NVME_IOCTL_PARTITION;
+
+ if (is_ctrl_ioctl(cmd))
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
+ return nvme_ns_ioctl(ns, cmd, argp, flags, mode);
}
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct nvme_ns *ns =
container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
+ void __user *argp = (void __user *)arg;
- return __nvme_ioctl(ns, cmd, (void __user *)arg);
+ if (is_ctrl_ioctl(cmd))
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, file->f_mode);
+ return nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -716,7 +797,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp, struct nvme_ns_head *head, int srcu_idx)
+ void __user *argp, struct nvme_ns_head *head, int srcu_idx,
+ fmode_t mode)
__releases(&head->srcu)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -724,7 +806,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
nvme_get_ctrl(ns->ctrl);
srcu_read_unlock(&head->srcu, srcu_idx);
- ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+ ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
nvme_put_ctrl(ctrl);
return ret;
@@ -737,6 +819,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
void __user *argp = (void __user *)arg;
struct nvme_ns *ns;
int srcu_idx, ret = -EWOULDBLOCK;
+ unsigned int flags = 0;
+
+ if (bdev_is_partition(bdev))
+ flags |= NVME_IOCTL_PARTITION;
srcu_idx = srcu_read_lock(&head->srcu);
ns = nvme_find_path(head);
@@ -749,9 +835,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
* deadlock when deleting namespaces using the passthrough interface.
*/
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+ mode);
- ret = nvme_ns_ioctl(ns, cmd, argp);
+ ret = nvme_ns_ioctl(ns, cmd, argp, flags, mode);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -773,9 +860,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
goto out_unlock;
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+ file->f_mode);
- ret = nvme_ns_ioctl(ns, cmd, argp);
+ ret = nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -849,7 +937,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
return ret;
}
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
+ fmode_t mode)
{
struct nvme_ns *ns;
int ret;
@@ -873,7 +962,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
kref_get(&ns->kref);
up_read(&ctrl->namespaces_rwsem);
- ret = nvme_user_cmd(ctrl, ns, argp);
+ ret = nvme_user_cmd(ctrl, ns, argp, 0, mode);
nvme_put_ns(ns);
return ret;
@@ -890,11 +979,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
+ return nvme_user_cmd(ctrl, NULL, argp, 0, file->f_mode);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, false);
+ return nvme_user_cmd64(ctrl, NULL, argp, 0, file->f_mode);
case NVME_IOCTL_IO_CMD:
- return nvme_dev_user_cmd(ctrl, argp);
+ return nvme_dev_user_cmd(ctrl, argp, file->f_mode);
case NVME_IOCTL_RESET:
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
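
For reference, and not part of the patch above: a minimal userspace sketch of the kind of unprivileged passthrough that nvme_cmd_allowed() now permits, sending an Identify Controller admin command through NVME_IOCTL_ADMIN_CMD on a namespace character device opened read-only. The device path /dev/ng0n1 and the minimal error handling are illustrative assumptions; opcode 06h and CNS 01h correspond to Identify Controller.

/*
 * Illustrative sketch only, not part of the diff above: issue an
 * Identify Controller admin command without CAP_SYS_ADMIN via
 * NVME_IOCTL_ADMIN_CMD.  Assumes a namespace character device at
 * /dev/ng0n1 (hypothetical path) that the caller may open read-only.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvme_ioctl.h>

int main(void)
{
	struct nvme_passthru_cmd cmd;
	char id[4096];				/* Identify data is 4096 bytes */
	int fd, ret;

	fd = open("/dev/ng0n1", O_RDONLY);	/* assumed device node */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = 0x06;			/* nvme_admin_identify */
	cmd.cdw10 = 0x01;			/* CNS 01h: Identify Controller */
	cmd.addr = (uintptr_t)id;
	cmd.data_len = sizeof(id);

	ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
	if (ret < 0) {
		perror("NVME_IOCTL_ADMIN_CMD");	/* EACCES if disallowed */
		close(fd);
		return 1;
	}

	/* Model number is bytes 24..63 of the Identify Controller data. */
	printf("model: %.40s\n", id + 24);
	close(fd);
	return 0;
}

Before this change the ioctl failed with EACCES for any caller without CAP_SYS_ADMIN; with it, the command is admitted because Identify with CNS 01h is on the allowlist in nvme_cmd_allowed(), while vendor-specific, fabrics, and effects-bearing commands remain restricted.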