diff options
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/Makefile | 3 | ||||
-rw-r--r-- | drivers/infiniband/core/cgroup.c | 10 | ||||
-rw-r--r-- | drivers/infiniband/core/core_priv.h | 9 | ||||
-rw-r--r-- | drivers/infiniband/core/cq.c | 47 | ||||
-rw-r--r-- | drivers/infiniband/core/device.c | 180 | ||||
-rw-r--r-- | drivers/infiniband/core/mr_pool.c | 18 | ||||
-rw-r--r-- | drivers/infiniband/core/nldev.c | 233 | ||||
-rw-r--r-- | drivers/infiniband/core/rdma_core.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/rw.c | 211 | ||||
-rw-r--r-- | drivers/infiniband/core/ucm.c | 1350 | ||||
-rw-r--r-- | drivers/infiniband/core/ucma.c | 114 | ||||
-rw-r--r-- | drivers/infiniband/core/umem.c | 13 | ||||
-rw-r--r-- | drivers/infiniband/core/umem_odp.c | 106 | ||||
-rw-r--r-- | drivers/infiniband/core/user_mad.c | 53 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 56 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 40 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_std_types_cq.c | 21 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_std_types_mr.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_uapi.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/core/verbs.c | 156 |
20 files changed, 844 insertions, 1785 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 313f2349b518..42f1b2a4f746 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -6,7 +6,6 @@ obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \ $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y) -obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ @@ -29,8 +28,6 @@ rdma_ucm-y := ucma.o ib_umad-y := user_mad.o -ib_ucm-y := ucm.o - ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ uverbs_std_types_cq.o \ diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c index 388fd04e5f63..1f037fe01450 100644 --- a/drivers/infiniband/core/cgroup.c +++ b/drivers/infiniband/core/cgroup.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #include "core_priv.h" diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index ff40a450b5d2..a953c2fa2e78 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -88,6 +88,15 @@ typedef int (*nldev_callback)(struct ib_device *device, int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, struct netlink_callback *cb); +struct ib_client_nl_info { + struct sk_buff *nl_msg; + struct device *cdev; + unsigned int port; + u64 abi; +}; +int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, + struct ib_client_nl_info *res); + enum ib_cache_gid_default_mode { IB_CACHE_GID_DEFAULT_MODE_SET, IB_CACHE_GID_DEFAULT_MODE_DELETE diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index a4c81992267c..00d70f166209 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015 HGST, a Western Digital Company. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #include <linux/module.h> #include <linux/err.h> @@ -121,7 +113,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) } /** - * __ib_alloc_cq - allocate a completion queue + * __ib_alloc_cq_user - allocate a completion queue * @dev: device to allocate the CQ for * @private: driver private data, accessible from cq->cq_context * @nr_cqe: number of CQEs to allocate @@ -147,23 +139,26 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, struct ib_cq *cq; int ret = -ENOMEM; - cq = dev->ops.create_cq(dev, &cq_attr, NULL); - if (IS_ERR(cq)) - return cq; + cq = rdma_zalloc_drv_obj(dev, ib_cq); + if (!cq) + return ERR_PTR(ret); cq->device = dev; - cq->uobject = NULL; - cq->event_handler = NULL; cq->cq_context = private; cq->poll_ctx = poll_ctx; atomic_set(&cq->usecnt, 0); cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL); if (!cq->wc) - goto out_destroy_cq; + goto out_free_cq; cq->res.type = RDMA_RESTRACK_CQ; rdma_restrack_set_task(&cq->res, caller); + + ret = dev->ops.create_cq(cq, &cq_attr, NULL); + if (ret) + goto out_free_wc; + rdma_restrack_kadd(&cq->res); switch (cq->poll_ctx) { @@ -186,29 +181,29 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, break; default: ret = -EINVAL; - goto out_free_wc; + goto out_destroy_cq; } return cq; -out_free_wc: - kfree(cq->wc); - rdma_restrack_del(&cq->res); out_destroy_cq: + rdma_restrack_del(&cq->res); cq->device->ops.destroy_cq(cq, udata); +out_free_wc: + kfree(cq->wc); +out_free_cq: + kfree(cq); return ERR_PTR(ret); } EXPORT_SYMBOL(__ib_alloc_cq_user); /** - * ib_free_cq - free a completion queue + * ib_free_cq_user - free a completion queue * @cq: completion queue to free. * @udata: User data or NULL for kernel object */ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) { - int ret; - if (WARN_ON_ONCE(atomic_read(&cq->usecnt))) return; @@ -226,9 +221,9 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) WARN_ON_ONCE(1); } - kfree(cq->wc); rdma_restrack_del(&cq->res); - ret = cq->device->ops.destroy_cq(cq, udata); - WARN_ON_ONCE(ret); + cq->device->ops.destroy_cq(cq, udata); + kfree(cq->wc); + kfree(cq); } EXPORT_SYMBOL(ib_free_cq_user); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 78dc07c6ac4b..8a6ccb936dfe 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -270,7 +270,7 @@ struct ib_port_data_rcu { struct ib_port_data pdata[]; }; -static int ib_device_check_mandatory(struct ib_device *device) +static void ib_device_check_mandatory(struct ib_device *device) { #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } static const struct { @@ -305,8 +305,6 @@ static int ib_device_check_mandatory(struct ib_device *device) break; } } - - return 0; } /* @@ -375,7 +373,7 @@ struct ib_device *ib_device_get_by_name(const char *name, down_read(&devices_rwsem); device = __ib_device_get_by_name(name); if (device && driver_id != RDMA_DRIVER_UNKNOWN && - device->driver_id != driver_id) + device->ops.driver_id != driver_id) device = NULL; if (device) { @@ -409,27 +407,44 @@ static int rename_compat_devs(struct ib_device *device) int ib_device_rename(struct ib_device *ibdev, const char *name) { + unsigned long index; + void *client_data; int ret; down_write(&devices_rwsem); if (!strcmp(name, dev_name(&ibdev->dev))) { - ret = 0; - goto out; + up_write(&devices_rwsem); + return 0; } if (__ib_device_get_by_name(name)) { - ret = -EEXIST; - goto out; + up_write(&devices_rwsem); + return -EEXIST; } ret = device_rename(&ibdev->dev, name); - if (ret) - goto out; + if (ret) { + up_write(&devices_rwsem); + return ret; + } + strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); ret = rename_compat_devs(ibdev); -out: - up_write(&devices_rwsem); - return ret; + + downgrade_write(&devices_rwsem); + down_read(&ibdev->client_data_rwsem); + xan_for_each_marked(&ibdev->client_data, index, client_data, + CLIENT_DATA_REGISTERED) { + struct ib_client *client = xa_load(&clients, index); + + if (!client || !client->rename) + continue; + + client->rename(ibdev, client_data); + } + up_read(&ibdev->client_data_rwsem); + up_read(&devices_rwsem); + return 0; } static int alloc_name(struct ib_device *ibdev, const char *name) @@ -474,14 +489,15 @@ static void ib_device_release(struct device *device) free_netdevs(dev); WARN_ON(refcount_read(&dev->refcount)); - ib_cache_release_one(dev); - ib_security_release_port_pkey_list(dev); - xa_destroy(&dev->compat_devs); - xa_destroy(&dev->client_data); - if (dev->port_data) + if (dev->port_data) { + ib_cache_release_one(dev); + ib_security_release_port_pkey_list(dev); kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, pdata[0]), rcu_head); + } + xa_destroy(&dev->compat_devs); + xa_destroy(&dev->client_data); kfree_rcu(dev, rcu_head); } @@ -1175,10 +1191,7 @@ static int setup_device(struct ib_device *device) int ret; setup_dma_device(device); - - ret = ib_device_check_mandatory(device); - if (ret) - return ret; + ib_device_check_mandatory(device); ret = setup_port_data(device); if (ret) { @@ -1461,7 +1474,7 @@ void ib_unregister_driver(enum rdma_driver_id driver_id) down_read(&devices_rwsem); xa_for_each (&devices, index, ib_dev) { - if (ib_dev->driver_id != driver_id) + if (ib_dev->ops.driver_id != driver_id) continue; get_device(&ib_dev->dev); @@ -1731,6 +1744,104 @@ void ib_unregister_client(struct ib_client *client) } EXPORT_SYMBOL(ib_unregister_client); +static int __ib_get_global_client_nl_info(const char *client_name, + struct ib_client_nl_info *res) +{ + struct ib_client *client; + unsigned long index; + int ret = -ENOENT; + + down_read(&clients_rwsem); + xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { + if (strcmp(client->name, client_name) != 0) + continue; + if (!client->get_global_nl_info) { + ret = -EOPNOTSUPP; + break; + } + ret = client->get_global_nl_info(res); + if (WARN_ON(ret == -ENOENT)) + ret = -EINVAL; + if (!ret && res->cdev) + get_device(res->cdev); + break; + } + up_read(&clients_rwsem); + return ret; +} + +static int __ib_get_client_nl_info(struct ib_device *ibdev, + const char *client_name, + struct ib_client_nl_info *res) +{ + unsigned long index; + void *client_data; + int ret = -ENOENT; + + down_read(&ibdev->client_data_rwsem); + xan_for_each_marked (&ibdev->client_data, index, client_data, + CLIENT_DATA_REGISTERED) { + struct ib_client *client = xa_load(&clients, index); + + if (!client || strcmp(client->name, client_name) != 0) + continue; + if (!client->get_nl_info) { + ret = -EOPNOTSUPP; + break; + } + ret = client->get_nl_info(ibdev, client_data, res); + if (WARN_ON(ret == -ENOENT)) + ret = -EINVAL; + + /* + * The cdev is guaranteed valid as long as we are inside the + * client_data_rwsem as remove_one can't be called. Keep it + * valid for the caller. + */ + if (!ret && res->cdev) + get_device(res->cdev); + break; + } + up_read(&ibdev->client_data_rwsem); + + return ret; +} + +/** + * ib_get_client_nl_info - Fetch the nl_info from a client + * @device - IB device + * @client_name - Name of the client + * @res - Result of the query + */ +int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, + struct ib_client_nl_info *res) +{ + int ret; + + if (ibdev) + ret = __ib_get_client_nl_info(ibdev, client_name, res); + else + ret = __ib_get_global_client_nl_info(client_name, res); +#ifdef CONFIG_MODULES + if (ret == -ENOENT) { + request_module("rdma-client-%s", client_name); + if (ibdev) + ret = __ib_get_client_nl_info(ibdev, client_name, res); + else + ret = __ib_get_global_client_nl_info(client_name, res); + } +#endif + if (ret) { + if (ret == -ENOENT) + return -EOPNOTSUPP; + return ret; + } + + if (WARN_ON(!res->cdev)) + return -EINVAL; + return 0; +} + /** * ib_set_client_data - Set IB client context * @device:Device to set context for @@ -1935,6 +2046,9 @@ static void free_netdevs(struct ib_device *ib_dev) unsigned long flags; unsigned int port; + if (!ib_dev->port_data) + return; + rdma_for_each_port (ib_dev, port) { struct ib_port_data *pdata = &ib_dev->port_data[port]; struct net_device *ndev; @@ -2018,7 +2132,7 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, (uintptr_t)ndev) { if (rcu_access_pointer(cur->netdev) == ndev && (driver_id == RDMA_DRIVER_UNKNOWN || - cur->ib_dev->driver_id == driver_id) && + cur->ib_dev->ops.driver_id == driver_id) && ib_device_try_get(cur->ib_dev)) { res = cur->ib_dev; break; @@ -2323,12 +2437,28 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) #define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name) + if (ops->driver_id != RDMA_DRIVER_UNKNOWN) { + WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN && + dev_ops->driver_id != ops->driver_id); + dev_ops->driver_id = ops->driver_id; + } + if (ops->owner) { + WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner); + dev_ops->owner = ops->owner; + } + if (ops->uverbs_abi_ver) + dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver; + + dev_ops->uverbs_no_driver_id_binding |= + ops->uverbs_no_driver_id_binding; + SET_DEVICE_OP(dev_ops, add_gid); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); SET_DEVICE_OP(dev_ops, alloc_fmr); SET_DEVICE_OP(dev_ops, alloc_hw_stats); SET_DEVICE_OP(dev_ops, alloc_mr); + SET_DEVICE_OP(dev_ops, alloc_mr_integrity); SET_DEVICE_OP(dev_ops, alloc_mw); SET_DEVICE_OP(dev_ops, alloc_pd); SET_DEVICE_OP(dev_ops, alloc_rdma_netdev); @@ -2388,6 +2518,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, iw_reject); SET_DEVICE_OP(dev_ops, iw_rem_ref); SET_DEVICE_OP(dev_ops, map_mr_sg); + SET_DEVICE_OP(dev_ops, map_mr_sg_pi); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); SET_DEVICE_OP(dev_ops, modify_ah); @@ -2424,6 +2555,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, unmap_fmr); SET_OBJ_SIZE(dev_ops, ib_ah); + SET_OBJ_SIZE(dev_ops, ib_cq); SET_OBJ_SIZE(dev_ops, ib_pd); SET_OBJ_SIZE(dev_ops, ib_srq); SET_OBJ_SIZE(dev_ops, ib_ucontext); diff --git a/drivers/infiniband/core/mr_pool.c b/drivers/infiniband/core/mr_pool.c index 49d478b2ea94..c0e2df128b34 100644 --- a/drivers/infiniband/core/mr_pool.c +++ b/drivers/infiniband/core/mr_pool.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016 HGST, a Western Digital Company. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #include <rdma/ib_verbs.h> #include <rdma/mr_pool.h> @@ -42,14 +34,18 @@ void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr) EXPORT_SYMBOL(ib_mr_pool_put); int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr, - enum ib_mr_type type, u32 max_num_sg) + enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg) { struct ib_mr *mr; unsigned long flags; int ret, i; for (i = 0; i < nr; i++) { - mr = ib_alloc_mr(qp->pd, type, max_num_sg); + if (type == IB_MR_TYPE_INTEGRITY) + mr = ib_alloc_mr_integrity(qp->pd, max_num_sg, + max_num_meta_sg); + else + mr = ib_alloc_mr(qp->pd, type, max_num_sg); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto out; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 69188cbbd99b..5499f5629dc2 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -42,84 +42,94 @@ #include "cma_priv.h" #include "restrack.h" +/* + * Sort array elements by the netlink attribute name + */ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { - [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, - .len = IB_DEVICE_NAME_MAX - 1}, - [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, - .len = IB_FW_VERSION_NAME_MAX - 1}, - [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING, - .len = 16 }, - [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, - .len = TASK_COMM_LEN }, + [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE }, + [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, + .len = IB_DEVICE_NAME_MAX }, + [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, + [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, + [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, + .len = IFNAMSIZ }, + [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, + .len = IFNAMSIZ }, + [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { - .len = sizeof(struct __kernel_sockaddr_storage) }, - [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { - .len = sizeof(struct __kernel_sockaddr_storage) }, [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { + .len = sizeof(struct __kernel_sockaddr_storage) }, [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, - .len = IFNAMSIZ }, - [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, - .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, - [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, - [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, - [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, - .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, - [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, - .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { + .len = sizeof(struct __kernel_sockaddr_storage) }, + [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 }, [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, + [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -1347,6 +1357,90 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE]; + struct ib_client_nl_info data = {}; + struct ib_device *ibdev = NULL; + struct sk_buff *msg; + u32 index; + int err; + + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, + extack); + if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) + return -EINVAL; + + nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], + sizeof(client_name)); + + if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + ibdev = ib_device_get_by_index(sock_net(skb->sk), index); + if (!ibdev) + return -EINVAL; + + if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { + data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(ibdev, data.port)) { + err = -EINVAL; + goto out_put; + } + } else { + data.port = -1; + } + } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { + return -EINVAL; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out_put; + } + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_GET_CHARDEV), + 0, 0); + + data.nl_msg = msg; + err = ib_get_client_nl_info(ibdev, client_name, &data); + if (err) + goto out_nlmsg; + + err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV, + huge_encode_dev(data.cdev->devt), + RDMA_NLDEV_ATTR_PAD); + if (err) + goto out_data; + err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi, + RDMA_NLDEV_ATTR_PAD); + if (err) + goto out_data; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME, + dev_name(data.cdev))) { + err = -EMSGSIZE; + goto out_data; + } + + nlmsg_end(msg, nlh); + put_device(data.cdev); + if (ibdev) + ib_device_put(ibdev); + return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +out_data: + put_device(data.cdev); +out_nlmsg: + nlmsg_free(msg); +out_put: + if (ibdev) + ib_device_put(ibdev); + return err; +} + static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -1404,6 +1498,9 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .doit = nldev_get_doit, .dump = nldev_get_dumpit, }, + [RDMA_NLDEV_CMD_GET_CHARDEV] = { + .doit = nldev_get_chardev, + }, [RDMA_NLDEV_CMD_SET] = { .doit = nldev_set_doit, .flags = RDMA_NL_ADMIN_PERM, diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 5445323629b5..e63fbda25e1d 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -110,6 +110,8 @@ int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); +struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs); + /* * This is the runtime description of the uverbs API, used by the syscall * machinery to validate and dispatch calls. diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 89a5be3a2f97..dce06108c8c3 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016 HGST, a Western Digital Company. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #include <linux/moduleparam.h> #include <linux/slab.h> @@ -59,10 +51,34 @@ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, return false; } -static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) +static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev, + bool pi_support) { + u32 max_pages; + + if (pi_support) + max_pages = dev->attrs.max_pi_fast_reg_page_list_len; + else + max_pages = dev->attrs.max_fast_reg_page_list_len; + /* arbitrary limit to avoid allocating gigantic resources */ - return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); + return min_t(u32, max_pages, 256); +} + +static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg) +{ + int count = 0; + + if (reg->mr->need_inval) { + reg->inv_wr.opcode = IB_WR_LOCAL_INV; + reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey; + reg->inv_wr.next = ®->reg_wr.wr; + count++; + } else { + reg->inv_wr.next = NULL; + } + + return count; } /* Caller must have zero-initialized *reg. */ @@ -70,7 +86,8 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, u32 sg_cnt, u32 offset) { - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); u32 nents = min(sg_cnt, pages_per_mr); int count = 0, ret; @@ -78,14 +95,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, if (!reg->mr) return -EAGAIN; - if (reg->mr->need_inval) { - reg->inv_wr.opcode = IB_WR_LOCAL_INV; - reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey; - reg->inv_wr.next = ®->reg_wr.wr; - count++; - } else { - reg->inv_wr.next = NULL; - } + count += rdma_rw_inv_key(reg); ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); if (ret < 0 || ret < nents) { @@ -110,7 +120,8 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct rdma_rw_reg_ctx *prev = NULL; - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); int i, j, ret = 0, count = 0; ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr; @@ -351,13 +362,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct ib_device *dev = qp->pd->device; - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); struct ib_rdma_wr *rdma_wr; - struct ib_send_wr *prev_wr = NULL; int count = 0, ret; if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) { - pr_err("SG count too large\n"); + pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n", + sg_cnt, prot_sg_cnt, pages_per_mr); return -EINVAL; } @@ -366,75 +378,58 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -ENOMEM; sg_cnt = ret; - ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); - if (!ret) { - ret = -ENOMEM; - goto out_unmap_sg; + if (prot_sg_cnt) { + ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); + if (!ret) { + ret = -ENOMEM; + goto out_unmap_sg; + } + prot_sg_cnt = ret; } - prot_sg_cnt = ret; ctx->type = RDMA_RW_SIG_MR; ctx->nr_ops = 1; - ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL); - if (!ctx->sig) { + ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL); + if (!ctx->reg) { ret = -ENOMEM; goto out_unmap_prot_sg; } - ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0); - if (ret < 0) - goto out_free_ctx; - count += ret; - prev_wr = &ctx->sig->data.reg_wr.wr; - - ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot, - prot_sg, prot_sg_cnt, 0); - if (ret < 0) - goto out_destroy_data_mr; - count += ret; - - if (ctx->sig->prot.inv_wr.next) - prev_wr->next = &ctx->sig->prot.inv_wr; - else - prev_wr->next = &ctx->sig->prot.reg_wr.wr; - prev_wr = &ctx->sig->prot.reg_wr.wr; - - ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs); - if (!ctx->sig->sig_mr) { + ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs); + if (!ctx->reg->mr) { ret = -EAGAIN; - goto out_destroy_prot_mr; + goto out_free_ctx; } - if (ctx->sig->sig_mr->need_inval) { - memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr)); + count += rdma_rw_inv_key(ctx->reg); - ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV; - ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey; + memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs)); - prev_wr->next = &ctx->sig->sig_inv_wr; - prev_wr = &ctx->sig->sig_inv_wr; + ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg, + prot_sg_cnt, NULL, SZ_4K); + if (unlikely(ret)) { + pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt); + goto out_destroy_sig_mr; } - ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR; - ctx->sig->sig_wr.wr.wr_cqe = NULL; - ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge; - ctx->sig->sig_wr.wr.num_sge = 1; - ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE; - ctx->sig->sig_wr.sig_attrs = sig_attrs; - ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr; - if (prot_sg_cnt) - ctx->sig->sig_wr.prot = &ctx->sig->prot.sge; - prev_wr->next = &ctx->sig->sig_wr.wr; - prev_wr = &ctx->sig->sig_wr.wr; + ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY; + ctx->reg->reg_wr.wr.wr_cqe = NULL; + ctx->reg->reg_wr.wr.num_sge = 0; + ctx->reg->reg_wr.wr.send_flags = 0; + ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE; + if (rdma_protocol_iwarp(qp->device, port_num)) + ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE; + ctx->reg->reg_wr.mr = ctx->reg->mr; + ctx->reg->reg_wr.key = ctx->reg->mr->lkey; count++; - ctx->sig->sig_sge.addr = 0; - ctx->sig->sig_sge.length = ctx->sig->data.sge.length; - if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE) - ctx->sig->sig_sge.length += ctx->sig->prot.sge.length; + ctx->reg->sge.addr = ctx->reg->mr->iova; + ctx->reg->sge.length = ctx->reg->mr->length; + if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE) + ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length; - rdma_wr = &ctx->sig->data.wr; - rdma_wr->wr.sg_list = &ctx->sig->sig_sge; + rdma_wr = &ctx->reg->wr; + rdma_wr->wr.sg_list = &ctx->reg->sge; rdma_wr->wr.num_sge = 1; rdma_wr->remote_addr = remote_addr; rdma_wr->rkey = rkey; @@ -442,21 +437,18 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; else rdma_wr->wr.opcode = IB_WR_RDMA_READ; - prev_wr->next = &rdma_wr->wr; - prev_wr = &rdma_wr->wr; + ctx->reg->reg_wr.wr.next = &rdma_wr->wr; count++; return count; -out_destroy_prot_mr: - if (prot_sg_cnt) - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); -out_destroy_data_mr: - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); +out_destroy_sig_mr: + ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr); out_free_ctx: - kfree(ctx->sig); + kfree(ctx->reg); out_unmap_prot_sg: - ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); + if (prot_sg_cnt) + ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); out_unmap_sg: ib_dma_unmap_sg(dev, sg, sg_cnt, dir); return ret; @@ -499,22 +491,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, switch (ctx->type) { case RDMA_RW_SIG_MR: - rdma_rw_update_lkey(&ctx->sig->data, true); - if (ctx->sig->prot.mr) - rdma_rw_update_lkey(&ctx->sig->prot, true); - - ctx->sig->sig_mr->need_inval = true; - ib_update_fast_reg_key(ctx->sig->sig_mr, - ib_inc_rkey(ctx->sig->sig_mr->lkey)); - ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey; - - if (ctx->sig->data.inv_wr.next) - first_wr = &ctx->sig->data.inv_wr; - else - first_wr = &ctx->sig->data.reg_wr.wr; - last_wr = &ctx->sig->data.wr.wr; - break; case RDMA_RW_MR: + /* fallthrough */ for (i = 0; i < ctx->nr_ops; i++) { rdma_rw_update_lkey(&ctx->reg[i], ctx->reg[i].wr.wr.opcode != @@ -613,7 +591,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy); /** * rdma_rw_ctx_destroy_signature - release all resources allocated by - * rdma_rw_ctx_init_signature + * rdma_rw_ctx_signature_init * @ctx: context to release * @qp: queue pair to operate on * @port_num: port num to which the connection is bound @@ -631,16 +609,12 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR)) return; - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); - ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr); + kfree(ctx->reg); - if (ctx->sig->prot.mr) { - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + if (prot_sg_cnt) ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); - } - - ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr); - kfree(ctx->sig); } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); @@ -661,7 +635,7 @@ unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, unsigned int mr_pages; if (rdma_rw_can_use_mr(device, port_num)) - mr_pages = rdma_rw_fr_page_list_len(device); + mr_pages = rdma_rw_fr_page_list_len(device, false); else mr_pages = device->attrs.max_sge_rd; return DIV_ROUND_UP(maxpages, mr_pages); @@ -687,9 +661,8 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) * we'll need two additional MRs for the registrations and the * invalidation. */ - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) - factor += 6; /* (inv + reg) * (data + prot + sig) */ - else if (rdma_rw_can_use_mr(dev, attr->port_num)) + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN || + rdma_rw_can_use_mr(dev, attr->port_num)) factor += 2; /* inv + reg */ attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs; @@ -705,20 +678,22 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) { struct ib_device *dev = qp->pd->device; - u32 nr_mrs = 0, nr_sig_mrs = 0; + u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0; int ret = 0; - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) { + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) { nr_sig_mrs = attr->cap.max_rdma_ctxs; - nr_mrs = attr->cap.max_rdma_ctxs * 2; + nr_mrs = attr->cap.max_rdma_ctxs; + max_num_sg = rdma_rw_fr_page_list_len(dev, true); } else if (rdma_rw_can_use_mr(dev, attr->port_num)) { nr_mrs = attr->cap.max_rdma_ctxs; + max_num_sg = rdma_rw_fr_page_list_len(dev, false); } if (nr_mrs) { ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs, IB_MR_TYPE_MEM_REG, - rdma_rw_fr_page_list_len(dev)); + max_num_sg, 0); if (ret) { pr_err("%s: failed to allocated %d MRs\n", __func__, nr_mrs); @@ -728,10 +703,10 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) if (nr_sig_mrs) { ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs, - IB_MR_TYPE_SIGNATURE, 2); + IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg); if (ret) { pr_err("%s: failed to allocated %d SIG MRs\n", - __func__, nr_mrs); + __func__, nr_sig_mrs); goto out_free_rdma_mrs; } } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c deleted file mode 100644 index 8e7da2d41fd8..000000000000 --- a/drivers/infiniband/core/ucm.c +++ /dev/null @@ -1,1350 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/completion.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/device.h> -#include <linux/err.h> -#include <linux/poll.h> -#include <linux/sched.h> -#include <linux/file.h> -#include <linux/mount.h> -#include <linux/cdev.h> -#include <linux/xarray.h> -#include <linux/mutex.h> -#include <linux/slab.h> - -#include <linux/nospec.h> - -#include <linux/uaccess.h> - -#include <rdma/ib.h> -#include <rdma/ib_cm.h> -#include <rdma/ib_user_cm.h> -#include <rdma/ib_marshall.h> - -#include "core_priv.h" - -MODULE_AUTHOR("Libor Michalek"); -MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); -MODULE_LICENSE("Dual BSD/GPL"); - -struct ib_ucm_device { - int devnum; - struct cdev cdev; - struct device dev; - struct ib_device *ib_dev; -}; - -struct ib_ucm_file { - struct mutex file_mutex; - struct file *filp; - struct ib_ucm_device *device; - - struct list_head ctxs; - struct list_head events; - wait_queue_head_t poll_wait; -}; - -struct ib_ucm_context { - int id; - struct completion comp; - atomic_t ref; - int events_reported; - - struct ib_ucm_file *file; - struct ib_cm_id *cm_id; - __u64 uid; - - struct list_head events; /* list of pending events. */ - struct list_head file_list; /* member in file ctx list */ -}; - -struct ib_ucm_event { - struct ib_ucm_context *ctx; - struct list_head file_list; /* member in file event list */ - struct list_head ctx_list; /* member in ctx event list */ - - struct ib_cm_id *cm_id; - struct ib_ucm_event_resp resp; - void *data; - void *info; - int data_len; - int info_len; -}; - -enum { - IB_UCM_MAJOR = 231, - IB_UCM_BASE_MINOR = 224, - IB_UCM_MAX_DEVICES = RDMA_MAX_PORTS, - IB_UCM_NUM_FIXED_MINOR = 32, - IB_UCM_NUM_DYNAMIC_MINOR = IB_UCM_MAX_DEVICES - IB_UCM_NUM_FIXED_MINOR, -}; - -#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) -static dev_t dynamic_ucm_dev; - -static void ib_ucm_add_one(struct ib_device *device); -static void ib_ucm_remove_one(struct ib_device *device, void *client_data); - -static struct ib_client ucm_client = { - .name = "ucm", - .add = ib_ucm_add_one, - .remove = ib_ucm_remove_one -}; - -static DEFINE_XARRAY_ALLOC(ctx_id_table); -static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); - -static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) -{ - struct ib_ucm_context *ctx; - - xa_lock(&ctx_id_table); - ctx = xa_load(&ctx_id_table, id); - if (!ctx) - ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) - ctx = ERR_PTR(-EINVAL); - else - atomic_inc(&ctx->ref); - xa_unlock(&ctx_id_table); - - return ctx; -} - -static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) -{ - if (atomic_dec_and_test(&ctx->ref)) - complete(&ctx->comp); -} - -static inline int ib_ucm_new_cm_id(int event) -{ - return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; -} - -static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) -{ - struct ib_ucm_event *uevent; - - mutex_lock(&ctx->file->file_mutex); - list_del(&ctx->file_list); - while (!list_empty(&ctx->events)) { - - uevent = list_entry(ctx->events.next, - struct ib_ucm_event, ctx_list); - list_del(&uevent->file_list); - list_del(&uevent->ctx_list); - mutex_unlock(&ctx->file->file_mutex); - - /* clear incoming connections. */ - if (ib_ucm_new_cm_id(uevent->resp.event)) - ib_destroy_cm_id(uevent->cm_id); - - kfree(uevent); - mutex_lock(&ctx->file->file_mutex); - } - mutex_unlock(&ctx->file->file_mutex); -} - -static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) -{ - struct ib_ucm_context *ctx; - - ctx = kzalloc(sizeof *ctx, GFP_KERNEL); - if (!ctx) - return NULL; - - atomic_set(&ctx->ref, 1); - init_completion(&ctx->comp); - ctx->file = file; - INIT_LIST_HEAD(&ctx->events); - - if (xa_alloc(&ctx_id_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) - goto error; - - list_add_tail(&ctx->file_list, &file->ctxs); - return ctx; - -error: - kfree(ctx); - return NULL; -} - -static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, - const struct ib_cm_req_event_param *kreq) -{ - ureq->remote_ca_guid = kreq->remote_ca_guid; - ureq->remote_qkey = kreq->remote_qkey; - ureq->remote_qpn = kreq->remote_qpn; - ureq->qp_type = kreq->qp_type; - ureq->starting_psn = kreq->starting_psn; - ureq->responder_resources = kreq->responder_resources; - ureq->initiator_depth = kreq->initiator_depth; - ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; - ureq->flow_control = kreq->flow_control; - ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; - ureq->retry_count = kreq->retry_count; - ureq->rnr_retry_count = kreq->rnr_retry_count; - ureq->srq = kreq->srq; - ureq->port = kreq->port; - - ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); - if (kreq->alternate_path) - ib_copy_path_rec_to_user(&ureq->alternate_path, - kreq->alternate_path); -} - -static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, - const struct ib_cm_rep_event_param *krep) -{ - urep->remote_ca_guid = krep->remote_ca_guid; - urep->remote_qkey = krep->remote_qkey; - urep->remote_qpn = krep->remote_qpn; - urep->starting_psn = krep->starting_psn; - urep->responder_resources = krep->responder_resources; - urep->initiator_depth = krep->initiator_depth; - urep->target_ack_delay = krep->target_ack_delay; - urep->failover_accepted = krep->failover_accepted; - urep->flow_control = krep->flow_control; - urep->rnr_retry_count = krep->rnr_retry_count; - urep->srq = krep->srq; -} - -static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, - const struct ib_cm_sidr_rep_event_param *krep) -{ - urep->status = krep->status; - urep->qkey = krep->qkey; - urep->qpn = krep->qpn; -}; - -static int ib_ucm_event_process(const struct ib_cm_event *evt, - struct ib_ucm_event *uvt) -{ - void *info = NULL; - - switch (evt->event) { - case IB_CM_REQ_RECEIVED: - ib_ucm_event_req_get(&uvt->resp.u.req_resp, - &evt->param.req_rcvd); - uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; - uvt->resp.present = IB_UCM_PRES_PRIMARY; - uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? - IB_UCM_PRES_ALTERNATE : 0); - break; - case IB_CM_REP_RECEIVED: - ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, - &evt->param.rep_rcvd); - uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; - break; - case IB_CM_RTU_RECEIVED: - uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_DREQ_RECEIVED: - uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_DREP_RECEIVED: - uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_MRA_RECEIVED: - uvt->resp.u.mra_resp.timeout = - evt->param.mra_rcvd.service_timeout; - uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; - break; - case IB_CM_REJ_RECEIVED: - uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; - uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.rej_rcvd.ari_length; - info = evt->param.rej_rcvd.ari; - break; - case IB_CM_LAP_RECEIVED: - ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, - evt->param.lap_rcvd.alternate_path); - uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; - uvt->resp.present = IB_UCM_PRES_ALTERNATE; - break; - case IB_CM_APR_RECEIVED: - uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; - uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.apr_rcvd.info_len; - info = evt->param.apr_rcvd.apr_info; - break; - case IB_CM_SIDR_REQ_RECEIVED: - uvt->resp.u.sidr_req_resp.pkey = - evt->param.sidr_req_rcvd.pkey; - uvt->resp.u.sidr_req_resp.port = - evt->param.sidr_req_rcvd.port; - uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; - break; - case IB_CM_SIDR_REP_RECEIVED: - ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, - &evt->param.sidr_rep_rcvd); - uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.sidr_rep_rcvd.info_len; - info = evt->param.sidr_rep_rcvd.info; - break; - default: - uvt->resp.u.send_status = evt->param.send_status; - break; - } - - if (uvt->data_len) { - uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); - if (!uvt->data) - goto err1; - - uvt->resp.present |= IB_UCM_PRES_DATA; - } - - if (uvt->info_len) { - uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); - if (!uvt->info) - goto err2; - - uvt->resp.present |= IB_UCM_PRES_INFO; - } - return 0; - -err2: - kfree(uvt->data); -err1: - return -ENOMEM; -} - -static int ib_ucm_event_handler(struct ib_cm_id *cm_id, - const struct ib_cm_event *event) -{ - struct ib_ucm_event *uevent; - struct ib_ucm_context *ctx; - int result = 0; - - ctx = cm_id->context; - - uevent = kzalloc(sizeof *uevent, GFP_KERNEL); - if (!uevent) - goto err1; - - uevent->ctx = ctx; - uevent->cm_id = cm_id; - uevent->resp.uid = ctx->uid; - uevent->resp.id = ctx->id; - uevent->resp.event = event->event; - - result = ib_ucm_event_process(event, uevent); - if (result) - goto err2; - - mutex_lock(&ctx->file->file_mutex); - list_add_tail(&uevent->file_list, &ctx->file->events); - list_add_tail(&uevent->ctx_list, &ctx->events); - wake_up_interruptible(&ctx->file->poll_wait); - mutex_unlock(&ctx->file->file_mutex); - return 0; - -err2: - kfree(uevent); -err1: - /* Destroy new cm_id's */ - return ib_ucm_new_cm_id(event->event); -} - -static ssize_t ib_ucm_event(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_event_get cmd; - struct ib_ucm_event *uevent; - int result = 0; - - if (out_len < sizeof(struct ib_ucm_event_resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&file->file_mutex); - while (list_empty(&file->events)) { - mutex_unlock(&file->file_mutex); - - if (file->filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->events))) - return -ERESTARTSYS; - - mutex_lock(&file->file_mutex); - } - - uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); - - if (ib_ucm_new_cm_id(uevent->resp.event)) { - ctx = ib_ucm_ctx_alloc(file); - if (!ctx) { - result = -ENOMEM; - goto done; - } - - ctx->cm_id = uevent->cm_id; - ctx->cm_id->context = ctx; - uevent->resp.id = ctx->id; - } - - if (copy_to_user(u64_to_user_ptr(cmd.response), - &uevent->resp, sizeof(uevent->resp))) { - result = -EFAULT; - goto done; - } - - if (uevent->data) { - if (cmd.data_len < uevent->data_len) { - result = -ENOMEM; - goto done; - } - if (copy_to_user(u64_to_user_ptr(cmd.data), - uevent->data, uevent->data_len)) { - result = -EFAULT; - goto done; - } - } - - if (uevent->info) { - if (cmd.info_len < uevent->info_len) { - result = -ENOMEM; - goto done; - } - if (copy_to_user(u64_to_user_ptr(cmd.info), - uevent->info, uevent->info_len)) { - result = -EFAULT; - goto done; - } - } - - list_del(&uevent->file_list); - list_del(&uevent->ctx_list); - uevent->ctx->events_reported++; - - kfree(uevent->data); - kfree(uevent->info); - kfree(uevent); -done: - mutex_unlock(&file->file_mutex); - return result; -} - -static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_create_id cmd; - struct ib_ucm_create_id_resp resp; - struct ib_ucm_context *ctx; - int result; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&file->file_mutex); - ctx = ib_ucm_ctx_alloc(file); - mutex_unlock(&file->file_mutex); - if (!ctx) - return -ENOMEM; - - ctx->uid = cmd.uid; - ctx->cm_id = ib_create_cm_id(file->device->ib_dev, - ib_ucm_event_handler, ctx); - if (IS_ERR(ctx->cm_id)) { - result = PTR_ERR(ctx->cm_id); - goto err1; - } - - resp.id = ctx->id; - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) { - result = -EFAULT; - goto err2; - } - return 0; - -err2: - ib_destroy_cm_id(ctx->cm_id); -err1: - xa_erase(&ctx_id_table, ctx->id); - kfree(ctx); - return result; -} - -static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_destroy_id cmd; - struct ib_ucm_destroy_id_resp resp; - struct ib_ucm_context *ctx; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - xa_lock(&ctx_id_table); - ctx = xa_load(&ctx_id_table, cmd.id); - if (!ctx) - ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) - ctx = ERR_PTR(-EINVAL); - else - __xa_erase(&ctx_id_table, ctx->id); - xa_unlock(&ctx_id_table); - - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ib_ucm_ctx_put(ctx); - wait_for_completion(&ctx->comp); - - /* No new events will be generated after destroying the cm_id. */ - ib_destroy_cm_id(ctx->cm_id); - /* Cleanup events not yet reported to the user. */ - ib_ucm_cleanup_events(ctx); - - resp.events_reported = ctx->events_reported; - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) - result = -EFAULT; - - kfree(ctx); - return result; -} - -static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_attr_id_resp resp; - struct ib_ucm_attr_id cmd; - struct ib_ucm_context *ctx; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - resp.service_id = ctx->cm_id->service_id; - resp.service_mask = ctx->cm_id->service_mask; - resp.local_id = ctx->cm_id->local_id; - resp.remote_id = ctx->cm_id->remote_id; - - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) - result = -EFAULT; - - ib_ucm_ctx_put(ctx); - return result; -} - -static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_uverbs_qp_attr resp; - struct ib_ucm_init_qp_attr cmd; - struct ib_ucm_context *ctx; - struct ib_qp_attr qp_attr; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - resp.qp_attr_mask = 0; - memset(&qp_attr, 0, sizeof qp_attr); - qp_attr.qp_state = cmd.qp_state; - result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); - if (result) - goto out; - - ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); - - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) - result = -EFAULT; - -out: - ib_ucm_ctx_put(ctx); - return result; -} - -static int ucm_validate_listen(__be64 service_id, __be64 service_mask) -{ - service_id &= service_mask; - - if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || - ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) - return -EINVAL; - - return 0; -} - -static ssize_t ib_ucm_listen(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_listen cmd; - struct ib_ucm_context *ctx; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - result = ucm_validate_listen(cmd.service_id, cmd.service_mask); - if (result) - goto out; - - result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); -out: - ib_ucm_ctx_put(ctx); - return result; -} - -static ssize_t ib_ucm_notify(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_notify cmd; - struct ib_ucm_context *ctx; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); - ib_ucm_ctx_put(ctx); - return result; -} - -static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) -{ - void *data; - - *dest = NULL; - - if (!len) - return 0; - - data = memdup_user(u64_to_user_ptr(src), len); - if (IS_ERR(data)) - return PTR_ERR(data); - - *dest = data; - return 0; -} - -static int ib_ucm_path_get(struct sa_path_rec **path, u64 src) -{ - struct ib_user_path_rec upath; - struct sa_path_rec *sa_path; - - *path = NULL; - - if (!src) - return 0; - - sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); - if (!sa_path) - return -ENOMEM; - - if (copy_from_user(&upath, u64_to_user_ptr(src), - sizeof(upath))) { - - kfree(sa_path); - return -EFAULT; - } - - ib_copy_path_rec_from_user(sa_path, &upath); - *path = sa_path; - return 0; -} - -static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_req_param param; - struct ib_ucm_context *ctx; - struct ib_ucm_req cmd; - int result; - - param.private_data = NULL; - param.primary_path = NULL; - param.alternate_path = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - goto done; - - result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); - if (result) - goto done; - - result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); - if (result) - goto done; - - param.private_data_len = cmd.len; - param.service_id = cmd.sid; - param.qp_num = cmd.qpn; - param.qp_type = cmd.qp_type; - param.starting_psn = cmd.psn; - param.peer_to_peer = cmd.peer_to_peer; - param.responder_resources = cmd.responder_resources; - param.initiator_depth = cmd.initiator_depth; - param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; - param.flow_control = cmd.flow_control; - param.local_cm_response_timeout = cmd.local_cm_response_timeout; - param.retry_count = cmd.retry_count; - param.rnr_retry_count = cmd.rnr_retry_count; - param.max_cm_retries = cmd.max_cm_retries; - param.srq = cmd.srq; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_req(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.primary_path); - kfree(param.alternate_path); - return result; -} - -static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_rep_param param; - struct ib_ucm_context *ctx; - struct ib_ucm_rep cmd; - int result; - - param.private_data = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - return result; - - param.qp_num = cmd.qpn; - param.starting_psn = cmd.psn; - param.private_data_len = cmd.len; - param.responder_resources = cmd.responder_resources; - param.initiator_depth = cmd.initiator_depth; - param.failover_accepted = cmd.failover_accepted; - param.flow_control = cmd.flow_control; - param.rnr_retry_count = cmd.rnr_retry_count; - param.srq = cmd.srq; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - ctx->uid = cmd.uid; - result = ib_send_cm_rep(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - - kfree(param.private_data); - return result; -} - -static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, - const char __user *inbuf, int in_len, - int (*func)(struct ib_cm_id *cm_id, - const void *private_data, - u8 private_data_len)) -{ - struct ib_ucm_private_data cmd; - struct ib_ucm_context *ctx; - const void *private_data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); - if (result) - return result; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = func(ctx->cm_id, private_data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - - kfree(private_data); - return result; -} - -static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); -} - -static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); -} - -static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); -} - -static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, - const char __user *inbuf, int in_len, - int (*func)(struct ib_cm_id *cm_id, - int status, - const void *info, - u8 info_len, - const void *data, - u8 data_len)) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_info cmd; - const void *data = NULL; - const void *info = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); - if (result) - goto done; - - result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); - if (result) - goto done; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = func(ctx->cm_id, cmd.status, info, cmd.info_len, - data, cmd.data_len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(data); - kfree(info); - return result; -} - -static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); -} - -static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); -} - -static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_mra cmd; - const void *data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); - if (result) - return result; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - - kfree(data); - return result; -} - -static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct sa_path_rec *path = NULL; - struct ib_ucm_lap cmd; - const void *data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); - if (result) - goto done; - - result = ib_ucm_path_get(&path, cmd.path); - if (result) - goto done; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(data); - kfree(path); - return result; -} - -static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_sidr_req_param param = {}; - struct ib_ucm_context *ctx; - struct ib_ucm_sidr_req cmd; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - goto done; - - result = ib_ucm_path_get(¶m.path, cmd.path); - if (result) - goto done; - - param.private_data_len = cmd.len; - param.service_id = cmd.sid; - param.timeout_ms = cmd.timeout; - param.max_cm_retries = cmd.max_cm_retries; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.path); - return result; -} - -static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_sidr_rep_param param; - struct ib_ucm_sidr_rep cmd; - struct ib_ucm_context *ctx; - int result; - - param.info = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, - cmd.data, cmd.data_len); - if (result) - goto done; - - result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); - if (result) - goto done; - - param.qp_num = cmd.qpn; - param.qkey = cmd.qkey; - param.status = cmd.status; - param.info_length = cmd.info_len; - param.private_data_len = cmd.data_len; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.info); - return result; -} - -static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) = { - [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, - [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, - [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, - [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, - [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, - [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, - [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, - [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, - [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, - [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, - [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, - [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, - [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, - [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, - [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, - [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, - [IB_USER_CM_CMD_EVENT] = ib_ucm_event, - [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, -}; - -static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, - size_t len, loff_t *pos) -{ - struct ib_ucm_file *file = filp->private_data; - struct ib_ucm_cmd_hdr hdr; - ssize_t result; - - if (!ib_safe_file_access(filp)) { - pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", - task_tgid_vnr(current), current->comm); - return -EACCES; - } - - if (len < sizeof(hdr)) - return -EINVAL; - - if (copy_from_user(&hdr, buf, sizeof(hdr))) - return -EFAULT; - - if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) - return -EINVAL; - hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); - - if (hdr.in + sizeof(hdr) > len) - return -EINVAL; - - result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), - hdr.in, hdr.out); - if (!result) - result = len; - - return result; -} - -static __poll_t ib_ucm_poll(struct file *filp, - struct poll_table_struct *wait) -{ - struct ib_ucm_file *file = filp->private_data; - __poll_t mask = 0; - - poll_wait(filp, &file->poll_wait, wait); - - if (!list_empty(&file->events)) - mask = EPOLLIN | EPOLLRDNORM; - - return mask; -} - -/* - * ib_ucm_open() does not need the BKL: - * - * - no global state is referred to; - * - there is no ioctl method to race against; - * - no further module initialization is required for open to work - * after the device is registered. - */ -static int ib_ucm_open(struct inode *inode, struct file *filp) -{ - struct ib_ucm_file *file; - - file = kmalloc(sizeof(*file), GFP_KERNEL); - if (!file) - return -ENOMEM; - - INIT_LIST_HEAD(&file->events); - INIT_LIST_HEAD(&file->ctxs); - init_waitqueue_head(&file->poll_wait); - - mutex_init(&file->file_mutex); - - filp->private_data = file; - file->filp = filp; - file->device = container_of(inode->i_cdev, struct ib_ucm_device, cdev); - - return stream_open(inode, filp); -} - -static int ib_ucm_close(struct inode *inode, struct file *filp) -{ - struct ib_ucm_file *file = filp->private_data; - struct ib_ucm_context *ctx; - - mutex_lock(&file->file_mutex); - while (!list_empty(&file->ctxs)) { - ctx = list_entry(file->ctxs.next, - struct ib_ucm_context, file_list); - mutex_unlock(&file->file_mutex); - - xa_erase(&ctx_id_table, ctx->id); - ib_destroy_cm_id(ctx->cm_id); - ib_ucm_cleanup_events(ctx); - kfree(ctx); - - mutex_lock(&file->file_mutex); - } - mutex_unlock(&file->file_mutex); - kfree(file); - return 0; -} - -static void ib_ucm_release_dev(struct device *dev) -{ - struct ib_ucm_device *ucm_dev; - - ucm_dev = container_of(dev, struct ib_ucm_device, dev); - kfree(ucm_dev); -} - -static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev) -{ - clear_bit(ucm_dev->devnum, dev_map); -} - -static const struct file_operations ucm_fops = { - .owner = THIS_MODULE, - .open = ib_ucm_open, - .release = ib_ucm_close, - .write = ib_ucm_write, - .poll = ib_ucm_poll, - .llseek = no_llseek, -}; - -static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct ib_ucm_device *ucm_dev; - - ucm_dev = container_of(dev, struct ib_ucm_device, dev); - return sprintf(buf, "%s\n", ucm_dev->ib_dev->name); -} -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); - -static void ib_ucm_add_one(struct ib_device *device) -{ - int devnum; - dev_t base; - struct ib_ucm_device *ucm_dev; - - if (!device->ops.alloc_ucontext || !rdma_cap_ib_cm(device, 1)) - return; - - ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); - if (!ucm_dev) - return; - - device_initialize(&ucm_dev->dev); - ucm_dev->ib_dev = device; - ucm_dev->dev.release = ib_ucm_release_dev; - - devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); - if (devnum >= IB_UCM_MAX_DEVICES) - goto err; - ucm_dev->devnum = devnum; - set_bit(devnum, dev_map); - if (devnum >= IB_UCM_NUM_FIXED_MINOR) - base = dynamic_ucm_dev + devnum - IB_UCM_NUM_FIXED_MINOR; - else - base = IB_UCM_BASE_DEV + devnum; - - cdev_init(&ucm_dev->cdev, &ucm_fops); - ucm_dev->cdev.owner = THIS_MODULE; - kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); - - ucm_dev->dev.class = &cm_class; - ucm_dev->dev.parent = device->dev.parent; - ucm_dev->dev.devt = base; - - dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); - if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev)) - goto err_devnum; - - if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) - goto err_dev; - - ib_set_client_data(device, &ucm_client, ucm_dev); - return; - -err_dev: - cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev); -err_devnum: - ib_ucm_free_dev(ucm_dev); -err: - put_device(&ucm_dev->dev); - return; -} - -static void ib_ucm_remove_one(struct ib_device *device, void *client_data) -{ - struct ib_ucm_device *ucm_dev = client_data; - - if (!ucm_dev) - return; - - cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev); - ib_ucm_free_dev(ucm_dev); - put_device(&ucm_dev->dev); -} - -static CLASS_ATTR_STRING(abi_version, S_IRUGO, - __stringify(IB_USER_CM_ABI_VERSION)); - -static int __init ib_ucm_init(void) -{ - int ret; - - ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR, - "infiniband_cm"); - if (ret) { - pr_err("ucm: couldn't register device number\n"); - goto error1; - } - - ret = alloc_chrdev_region(&dynamic_ucm_dev, 0, IB_UCM_NUM_DYNAMIC_MINOR, - "infiniband_cm"); - if (ret) { - pr_err("ucm: couldn't register dynamic device number\n"); - goto err_alloc; - } - - ret = class_create_file(&cm_class, &class_attr_abi_version.attr); - if (ret) { - pr_err("ucm: couldn't create abi_version attribute\n"); - goto error2; - } - - ret = ib_register_client(&ucm_client); - if (ret) { - pr_err("ucm: couldn't register client\n"); - goto error3; - } - return 0; - -error3: - class_remove_file(&cm_class, &class_attr_abi_version.attr); -error2: - unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR); -err_alloc: - unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR); -error1: - return ret; -} - -static void __exit ib_ucm_cleanup(void) -{ - ib_unregister_client(&ucm_client); - class_remove_file(&cm_class, &class_attr_abi_version.attr); - unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR); - unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR); - WARN_ON(!xa_empty(&ctx_id_table)); -} - -module_init(ib_ucm_init); -module_exit(ib_ucm_cleanup); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 140a338a135f..0274e9b704be 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -52,6 +52,8 @@ #include <rdma/rdma_cm_ib.h> #include <rdma/ib_addr.h> #include <rdma/ib.h> +#include <rdma/rdma_netlink.h> +#include "core_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -81,7 +83,7 @@ struct ucma_file { }; struct ucma_context { - int id; + u32 id; struct completion comp; atomic_t ref; int events_reported; @@ -94,7 +96,7 @@ struct ucma_context { struct list_head list; struct list_head mc_list; /* mark that device is in process of destroying the internal HW - * resources, protected by the global mut + * resources, protected by the ctx_table lock */ int closing; /* sync between removal event and id destroy, protected by file mut */ @@ -104,7 +106,7 @@ struct ucma_context { struct ucma_multicast { struct ucma_context *ctx; - int id; + u32 id; int events_reported; u64 uid; @@ -122,9 +124,8 @@ struct ucma_event { struct work_struct close_work; }; -static DEFINE_MUTEX(mut); -static DEFINE_IDR(ctx_idr); -static DEFINE_IDR(multicast_idr); +static DEFINE_XARRAY_ALLOC(ctx_table); +static DEFINE_XARRAY_ALLOC(multicast_table); static const struct file_operations ucma_fops; @@ -133,7 +134,7 @@ static inline struct ucma_context *_ucma_find_context(int id, { struct ucma_context *ctx; - ctx = idr_find(&ctx_idr, id); + ctx = xa_load(&ctx_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file || !ctx->cm_id) @@ -145,7 +146,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) { struct ucma_context *ctx; - mutex_lock(&mut); + xa_lock(&ctx_table); ctx = _ucma_find_context(id, file); if (!IS_ERR(ctx)) { if (ctx->closing) @@ -153,7 +154,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) else atomic_inc(&ctx->ref); } - mutex_unlock(&mut); + xa_unlock(&ctx_table); return ctx; } @@ -216,10 +217,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; - mutex_lock(&mut); - ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&mut); - if (ctx->id < 0) + if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) goto error; list_add_tail(&ctx->list, &file->ctx_list); @@ -238,13 +236,10 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) if (!mc) return NULL; - mutex_lock(&mut); - mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL); - mutex_unlock(&mut); - if (mc->id < 0) + mc->ctx = ctx; + if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL)) goto error; - mc->ctx = ctx; list_add_tail(&mc->list, &ctx->mc_list); return mc; @@ -319,9 +314,9 @@ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) * handled separately below. */ if (ctx->cm_id == cm_id) { - mutex_lock(&mut); + xa_lock(&ctx_table); ctx->closing = 1; - mutex_unlock(&mut); + xa_unlock(&ctx_table); queue_work(ctx->file->close_wq, &ctx->close_work); return; } @@ -523,9 +518,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, err2: rdma_destroy_id(cm_id); err1: - mutex_lock(&mut); - idr_remove(&ctx_idr, ctx->id); - mutex_unlock(&mut); + xa_erase(&ctx_table, ctx->id); mutex_lock(&file->mut); list_del(&ctx->list); mutex_unlock(&file->mut); @@ -537,13 +530,13 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc, *tmp; - mutex_lock(&mut); + mutex_lock(&ctx->file->mut); list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { list_del(&mc->list); - idr_remove(&multicast_idr, mc->id); + xa_erase(&multicast_table, mc->id); kfree(mc); } - mutex_unlock(&mut); + mutex_unlock(&ctx->file->mut); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) @@ -614,11 +607,11 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - mutex_lock(&mut); + xa_lock(&ctx_table); ctx = _ucma_find_context(cmd.id, file); if (!IS_ERR(ctx)) - idr_remove(&ctx_idr, ctx->id); - mutex_unlock(&mut); + __xa_erase(&ctx_table, ctx->id); + xa_unlock(&ctx_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -630,14 +623,14 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, flush_workqueue(ctx->file->close_wq); /* At this point it's guaranteed that there is no inflight * closing task */ - mutex_lock(&mut); + xa_lock(&ctx_table); if (!ctx->closing) { - mutex_unlock(&mut); + xa_unlock(&ctx_table); ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); rdma_destroy_id(ctx->cm_id); } else { - mutex_unlock(&mut); + xa_unlock(&ctx_table); } resp.events_reported = ucma_free_ctx(ctx); @@ -951,8 +944,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx, } } - if (copy_to_user(response, resp, - sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) + if (copy_to_user(response, resp, struct_size(resp, path_data, i))) ret = -EFAULT; kfree(resp); @@ -1432,9 +1424,7 @@ static ssize_t ucma_process_join(struct ucma_file *file, goto err3; } - mutex_lock(&mut); - idr_replace(&multicast_idr, mc, mc->id); - mutex_unlock(&mut); + xa_store(&multicast_table, mc->id, mc, 0); mutex_unlock(&file->mut); ucma_put_ctx(ctx); @@ -1444,9 +1434,7 @@ err3: rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); ucma_cleanup_mc_events(mc); err2: - mutex_lock(&mut); - idr_remove(&multicast_idr, mc->id); - mutex_unlock(&mut); + xa_erase(&multicast_table, mc->id); list_del(&mc->list); kfree(mc); err1: @@ -1508,8 +1496,8 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - mutex_lock(&mut); - mc = idr_find(&multicast_idr, cmd.id); + xa_lock(&multicast_table); + mc = xa_load(&multicast_table, cmd.id); if (!mc) mc = ERR_PTR(-ENOENT); else if (mc->ctx->file != file) @@ -1517,8 +1505,8 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, else if (!atomic_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); else - idr_remove(&multicast_idr, mc->id); - mutex_unlock(&mut); + __xa_erase(&multicast_table, mc->id); + xa_unlock(&multicast_table); if (IS_ERR(mc)) { ret = PTR_ERR(mc); @@ -1615,14 +1603,14 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, * events being added before existing events. */ ucma_lock_files(cur_file, new_file); - mutex_lock(&mut); + xa_lock(&ctx_table); list_move_tail(&ctx->list, &new_file->ctx_list); ucma_move_events(ctx, new_file); ctx->file = new_file; resp.events_reported = ctx->events_reported; - mutex_unlock(&mut); + xa_unlock(&ctx_table); ucma_unlock_files(cur_file, new_file); response: @@ -1757,18 +1745,15 @@ static int ucma_close(struct inode *inode, struct file *filp) ctx->destroying = 1; mutex_unlock(&file->mut); - mutex_lock(&mut); - idr_remove(&ctx_idr, ctx->id); - mutex_unlock(&mut); - + xa_erase(&ctx_table, ctx->id); flush_workqueue(file->close_wq); /* At that step once ctx was marked as destroying and workqueue * was flushed we are safe from any inflights handlers that * might put other closing task. */ - mutex_lock(&mut); + xa_lock(&ctx_table); if (!ctx->closing) { - mutex_unlock(&mut); + xa_unlock(&ctx_table); ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* rdma_destroy_id ensures that no event handlers are @@ -1776,7 +1761,7 @@ static int ucma_close(struct inode *inode, struct file *filp) */ rdma_destroy_id(ctx->cm_id); } else { - mutex_unlock(&mut); + xa_unlock(&ctx_table); } ucma_free_ctx(ctx); @@ -1805,6 +1790,19 @@ static struct miscdevice ucma_misc = { .fops = &ucma_fops, }; +static int ucma_get_global_nl_info(struct ib_client_nl_info *res) +{ + res->abi = RDMA_USER_CM_ABI_VERSION; + res->cdev = ucma_misc.this_device; + return 0; +} + +static struct ib_client rdma_cma_client = { + .name = "rdma_cm", + .get_global_nl_info = ucma_get_global_nl_info, +}; +MODULE_ALIAS_RDMA_CLIENT("rdma_cm"); + static ssize_t show_abi_version(struct device *dev, struct device_attribute *attr, char *buf) @@ -1833,7 +1831,14 @@ static int __init ucma_init(void) ret = -ENOMEM; goto err2; } + + ret = ib_register_client(&rdma_cma_client); + if (ret) + goto err3; + return 0; +err3: + unregister_net_sysctl_table(ucma_ctl_table_hdr); err2: device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); err1: @@ -1843,11 +1848,10 @@ err1: static void __exit ucma_cleanup(void) { + ib_unregister_client(&rdma_cma_client); unregister_net_sysctl_table(ucma_ctl_table_hdr); device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); - idr_destroy(&ctx_idr); - idr_destroy(&multicast_idr); } module_init(ucma_init); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index e7ea819fcb11..08da840ed7ee 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -54,9 +54,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) { page = sg_page_iter_page(&sg_iter); - if (!PageDirty(page) && umem->writable && dirty) - set_page_dirty_lock(page); - put_page(page); + if (umem->writable && dirty) + put_user_pages_dirty_lock(&page, 1); + else + put_user_page(page); } sg_free_table(&umem->sg_head); @@ -244,7 +245,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, umem->context = context; umem->length = size; umem->address = addr; - umem->page_shift = PAGE_SHIFT; umem->writable = ib_access_writable(access); umem->owning_mm = mm = current->mm; mmgrab(mm); @@ -361,6 +361,9 @@ static void __ib_umem_release_tail(struct ib_umem *umem) */ void ib_umem_release(struct ib_umem *umem) { + if (!umem) + return; + if (umem->is_odp) { ib_umem_odp_release(to_ib_umem_odp(umem)); __ib_umem_release_tail(umem); @@ -385,7 +388,7 @@ int ib_umem_page_count(struct ib_umem *umem) n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) - n += sg_dma_len(sg) >> umem->page_shift; + n += sg_dma_len(sg) >> PAGE_SHIFT; return n; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f962b5bbfa40..2a75c6f8d827 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -59,7 +59,7 @@ static u64 node_start(struct umem_odp_node *n) struct ib_umem_odp *umem_odp = container_of(n, struct ib_umem_odp, interval_tree); - return ib_umem_start(&umem_odp->umem); + return ib_umem_start(umem_odp); } /* Note that the representation of the intervals in the interval tree @@ -72,7 +72,7 @@ static u64 node_last(struct umem_odp_node *n) struct ib_umem_odp *umem_odp = container_of(n, struct ib_umem_odp, interval_tree); - return ib_umem_end(&umem_odp->umem) - 1; + return ib_umem_end(umem_odp) - 1; } INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, @@ -107,8 +107,6 @@ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp) static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, u64 start, u64 end, void *cookie) { - struct ib_umem *umem = &umem_odp->umem; - /* * Increase the number of notifiers running, to * prevent any further fault handling on this MR. @@ -119,8 +117,8 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, * all pending page faults. */ smp_wmb(); complete_all(&umem_odp->notifier_completion); - umem->context->invalidate_range(umem_odp, ib_umem_start(umem), - ib_umem_end(umem)); + umem_odp->umem.context->invalidate_range( + umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); return 0; } @@ -151,6 +149,7 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, { struct ib_ucontext_per_mm *per_mm = container_of(mn, struct ib_ucontext_per_mm, mn); + int rc; if (mmu_notifier_range_blockable(range)) down_read(&per_mm->umem_rwsem); @@ -167,11 +166,14 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, return 0; } - return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, - range->end, - invalidate_range_start_trampoline, - mmu_notifier_range_blockable(range), - NULL); + rc = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, + range->end, + invalidate_range_start_trampoline, + mmu_notifier_range_blockable(range), + NULL); + if (rc) + up_read(&per_mm->umem_rwsem); + return rc; } static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, @@ -205,10 +207,9 @@ static const struct mmu_notifier_ops ib_umem_notifiers = { static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) { struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - struct ib_umem *umem = &umem_odp->umem; down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem) != ib_umem_end(umem))) + if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) rbt_ib_umem_insert(&umem_odp->interval_tree, &per_mm->umem_tree); up_write(&per_mm->umem_rwsem); @@ -217,10 +218,9 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp) { struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - struct ib_umem *umem = &umem_odp->umem; down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem) != ib_umem_end(umem))) + if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) rbt_ib_umem_remove(&umem_odp->interval_tree, &per_mm->umem_tree); complete_all(&umem_odp->notifier_completion); @@ -351,7 +351,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, umem->context = ctx; umem->length = size; umem->address = addr; - umem->page_shift = PAGE_SHIFT; + odp_data->page_shift = PAGE_SHIFT; umem->writable = root->umem.writable; umem->is_odp = 1; odp_data->per_mm = per_mm; @@ -405,18 +405,19 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) struct mm_struct *mm = umem->owning_mm; int ret_val; + umem_odp->page_shift = PAGE_SHIFT; if (access & IB_ACCESS_HUGETLB) { struct vm_area_struct *vma; struct hstate *h; down_read(&mm->mmap_sem); - vma = find_vma(mm, ib_umem_start(umem)); + vma = find_vma(mm, ib_umem_start(umem_odp)); if (!vma || !is_vm_hugetlb_page(vma)) { up_read(&mm->mmap_sem); return -EINVAL; } h = hstate_vma(vma); - umem->page_shift = huge_page_shift(h); + umem_odp->page_shift = huge_page_shift(h); up_read(&mm->mmap_sem); } @@ -424,16 +425,16 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) init_completion(&umem_odp->notifier_completion); - if (ib_umem_num_pages(umem)) { + if (ib_umem_odp_num_pages(umem_odp)) { umem_odp->page_list = vzalloc(array_size(sizeof(*umem_odp->page_list), - ib_umem_num_pages(umem))); + ib_umem_odp_num_pages(umem_odp))); if (!umem_odp->page_list) return -ENOMEM; umem_odp->dma_list = vzalloc(array_size(sizeof(*umem_odp->dma_list), - ib_umem_num_pages(umem))); + ib_umem_odp_num_pages(umem_odp))); if (!umem_odp->dma_list) { ret_val = -ENOMEM; goto out_page_list; @@ -456,16 +457,14 @@ out_page_list: void ib_umem_odp_release(struct ib_umem_odp *umem_odp) { - struct ib_umem *umem = &umem_odp->umem; - /* * Ensure that no more pages are mapped in the umem. * * It is the driver's responsibility to ensure, before calling us, * that the hardware will not attempt to access the MR any more. */ - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), - ib_umem_end(umem)); + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); remove_umem_from_per_mm(umem_odp); put_per_mm(umem_odp); @@ -487,7 +486,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * The function returns -EFAULT if the DMA mapping operation fails. It returns * -EAGAIN if a concurrent invalidation prevents us from updating the page. * - * The page is released via put_page even if the operation failed. For + * The page is released via put_user_page even if the operation failed. For * on-demand pinning, the page is released whenever it isn't stored in the * umem. */ @@ -498,8 +497,8 @@ static int ib_umem_odp_map_dma_single_page( u64 access_mask, unsigned long current_seq) { - struct ib_umem *umem = &umem_odp->umem; - struct ib_device *dev = umem->context->device; + struct ib_ucontext *context = umem_odp->umem.context; + struct ib_device *dev = context->device; dma_addr_t dma_addr; int remove_existing_mapping = 0; int ret = 0; @@ -514,10 +513,9 @@ static int ib_umem_odp_map_dma_single_page( goto out; } if (!(umem_odp->dma_list[page_index])) { - dma_addr = ib_dma_map_page(dev, - page, - 0, BIT(umem->page_shift), - DMA_BIDIRECTIONAL); + dma_addr = + ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift), + DMA_BIDIRECTIONAL); if (ib_dma_mapping_error(dev, dma_addr)) { ret = -EFAULT; goto out; @@ -536,15 +534,16 @@ static int ib_umem_odp_map_dma_single_page( } out: - put_page(page); + put_user_page(page); if (remove_existing_mapping) { ib_umem_notifier_start_account(umem_odp); - umem->context->invalidate_range( + context->invalidate_range( umem_odp, - ib_umem_start(umem) + (page_index << umem->page_shift), - ib_umem_start(umem) + - ((page_index + 1) << umem->page_shift)); + ib_umem_start(umem_odp) + + (page_index << umem_odp->page_shift), + ib_umem_start(umem_odp) + + ((page_index + 1) << umem_odp->page_shift)); ib_umem_notifier_end_account(umem_odp); ret = -EAGAIN; } @@ -581,27 +580,26 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, u64 bcnt, u64 access_mask, unsigned long current_seq) { - struct ib_umem *umem = &umem_odp->umem; struct task_struct *owning_process = NULL; struct mm_struct *owning_mm = umem_odp->umem.owning_mm; struct page **local_page_list = NULL; u64 page_mask, off; - int j, k, ret = 0, start_idx, npages = 0, page_shift; - unsigned int flags = 0; + int j, k, ret = 0, start_idx, npages = 0; + unsigned int flags = 0, page_shift; phys_addr_t p = 0; if (access_mask == 0) return -EINVAL; - if (user_virt < ib_umem_start(umem) || - user_virt + bcnt > ib_umem_end(umem)) + if (user_virt < ib_umem_start(umem_odp) || + user_virt + bcnt > ib_umem_end(umem_odp)) return -EFAULT; local_page_list = (struct page **)__get_free_page(GFP_KERNEL); if (!local_page_list) return -ENOMEM; - page_shift = umem->page_shift; + page_shift = umem_odp->page_shift; page_mask = ~(BIT(page_shift) - 1); off = user_virt & (~page_mask); user_virt = user_virt & page_mask; @@ -621,7 +619,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, if (access_mask & ODP_WRITE_ALLOWED_BIT) flags |= FOLL_WRITE; - start_idx = (user_virt - ib_umem_start(umem)) >> page_shift; + start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift; k = start_idx; while (bcnt > 0) { @@ -659,7 +657,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = -EFAULT; break; } - put_page(local_page_list[j]); + put_user_page(local_page_list[j]); continue; } @@ -686,8 +684,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * ib_umem_odp_map_dma_single_page(). */ if (npages - (j + 1) > 0) - release_pages(&local_page_list[j+1], - npages - (j + 1)); + put_user_pages(&local_page_list[j+1], + npages - (j + 1)); break; } } @@ -711,21 +709,20 @@ EXPORT_SYMBOL(ib_umem_odp_map_dma_pages); void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 bound) { - struct ib_umem *umem = &umem_odp->umem; int idx; u64 addr; - struct ib_device *dev = umem->context->device; + struct ib_device *dev = umem_odp->umem.context->device; - virt = max_t(u64, virt, ib_umem_start(umem)); - bound = min_t(u64, bound, ib_umem_end(umem)); + virt = max_t(u64, virt, ib_umem_start(umem_odp)); + bound = min_t(u64, bound, ib_umem_end(umem_odp)); /* Note that during the run of this function, the * notifiers_count of the MR is > 0, preventing any racing * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ mutex_lock(&umem_odp->umem_mutex); - for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) { - idx = (addr - ib_umem_start(umem)) >> umem->page_shift; + for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { + idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { struct page *page = umem_odp->page_list[idx]; dma_addr_t dma = umem_odp->dma_list[idx]; @@ -733,7 +730,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, WARN_ON(!dma_addr); - ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE, + ib_dma_unmap_page(dev, dma_addr, + BIT(umem_odp->page_shift), DMA_BIDIRECTIONAL); if (dma & ODP_WRITE_ALLOWED_BIT) { struct page *head_page = compound_head(page); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 671f07ba1fad..9f8a48016b41 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -54,6 +54,7 @@ #include <rdma/ib_mad.h> #include <rdma/ib_user_mad.h> +#include <rdma/rdma_netlink.h> #include "core_priv.h" @@ -744,7 +745,7 @@ found: "process %s did not enable P_Key index support.\n", current->comm); dev_warn(&file->port->dev, - " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); + " Documentation/infiniband/user_mad.rst has info on the new ABI.\n"); } } @@ -1124,11 +1125,48 @@ static const struct file_operations umad_sm_fops = { .llseek = no_llseek, }; +static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data, + struct ib_client_nl_info *res) +{ + struct ib_umad_device *umad_dev = client_data; + + if (!rdma_is_port_valid(ibdev, res->port)) + return -EINVAL; + + res->abi = IB_USER_MAD_ABI_VERSION; + res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev; + + return 0; +} + static struct ib_client umad_client = { .name = "umad", .add = ib_umad_add_one, - .remove = ib_umad_remove_one + .remove = ib_umad_remove_one, + .get_nl_info = ib_umad_get_nl_info, }; +MODULE_ALIAS_RDMA_CLIENT("umad"); + +static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data, + struct ib_client_nl_info *res) +{ + struct ib_umad_device *umad_dev = + ib_get_client_data(ibdev, &umad_client); + + if (!rdma_is_port_valid(ibdev, res->port)) + return -EINVAL; + + res->abi = IB_USER_MAD_ABI_VERSION; + res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev; + + return 0; +} + +static struct ib_client issm_client = { + .name = "issm", + .get_nl_info = ib_issm_get_nl_info, +}; +MODULE_ALIAS_RDMA_CLIENT("issm"); static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1387,13 +1425,17 @@ static int __init ib_umad_init(void) } ret = ib_register_client(&umad_client); - if (ret) { - pr_err("couldn't register ib_umad client\n"); + if (ret) goto out_class; - } + + ret = ib_register_client(&issm_client); + if (ret) + goto out_client; return 0; +out_client: + ib_unregister_client(&umad_client); out_class: class_unregister(&umad_class); @@ -1411,6 +1453,7 @@ out: static void __exit ib_umad_cleanup(void) { + ib_unregister_client(&issm_client); ib_unregister_client(&umad_client); class_unregister(&umad_class); unregister_chrdev_region(base_umad_dev, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 5a3a1780ceea..750c4d484329 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -174,6 +174,17 @@ static int uverbs_request_finish(struct uverbs_req_iter *iter) return 0; } +/* + * When calling a destroy function during an error unwind we need to pass in + * the udata that is sanitized of all user arguments. Ie from the driver + * perspective it looks like no udata was passed. + */ +struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs) +{ + attrs->driver_udata = (struct ib_udata){}; + return &attrs->driver_udata; +} + static struct ib_uverbs_completion_event_file * _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs) { @@ -441,7 +452,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(uobj, attrs); err_copy: - ib_dealloc_pd_user(pd, &attrs->driver_udata); + ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); pd = NULL; err_alloc: kfree(pd); @@ -644,7 +655,7 @@ err_copy: } err_dealloc_xrcd: - ib_dealloc_xrcd(xrcd, &attrs->driver_udata); + ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs)); err: uobj_alloc_abort(&obj->uobject, attrs); @@ -745,7 +756,9 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_USER; mr->dm = NULL; + mr->sig_attrs = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; @@ -767,7 +780,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(uobj, attrs); err_copy: - ib_dereg_mr_user(mr, &attrs->driver_udata); + ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); err_put: uobj_put_obj_read(pd); @@ -1010,12 +1023,11 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, attr.comp_vector = cmd->comp_vector; attr.flags = cmd->flags; - cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); + cq = rdma_zalloc_drv_obj(ib_dev, ib_cq); + if (!cq) { + ret = -ENOMEM; goto err_file; } - cq->device = ib_dev; cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; @@ -1023,6 +1035,10 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); + ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); + if (ret) + goto err_free; + obj->uobject.object = cq; memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uobject.id; @@ -1042,8 +1058,10 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, return obj; err_cb: - ib_destroy_cq(cq); - + ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); + cq = NULL; +err_free: + kfree(cq); err_file: if (ev_file) ib_uverbs_release_ucq(attrs->ufile, ev_file, obj); @@ -1478,7 +1496,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_cb: - ib_destroy_qp(qp); + ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); err_put: if (!IS_ERR(xrcd_uobj)) @@ -1611,7 +1629,7 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_destroy: - ib_destroy_qp(qp); + ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); err_xrcd: uobj_put_read(xrcd_uobj); err_put: @@ -2453,7 +2471,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(uobj, attrs); err_copy: - rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); + rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, + uverbs_get_cleared_udata(attrs)); err_put: uobj_put_obj_read(pd); @@ -2964,7 +2983,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: - ib_destroy_wq(wq, &attrs->driver_udata); + ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: uobj_put_obj_read(cq); err_put_pd: @@ -3464,7 +3483,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: - ib_destroy_srq_user(srq, &attrs->driver_udata); + ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); err_free: kfree(srq); @@ -3703,9 +3722,6 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) * trailing driver_data flex array. In this case the size of the base struct * cannot be changed. */ -#define offsetof_after(_struct, _member) \ - (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member)) - #define UAPI_DEF_WRITE_IO(req, resp) \ .write.has_resp = 1 + \ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \ @@ -3736,11 +3752,11 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) */ #define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \ .write.has_resp = 1, \ - .write.req_size = offsetof_after(req, req_last_member), \ - .write.resp_size = offsetof_after(resp, resp_last_member) + .write.req_size = offsetofend(req, req_last_member), \ + .write.resp_size = offsetofend(resp, resp_last_member) #define UAPI_DEF_WRITE_I_EX(req, req_last_member) \ - .write.req_size = offsetof_after(req, req_last_member) + .write.req_size = offsetofend(req, req_last_member) const struct uapi_definition uverbs_def_write_intf[] = { DECLARE_UVERBS_OBJECT( diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 84a5e9a6d483..11c13c1381cf 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -51,6 +51,7 @@ #include <rdma/ib.h> #include <rdma/uverbs_std_types.h> +#include <rdma/rdma_netlink.h> #include "uverbs.h" #include "core_priv.h" @@ -198,7 +199,7 @@ void ib_uverbs_release_file(struct kref *ref) ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (ib_dev && !ib_dev->ops.disassociate_ucontext) - module_put(ib_dev->owner); + module_put(ib_dev->ops.owner); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); if (atomic_dec_and_test(&file->device->refcount)) @@ -1065,7 +1066,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) module_dependent = !(ib_dev->ops.disassociate_ucontext); if (module_dependent) { - if (!try_module_get(ib_dev->owner)) { + if (!try_module_get(ib_dev->ops.owner)) { ret = -ENODEV; goto err; } @@ -1100,7 +1101,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) return stream_open(inode, filp); err_module: - module_put(ib_dev->owner); + module_put(ib_dev->ops.owner); err: mutex_unlock(&dev->lists_mutex); @@ -1148,12 +1149,41 @@ static const struct file_operations uverbs_mmap_fops = { .compat_ioctl = ib_uverbs_ioctl, }; +static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data, + struct ib_client_nl_info *res) +{ + struct ib_uverbs_device *uverbs_dev = client_data; + int ret; + + if (res->port != -1) + return -EINVAL; + + res->abi = ibdev->ops.uverbs_abi_ver; + res->cdev = &uverbs_dev->dev; + + /* + * To support DRIVER_ID binding in userspace some of the driver need + * upgrading to expose their PCI dependent revision information + * through get_context instead of relying on modalias matching. When + * the drivers are fixed they can drop this flag. + */ + if (!ibdev->ops.uverbs_no_driver_id_binding) { + ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID, + ibdev->ops.driver_id); + if (ret) + return ret; + } + return 0; +} + static struct ib_client uverbs_client = { .name = "uverbs", .no_kverbs_req = true, .add = ib_uverbs_add_one, - .remove = ib_uverbs_remove_one + .remove = ib_uverbs_remove_one, + .get_nl_info = ib_uverbs_get_nl_info, }; +MODULE_ALIAS_RDMA_CLIENT("uverbs"); static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, char *buf) @@ -1186,7 +1216,7 @@ static ssize_t abi_version_show(struct device *device, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); + ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index db5c46a1bb2d..e39fe6a8aac4 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -111,9 +111,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); + cq = rdma_zalloc_drv_obj(ib_dev, ib_cq); + if (!cq) { + ret = -ENOMEM; goto err_event_file; } @@ -122,10 +122,15 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; - obj->uobject.object = cq; - obj->uobject.user_handle = user_handle; atomic_set(&cq->usecnt, 0); cq->res.type = RDMA_RESTRACK_CQ; + + ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); + if (ret) + goto err_free; + + obj->uobject.object = cq; + obj->uobject.user_handle = user_handle; rdma_restrack_uadd(&cq->res); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, @@ -135,8 +140,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( return 0; err_cq: - ib_destroy_cq(cq); - + ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); + cq = NULL; +err_free: + kfree(cq); err_event_file: if (ev_file) uverbs_uobject_put(ev_file_uobj); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 610d3b9f7654..c1286a52dc84 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -128,6 +128,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_DM; mr->dm = dm; mr->uobject = uobj; atomic_inc(&pd->usecnt); @@ -148,7 +149,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( return 0; err_dereg: - ib_dereg_mr_user(mr, &attrs->driver_udata); + ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); return ret; } diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 7a987acf0c0b..00c547887132 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -22,6 +22,8 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) return ERR_PTR(-EOVERFLOW); elm = kzalloc(alloc_size, GFP_KERNEL); + if (!elm) + return ERR_PTR(-ENOMEM); rc = radix_tree_insert(&uapi->radix, key, elm); if (rc) { kfree(elm); @@ -645,7 +647,7 @@ struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev) return ERR_PTR(-ENOMEM); INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL); - uapi->driver_id = ibdev->driver_id; + uapi->driver_id = ibdev->ops.driver_id; rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false); if (rc) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e666a1f7608d..4a04e94a72db 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -209,7 +209,7 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) EXPORT_SYMBOL(ib_rate_to_mbps); __attribute_const__ enum rdma_transport_type -rdma_node_get_transport(enum rdma_node_type node_type) +rdma_node_get_transport(unsigned int node_type) { if (node_type == RDMA_NODE_USNIC) @@ -299,6 +299,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_DMA; mr->uobject = NULL; mr->need_inval = false; @@ -316,7 +317,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, EXPORT_SYMBOL(__ib_alloc_pd); /** - * ib_dealloc_pd - Deallocates a protection domain. + * ib_dealloc_pd_user - Deallocates a protection domain. * @pd: The protection domain to deallocate. * @udata: Valid user data or NULL for kernel object * @@ -1157,6 +1158,10 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, qp_init_attr->cap.max_recv_sge)) return ERR_PTR(-EINVAL); + if ((qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) && + !(device->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)) + return ERR_PTR(-EINVAL); + /* * If the callers is using the RDMA API calculate the resources * needed for the RDMA READ/WRITE operations. @@ -1232,6 +1237,8 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, qp->max_write_sge = qp_init_attr->cap.max_send_sge; qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, device->attrs.max_sge_rd); + if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) + qp->integrity_en = true; return qp; @@ -1916,21 +1923,28 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, const char *caller) { struct ib_cq *cq; + int ret; - cq = device->ops.create_cq(device, cq_attr, NULL); - - if (!IS_ERR(cq)) { - cq->device = device; - cq->uobject = NULL; - cq->comp_handler = comp_handler; - cq->event_handler = event_handler; - cq->cq_context = cq_context; - atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_set_task(&cq->res, caller); - rdma_restrack_kadd(&cq->res); + cq = rdma_zalloc_drv_obj(device, ib_cq); + if (!cq) + return ERR_PTR(-ENOMEM); + + cq->device = device; + cq->uobject = NULL; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_set(&cq->usecnt, 0); + cq->res.type = RDMA_RESTRACK_CQ; + rdma_restrack_set_task(&cq->res, caller); + + ret = device->ops.create_cq(cq, cq_attr, NULL); + if (ret) { + kfree(cq); + return ERR_PTR(ret); } + rdma_restrack_kadd(&cq->res); return cq; } EXPORT_SYMBOL(__ib_create_cq); @@ -1949,7 +1963,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) return -EBUSY; rdma_restrack_del(&cq->res); - return cq->device->ops.destroy_cq(cq, udata); + cq->device->ops.destroy_cq(cq, udata); + kfree(cq); + return 0; } EXPORT_SYMBOL(ib_destroy_cq_user); @@ -1966,6 +1982,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; struct ib_dm *dm = mr->dm; + struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; rdma_restrack_del(&mr->res); @@ -1974,6 +1991,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) atomic_dec(&pd->usecnt); if (dm) atomic_dec(&dm->usecnt); + kfree(sig_attrs); } return ret; @@ -1981,7 +1999,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) EXPORT_SYMBOL(ib_dereg_mr_user); /** - * ib_alloc_mr() - Allocates a memory region + * ib_alloc_mr_user() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. @@ -2001,6 +2019,9 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, if (!pd->device->ops.alloc_mr) return ERR_PTR(-EOPNOTSUPP); + if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY)) + return ERR_PTR(-EINVAL); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { mr->device = pd->device; @@ -2011,12 +2032,66 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, mr->need_inval = false; mr->res.type = RDMA_RESTRACK_MR; rdma_restrack_kadd(&mr->res); + mr->type = mr_type; + mr->sig_attrs = NULL; } return mr; } EXPORT_SYMBOL(ib_alloc_mr_user); +/** + * ib_alloc_mr_integrity() - Allocates an integrity memory region + * @pd: protection domain associated with the region + * @max_num_data_sg: maximum data sg entries available for registration + * @max_num_meta_sg: maximum metadata sg entries available for + * registration + * + * Notes: + * Memory registration page/sg lists must not exceed max_num_sg, + * also the integrity page/sg lists must not exceed max_num_meta_sg. + * + */ +struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg) +{ + struct ib_mr *mr; + struct ib_sig_attrs *sig_attrs; + + if (!pd->device->ops.alloc_mr_integrity || + !pd->device->ops.map_mr_sg_pi) + return ERR_PTR(-EOPNOTSUPP); + + if (!max_num_meta_sg) + return ERR_PTR(-EINVAL); + + sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL); + if (!sig_attrs) + return ERR_PTR(-ENOMEM); + + mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, + max_num_meta_sg); + if (IS_ERR(mr)) { + kfree(sig_attrs); + return mr; + } + + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + mr->need_inval = false; + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_kadd(&mr->res); + mr->type = IB_MR_TYPE_INTEGRITY; + mr->sig_attrs = sig_attrs; + + return mr; +} +EXPORT_SYMBOL(ib_alloc_mr_integrity); + /* "Fast" memory regions */ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, @@ -2226,19 +2301,17 @@ EXPORT_SYMBOL(ib_create_wq); */ int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { - int err; struct ib_cq *cq = wq->cq; struct ib_pd *pd = wq->pd; if (atomic_read(&wq->usecnt)) return -EBUSY; - err = wq->device->ops.destroy_wq(wq, udata); - if (!err) { - atomic_dec(&pd->usecnt); - atomic_dec(&cq->usecnt); - } - return err; + wq->device->ops.destroy_wq(wq, udata); + atomic_dec(&pd->usecnt); + atomic_dec(&cq->usecnt); + + return 0; } EXPORT_SYMBOL(ib_destroy_wq); @@ -2376,6 +2449,43 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, EXPORT_SYMBOL(ib_set_vf_guid); /** + * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection + * information) and set an appropriate memory region for registration. + * @mr: memory region + * @data_sg: dma mapped scatterlist for data + * @data_sg_nents: number of entries in data_sg + * @data_sg_offset: offset in bytes into data_sg + * @meta_sg: dma mapped scatterlist for metadata + * @meta_sg_nents: number of entries in meta_sg + * @meta_sg_offset: offset in bytes into meta_sg + * @page_size: page vector desired page size + * + * Constraints: + * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY. + * + * Return: 0 on success. + * + * After this completes successfully, the memory region + * is ready for registration. + */ +int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset, unsigned int page_size) +{ + if (unlikely(!mr->device->ops.map_mr_sg_pi || + WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY))) + return -EOPNOTSUPP; + + mr->page_size = page_size; + + return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, + meta_sg_nents, meta_sg_offset); +} +EXPORT_SYMBOL(ib_map_mr_sg_pi); + +/** * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list * and set it the memory region. * @mr: memory region |