aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/core/cma_configfs.c3
-rw-r--r--drivers/infiniband/core/cma_priv.h28
-rw-r--r--drivers/infiniband/core/core_priv.h30
-rw-r--r--drivers/infiniband/core/device.c29
-rw-r--r--drivers/infiniband/core/fmr_pool.c4
-rw-r--r--drivers/infiniband/core/nldev.c21
-rw-r--r--drivers/infiniband/core/rdma_core.c48
-rw-r--r--drivers/infiniband/core/rdma_core.h74
-rw-r--r--drivers/infiniband/core/restrack.c5
-rw-r--r--drivers/infiniband/core/umem_odp.c14
-rw-r--r--drivers/infiniband/core/uverbs.h62
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c1989
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c10
-rw-r--r--drivers/infiniband/core/uverbs_main.c228
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c50
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c10
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c19
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c11
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c17
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c8
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c512
-rw-r--r--drivers/infiniband/core/verbs.c12
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c3
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile1
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c47
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h4
-rw-r--r--drivers/infiniband/hw/hfi1/common.h19
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c49
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c72
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h35
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c31
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h5
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c20
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c32
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c10
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h1
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c48
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h13
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c2
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c33
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c5
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c10
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h2
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c4
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c18
-rw-r--r--drivers/infiniband/hw/hns/Makefile2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h92
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c41
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c552
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h142
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c83
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c141
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c21
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c456
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c20
-rw-r--r--drivers/infiniband/hw/mlx4/main.c27
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h1
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c12
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile4
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c4
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c207
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c51
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c4
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c9
-rw-r--r--drivers/infiniband/hw/mlx5/main.c327
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h51
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c331
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c436
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c65
-rw-r--r--drivers/infiniband/hw/mlx5/srq.h73
-rw-r--r--drivers/infiniband/hw/mlx5/srq_cmd.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/srq.c)302
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.c7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.h3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c33
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c12
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h2
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c5
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c140
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c1257
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c298
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c343
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c109
-rw-r--r--include/linux/mlx5/cq.h2
-rw-r--r--include/linux/mlx5/device.h17
-rw-r--r--include/linux/mlx5/driver.h252
-rw-r--r--include/linux/mlx5/eq.h72
-rw-r--r--include/linux/mlx5/mlx5_ifc.h42
-rw-r--r--include/linux/mlx5/port.h3
-rw-r--r--include/linux/mlx5/qp.h5
-rw-r--r--include/linux/mlx5/srq.h72
-rw-r--r--include/linux/mlx5/transobj.h11
-rw-r--r--include/rdma/ib_fmr_pool.h2
-rw-r--r--include/rdma/ib_mad.h10
-rw-r--r--include/rdma/ib_verbs.h34
-rw-r--r--include/rdma/rdma_vt.h7
-rw-r--r--include/rdma/restrack.h4
-rw-r--r--include/rdma/uverbs_ioctl.h220
-rw-r--r--include/rdma/uverbs_named_ioctl.h13
-rw-r--r--include/rdma/uverbs_std_types.h73
-rw-r--r--include/uapi/rdma/hfi/hfi1_user.h6
-rw-r--r--include/uapi/rdma/hns-abi.h6
-rw-r--r--include/uapi/rdma/ib_user_verbs.h16
-rw-r--r--include/uapi/rdma/mlx5-abi.h2
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h1
153 files changed, 6825 insertions, 4142 deletions
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 8c2dfb3e294e..3ec2c415bb70 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -33,7 +33,10 @@
#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+
#include "core_priv.h"
+#include "cma_priv.h"
struct cma_device;
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 194cfe78c447..cf47c69436a7 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -94,4 +94,32 @@ struct rdma_id_private {
*/
struct rdma_restrack_entry res;
};
+
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+ return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+
+void cma_ref_dev(struct cma_device *dev);
+void cma_deref_dev(struct cma_device *dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie);
+int cma_get_default_gid_type(struct cma_device *dev, unsigned int port);
+int cma_set_default_gid_type(struct cma_device *dev, unsigned int port,
+ enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port,
+ u8 default_roce_tos);
+struct ib_device *cma_get_ib_dev(struct cma_device *dev);
+
#endif /* _CMA_PRIV_H */
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index bb9007a0cca7..cc7535c5e192 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -54,35 +54,6 @@ struct pkey_index_qp_list {
struct list_head qp_list;
};
-#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
-int cma_configfs_init(void);
-void cma_configfs_exit(void);
-#else
-static inline int cma_configfs_init(void)
-{
- return 0;
-}
-
-static inline void cma_configfs_exit(void)
-{
-}
-#endif
-struct cma_device;
-void cma_ref_dev(struct cma_device *cma_dev);
-void cma_deref_dev(struct cma_device *cma_dev);
-typedef bool (*cma_device_filter)(struct ib_device *, void *);
-struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
- void *cookie);
-int cma_get_default_gid_type(struct cma_device *cma_dev,
- unsigned int port);
-int cma_set_default_gid_type(struct cma_device *cma_dev,
- unsigned int port,
- enum ib_gid_type default_gid_type);
-int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
-int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port,
- u8 default_roce_tos);
-struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
-
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
@@ -296,6 +267,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
#endif
struct ib_device *ib_device_get_by_index(u32 ifindex);
+void ib_device_put(struct ib_device *device);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 87eb4f2cdd7d..348a7fb1f945 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -145,7 +145,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index)
}
/*
- * Caller is responsible to return refrerence count by calling put_device()
+ * Caller must perform ib_device_put() to return the device reference count
+ * when ib_device_get_by_index() returns valid device pointer.
*/
struct ib_device *ib_device_get_by_index(u32 index)
{
@@ -153,13 +154,21 @@ struct ib_device *ib_device_get_by_index(u32 index)
down_read(&lists_rwsem);
device = __ib_device_get_by_index(index);
- if (device)
- get_device(&device->dev);
-
+ if (device) {
+ /* Do not return a device if unregistration has started. */
+ if (!refcount_inc_not_zero(&device->refcount))
+ device = NULL;
+ }
up_read(&lists_rwsem);
return device;
}
+void ib_device_put(struct ib_device *device)
+{
+ if (refcount_dec_and_test(&device->refcount))
+ complete(&device->unreg_completion);
+}
+
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
@@ -293,6 +302,8 @@ struct ib_device *ib_alloc_device(size_t size)
rwlock_init(&device->client_data_lock);
INIT_LIST_HEAD(&device->client_data_list);
INIT_LIST_HEAD(&device->port_list);
+ refcount_set(&device->refcount, 1);
+ init_completion(&device->unreg_completion);
return device;
}
@@ -641,6 +652,13 @@ void ib_unregister_device(struct ib_device *device)
struct ib_client_data *context, *tmp;
unsigned long flags;
+ /*
+ * Wait for all netlink command callers to finish working on the
+ * device.
+ */
+ ib_device_put(device);
+ wait_for_completion(&device->unreg_completion);
+
mutex_lock(&device_mutex);
down_write(&lists_rwsem);
@@ -1024,6 +1042,9 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey)
{
+ if (!rdma_is_port_valid(device, port_num))
+ return -EINVAL;
+
return device->query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 83ba0068e8bb..b00dfd2ad31e 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -474,7 +474,7 @@ EXPORT_SYMBOL(ib_fmr_pool_map_phys);
* Unmap an FMR. The FMR mapping may remain valid until the FMR is
* reused (or until ib_flush_fmr_pool() is called).
*/
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
{
struct ib_fmr_pool *pool;
unsigned long flags;
@@ -503,7 +503,5 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
#endif
spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return 0;
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 573399e3ccc1..9abbadb9e366 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -308,6 +308,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_QP] = "qp",
[RDMA_RESTRACK_CM_ID] = "cm_id",
[RDMA_RESTRACK_MR] = "mr",
+ [RDMA_RESTRACK_CTX] = "ctx",
};
struct rdma_restrack_root *res = &device->res;
@@ -636,13 +637,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -672,7 +673,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
err = ib_device_rename(device, name);
}
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -756,14 +757,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_free;
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -820,7 +821,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
}
out:
- put_device(&device->dev);
+ ib_device_put(device);
cb->args[0] = idx;
return skb->len;
}
@@ -859,13 +860,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_free;
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return ret;
}
@@ -1058,7 +1059,7 @@ next: idx++;
if (!filled)
goto err;
- put_device(&device->dev);
+ ib_device_put(device);
return skb->len;
res_err:
@@ -1069,7 +1070,7 @@ err:
nlmsg_cancel(skb, nlh);
err_index:
- put_device(&device->dev);
+ ib_device_put(device);
return ret;
}
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 752a55c6bdce..7d2f1ef75025 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -224,12 +224,14 @@ out_unlock:
* uverbs_put_destroy.
*/
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
- u32 id, struct ib_uverbs_file *ufile)
+ u32 id,
+ const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
int ret;
- uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
+ uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_DESTROY);
if (IS_ERR(uobj))
return uobj;
@@ -243,21 +245,20 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
}
/*
- * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res
- * on success (negative errno on failure). For use by callers that do not need
- * the uobj.
+ * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success
+ * (negative errno on failure). For use by callers that do not need the uobj.
*/
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
- struct ib_uverbs_file *ufile, int success_res)
+ const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
- uobj = __uobj_get_destroy(obj, id, ufile);
+ uobj = __uobj_get_destroy(obj, id, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
- return success_res;
+ return 0;
}
/* alloc_uobj must be undone by uverbs_destroy_uobject() */
@@ -267,7 +268,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
struct ib_uobject *uobj;
struct ib_ucontext *ucontext;
- ucontext = ib_uverbs_get_ucontext(ufile);
+ ucontext = ib_uverbs_get_ucontext_file(ufile);
if (IS_ERR(ucontext))
return ERR_CAST(ucontext);
@@ -397,16 +398,23 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
struct ib_uobject *uobj;
int ret;
- if (!obj)
- return ERR_PTR(-EINVAL);
+ if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) {
+ /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
+ uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
+ } else {
+ if (IS_ERR(obj))
+ return ERR_PTR(-EINVAL);
- uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
- if (IS_ERR(uobj))
- return uobj;
+ uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
- if (uobj->uapi_object != obj) {
- ret = -EINVAL;
- goto free;
+ if (uobj->uapi_object != obj) {
+ ret = -EINVAL;
+ goto free;
+ }
}
/*
@@ -426,7 +434,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
return uobj;
free:
- obj->type_class->lookup_put(uobj, mode);
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
@@ -490,7 +498,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
{
struct ib_uobject *ret;
- if (!obj)
+ if (IS_ERR(obj))
return ERR_PTR(-EINVAL);
/*
@@ -819,6 +827,8 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
RDMACG_RESOURCE_HCA_HANDLE);
+ rdma_restrack_del(&ucontext->res);
+
/*
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
* the error return.
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 4886d2bba7c7..b3ca7457ac42 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -118,43 +118,67 @@ void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
* Depending on ID the slot pointer in the radix tree points at one of these
* structs.
*/
-struct uverbs_api_object {
- const struct uverbs_obj_type *type_attrs;
- const struct uverbs_obj_type_class *type_class;
-};
struct uverbs_api_ioctl_method {
- int (__rcu *handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int(__rcu *handler)(struct uverbs_attr_bundle *attrs);
DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
u16 bundle_size;
u8 use_stack:1;
u8 driver_method:1;
+ u8 disabled:1;
+ u8 has_udata:1;
u8 key_bitmap_len;
u8 destroy_bkey;
};
+struct uverbs_api_write_method {
+ int (*handler)(struct uverbs_attr_bundle *attrs);
+ u8 disabled:1;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+};
+
struct uverbs_api_attr {
struct uverbs_attr_spec spec;
};
-struct uverbs_api_object;
struct uverbs_api {
/* radix tree contains struct uverbs_api_* pointers */
struct radix_tree_root radix;
enum rdma_driver_id driver_id;
+
+ unsigned int num_write;
+ unsigned int num_write_ex;
+ struct uverbs_api_write_method notsupp_method;
+ const struct uverbs_api_write_method **write_methods;
+ const struct uverbs_api_write_method **write_ex_methods;
};
+/*
+ * Get an uverbs_api_object that corresponds to the given object_id.
+ * Note:
+ * -ENOMSG means that any object is allowed to match during lookup.
+ */
static inline const struct uverbs_api_object *
uapi_get_object(struct uverbs_api *uapi, u16 object_id)
{
- return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+ const struct uverbs_api_object *res;
+
+ if (object_id == UVERBS_IDR_ANY_OBJECT)
+ return ERR_PTR(-ENOMSG);
+
+ res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+ if (!res)
+ return ERR_PTR(-ENOENT);
+
+ return res;
}
char *uapi_key_format(char *S, unsigned int key);
-struct uverbs_api *uverbs_alloc_api(
- const struct uverbs_object_tree_def *const *driver_specs,
- enum rdma_driver_id driver_id);
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev);
void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
void uverbs_disassociate_api(struct uverbs_api *uapi);
void uverbs_destroy_api(struct uverbs_api *uapi);
@@ -162,4 +186,32 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs);
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
+extern const struct uapi_definition uverbs_def_obj_counters[];
+extern const struct uapi_definition uverbs_def_obj_cq[];
+extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_flow_action[];
+extern const struct uapi_definition uverbs_def_obj_intf[];
+extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_write_intf[];
+
+static inline const struct uverbs_api_write_method *
+uapi_get_method(const struct uverbs_api *uapi, u32 command)
+{
+ u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return ERR_PTR(-EINVAL);
+
+ if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ if (cmd_idx >= uapi->num_write_ex)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_ex_methods[cmd_idx];
+ }
+
+ if (cmd_idx >= uapi->num_write)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_methods[cmd_idx];
+}
+
#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 06d8657ce583..3dd316159f5f 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -32,6 +32,7 @@ static const char *type2str(enum rdma_restrack_type type)
[RDMA_RESTRACK_QP] = "QP",
[RDMA_RESTRACK_CM_ID] = "CM_ID",
[RDMA_RESTRACK_MR] = "MR",
+ [RDMA_RESTRACK_CTX] = "CTX",
};
return names[type];
@@ -130,6 +131,8 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
res)->id.device;
case RDMA_RESTRACK_MR:
return container_of(res, struct ib_mr, res)->device;
+ case RDMA_RESTRACK_CTX:
+ return container_of(res, struct ib_ucontext, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
@@ -149,6 +152,8 @@ static bool res_is_user(struct rdma_restrack_entry *res)
return !res->kern_name;
case RDMA_RESTRACK_MR:
return container_of(res, struct ib_mr, res)->pd->uobject;
+ case RDMA_RESTRACK_CTX:
+ return true;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return false;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 676c1fd1119d..9608681224e6 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -647,8 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
flags, local_page_list, NULL, NULL);
up_read(&owning_mm->mmap_sem);
- if (npages < 0)
+ if (npages < 0) {
+ if (npages != -EAGAIN)
+ pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
+ else
+ pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
break;
+ }
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
mutex_lock(&umem_odp->umem_mutex);
@@ -666,8 +671,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
ret = ib_umem_odp_map_dma_single_page(
umem_odp, k, local_page_list[j],
access_mask, current_seq);
- if (ret < 0)
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
+ else
+ pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
break;
+ }
p = page_to_phys(local_page_list[j]);
k++;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c97935a0c7c6..8b41c95300c6 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -161,9 +161,6 @@ struct ib_uverbs_file {
struct mutex umap_lock;
struct list_head umaps;
- u64 uverbs_cmd_mask;
- u64 uverbs_ex_cmd_mask;
-
struct idr idr;
/* spinlock protects write access to idr */
spinlock_t idr_lock;
@@ -297,63 +294,4 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
-#define IB_UVERBS_DECLARE_CMD(name) \
- ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
- const char __user *buf, int in_len, \
- int out_len)
-
-IB_UVERBS_DECLARE_CMD(get_context);
-IB_UVERBS_DECLARE_CMD(query_device);
-IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(alloc_pd);
-IB_UVERBS_DECLARE_CMD(dealloc_pd);
-IB_UVERBS_DECLARE_CMD(reg_mr);
-IB_UVERBS_DECLARE_CMD(rereg_mr);
-IB_UVERBS_DECLARE_CMD(dereg_mr);
-IB_UVERBS_DECLARE_CMD(alloc_mw);
-IB_UVERBS_DECLARE_CMD(dealloc_mw);
-IB_UVERBS_DECLARE_CMD(create_comp_channel);
-IB_UVERBS_DECLARE_CMD(create_cq);
-IB_UVERBS_DECLARE_CMD(resize_cq);
-IB_UVERBS_DECLARE_CMD(poll_cq);
-IB_UVERBS_DECLARE_CMD(req_notify_cq);
-IB_UVERBS_DECLARE_CMD(destroy_cq);
-IB_UVERBS_DECLARE_CMD(create_qp);
-IB_UVERBS_DECLARE_CMD(open_qp);
-IB_UVERBS_DECLARE_CMD(query_qp);
-IB_UVERBS_DECLARE_CMD(modify_qp);
-IB_UVERBS_DECLARE_CMD(destroy_qp);
-IB_UVERBS_DECLARE_CMD(post_send);
-IB_UVERBS_DECLARE_CMD(post_recv);
-IB_UVERBS_DECLARE_CMD(post_srq_recv);
-IB_UVERBS_DECLARE_CMD(create_ah);
-IB_UVERBS_DECLARE_CMD(destroy_ah);
-IB_UVERBS_DECLARE_CMD(attach_mcast);
-IB_UVERBS_DECLARE_CMD(detach_mcast);
-IB_UVERBS_DECLARE_CMD(create_srq);
-IB_UVERBS_DECLARE_CMD(modify_srq);
-IB_UVERBS_DECLARE_CMD(query_srq);
-IB_UVERBS_DECLARE_CMD(destroy_srq);
-IB_UVERBS_DECLARE_CMD(create_xsrq);
-IB_UVERBS_DECLARE_CMD(open_xrcd);
-IB_UVERBS_DECLARE_CMD(close_xrcd);
-
-#define IB_UVERBS_DECLARE_EX_CMD(name) \
- int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
- struct ib_udata *ucore, \
- struct ib_udata *uhw)
-
-IB_UVERBS_DECLARE_EX_CMD(create_flow);
-IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
-IB_UVERBS_DECLARE_EX_CMD(query_device);
-IB_UVERBS_DECLARE_EX_CMD(create_cq);
-IB_UVERBS_DECLARE_EX_CMD(create_qp);
-IB_UVERBS_DECLARE_EX_CMD(create_wq);
-IB_UVERBS_DECLARE_EX_CMD(modify_wq);
-IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
-IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(modify_qp);
-IB_UVERBS_DECLARE_EX_CMD(modify_cq);
-
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a93853770e3c..b70749542471 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -47,11 +47,132 @@
#include "uverbs.h"
#include "core_priv.h"
+/*
+ * Copy a response to userspace. If the provided 'resp' is larger than the
+ * user buffer it is silently truncated. If the user provided a larger buffer
+ * then the trailing portion is zero filled.
+ *
+ * These semantics are intended to support future extension of the output
+ * structures.
+ */
+static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp,
+ size_t resp_len)
+{
+ u8 __user *cur = attrs->ucore.outbuf + resp_len;
+ u8 __user *end = attrs->ucore.outbuf + attrs->ucore.outlen;
+ int ret;
+
+ if (copy_to_user(attrs->ucore.outbuf, resp,
+ min(attrs->ucore.outlen, resp_len)))
+ return -EFAULT;
+
+ /* Zero fill any extra memory that user space might have provided */
+ for (; cur < end; cur++) {
+ ret = put_user(0, cur);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy a request from userspace. If the provided 'req' is larger than the
+ * user buffer then the user buffer is zero extended into the 'req'. If 'req'
+ * is smaller than the user buffer then the uncopied bytes in the user buffer
+ * must be zero.
+ */
+static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req,
+ size_t req_len)
+{
+ if (copy_from_user(req, attrs->ucore.inbuf,
+ min(attrs->ucore.inlen, req_len)))
+ return -EFAULT;
+
+ if (attrs->ucore.inlen < req_len) {
+ memset(req + attrs->ucore.inlen, 0,
+ req_len - attrs->ucore.inlen);
+ } else if (attrs->ucore.inlen > req_len) {
+ if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len,
+ attrs->ucore.inlen - req_len))
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/*
+ * Generate the value for the 'response_length' protocol used by write_ex.
+ * This is the number of bytes the kernel actually wrote. Userspace can use
+ * this to detect what structure members in the response the kernel
+ * understood.
+ */
+static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs,
+ size_t resp_len)
+{
+ return min_t(size_t, attrs->ucore.outlen, resp_len);
+}
+
+/*
+ * The iterator version of the request interface is for handlers that need to
+ * step over a flex array at the end of a command header.
+ */
+struct uverbs_req_iter {
+ const void __user *cur;
+ const void __user *end;
+};
+
+static int uverbs_request_start(struct uverbs_attr_bundle *attrs,
+ struct uverbs_req_iter *iter,
+ void *req,
+ size_t req_len)
+{
+ if (attrs->ucore.inlen < req_len)
+ return -ENOSPC;
+
+ if (copy_from_user(req, attrs->ucore.inbuf, req_len))
+ return -EFAULT;
+
+ iter->cur = attrs->ucore.inbuf + req_len;
+ iter->end = attrs->ucore.inbuf + attrs->ucore.inlen;
+ return 0;
+}
+
+static int uverbs_request_next(struct uverbs_req_iter *iter, void *val,
+ size_t len)
+{
+ if (iter->cur + len > iter->end)
+ return -ENOSPC;
+
+ if (copy_from_user(val, iter->cur, len))
+ return -EFAULT;
+
+ iter->cur += len;
+ return 0;
+}
+
+static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter,
+ size_t len)
+{
+ const void __user *res = iter->cur;
+
+ if (iter->cur + len > iter->end)
+ return ERR_PTR(-ENOSPC);
+ iter->cur += len;
+ return res;
+}
+
+static int uverbs_request_finish(struct uverbs_req_iter *iter)
+{
+ if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
static struct ib_uverbs_completion_event_file *
-_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
+_ib_uverbs_lookup_comp_file(s32 fd, const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
- fd, ufile);
+ fd, attrs);
if (IS_ERR(uobj))
return (void *)uobj;
@@ -65,24 +186,20 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
#define ib_uverbs_lookup_comp_file(_fd, _ufile) \
_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
-ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_file *file = attrs->ufile;
struct ib_uverbs_get_context cmd;
struct ib_uverbs_get_context_resp resp;
- struct ib_udata udata;
struct ib_ucontext *ucontext;
struct file *filp;
struct ib_rdmacg_object cg_obj;
struct ib_device *ib_dev;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
mutex_lock(&file->ucontext_lock);
ib_dev = srcu_dereference(file->device->ib_dev,
@@ -97,16 +214,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err;
}
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
if (ret)
goto err;
- ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
+ ucontext = ib_dev->alloc_ucontext(ib_dev, &attrs->driver_udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
goto err_alloc;
@@ -141,13 +253,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_fd;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_file;
- }
fd_install(resp.async_fd, filp);
+ ucontext->res.type = RDMA_RESTRACK_CTX;
+ rdma_restrack_add(&ucontext->res);
+
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
* only after all writes to setup the ucontext have completed
@@ -156,7 +270,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
mutex_unlock(&file->ucontext_lock);
- return in_len;
+ return 0;
err_file:
ib_uverbs_free_async_event_file(file);
@@ -224,31 +338,25 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->phys_port_cnt = ib_dev->phys_port_cnt;
}
-ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
struct ib_ucontext *ucontext;
+ int ret;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&resp, 0, sizeof resp);
copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
/*
@@ -272,9 +380,7 @@ static u32 make_port_cap_flags(const struct ib_port_attr *attr)
return res;
}
-ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_port cmd;
struct ib_uverbs_query_port_resp resp;
@@ -283,16 +389,14 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
struct ib_ucontext *ucontext;
struct ib_device *ib_dev;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
ib_dev = ucontext->device;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
ret = ib_query_port(ib_dev, cmd.port_num, &attr);
if (ret)
@@ -331,40 +435,27 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.link_layer = rdma_port_get_link_layer(ib_dev,
cmd.port_num);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_pd cmd;
struct ib_uverbs_alloc_pd_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
+ pd = ib_dev->alloc_pd(ib_dev, uobj->context, &attrs->driver_udata);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
goto err;
@@ -381,12 +472,11 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
pd->res.type = RDMA_RESTRACK_PD;
rdma_restrack_add(&pd->res);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_dealloc_pd(pd);
@@ -396,17 +486,16 @@ err:
return ret;
}
-ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_pd cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
}
struct xrcd_table_entry {
@@ -494,13 +583,11 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
}
}
-ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_device *ibudev = attrs->ufile->device;
struct ib_uverbs_open_xrcd cmd;
struct ib_uverbs_open_xrcd_resp resp;
- struct ib_udata udata;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
struct fd f = {NULL, 0};
@@ -509,18 +596,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
int new_xrcd = 0;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- mutex_lock(&file->device->xrcd_tree_mutex);
+ mutex_lock(&ibudev->xrcd_tree_mutex);
if (cmd.fd != -1) {
/* search for file descriptor */
@@ -531,7 +611,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
inode = file_inode(f.file);
- xrcd = find_xrcd(file->device, inode);
+ xrcd = find_xrcd(ibudev, inode);
if (!xrcd && !(cmd.oflags & O_CREAT)) {
/* no file descriptor. Need CREATE flag */
ret = -EAGAIN;
@@ -544,7 +624,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs,
&ib_dev);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
@@ -552,7 +632,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
if (!xrcd) {
- xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
+ xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
@@ -574,29 +655,28 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (inode) {
if (new_xrcd) {
/* create new inode/xrcd table entry */
- ret = xrcd_table_insert(file->device, inode, xrcd);
+ ret = xrcd_table_insert(ibudev, inode, xrcd);
if (ret)
goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
if (f.file)
fdput(f);
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
- return uobj_alloc_commit(&obj->uobject, in_len);
+ return uobj_alloc_commit(&obj->uobject);
err_copy:
if (inode) {
if (new_xrcd)
- xrcd_table_delete(file->device, inode);
+ xrcd_table_delete(ibudev, inode);
atomic_dec(&xrcd->usecnt);
}
@@ -610,22 +690,21 @@ err_tree_mutex_unlock:
if (f.file)
fdput(f);
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
return ret;
}
-ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_close_xrcd cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs);
}
int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
@@ -653,29 +732,19 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
return ret;
}
-ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
@@ -684,11 +753,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -704,7 +773,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata);
+ cmd.access_flags, &attrs->driver_udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -725,14 +794,13 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
resp.rkey = mr->rkey;
resp.mr_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_dereg_mr(mr);
@@ -745,29 +813,19 @@ err_free:
return ret;
}
-ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_udata udata;
struct ib_pd *pd = NULL;
struct ib_mr *mr;
struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
return -EINVAL;
@@ -777,7 +835,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -796,7 +854,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
if (cmd.flags & IB_MR_REREG_PD) {
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
- file);
+ attrs);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -804,9 +862,9 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
old_pd = mr->pd;
- ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd, &udata);
+ ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length,
+ cmd.hca_va, cmd.access_flags, pd,
+ &attrs->driver_udata);
if (!ret) {
if (cmd.flags & IB_MR_REREG_PD) {
atomic_inc(&pd->usecnt);
@@ -821,10 +879,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
- ret = -EFAULT;
- else
- ret = in_len;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
@@ -836,54 +891,43 @@ put_uobjs:
return ret;
}
-ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dereg_mr cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
}
-ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
struct ib_uverbs_alloc_mw_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
- struct ib_udata udata;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
+ mw = pd->device->alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
goto err_put;
@@ -900,13 +944,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
resp.rkey = mw->rkey;
resp.mw_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
uverbs_dealloc_mw(mw);
@@ -917,36 +960,32 @@ err_free:
return ret;
}
-ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_mw cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs);
}
-ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
struct ib_uobject *uobj;
struct ib_uverbs_completion_event_file *ev_file;
struct ib_device *ib_dev;
+ int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -956,25 +995,17 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret) {
uobj_alloc_abort(uobj);
- return -EFAULT;
+ return ret;
}
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
}
-static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_cq *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *udata,
- void *context),
- void *context)
+static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_cq *cmd)
{
struct ib_ucq_object *obj;
struct ib_uverbs_completion_event_file *ev_file = NULL;
@@ -984,21 +1015,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
struct ib_cq_init_attr attr = {};
struct ib_device *ib_dev;
- if (cmd->comp_vector >= file->device->num_comp_vectors)
+ if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return obj;
- if (!ib_dev->create_cq) {
- ret = -EOPNOTSUPP;
- goto err;
- }
-
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
if (IS_ERR(ev_file)) {
ret = PTR_ERR(ev_file);
goto err;
@@ -1013,11 +1039,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
attr.cqe = cmd->cqe;
attr.comp_vector = cmd->comp_vector;
+ attr.flags = cmd->flags;
- if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
- attr.flags = cmd->flags;
-
- cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1034,18 +1059,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
resp.base.cqe = cq->cqe;
-
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
cq->res.type = RDMA_RESTRACK_CQ;
rdma_restrack_add(&cq->res);
- ret = cb(file, obj, &resp, ucore, context);
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_cb;
- ret = uobj_alloc_commit(&obj->uobject, 0);
+ ret = uobj_alloc_commit(&obj->uobject);
if (ret)
return ERR_PTR(ret);
return obj;
@@ -1055,7 +1078,7 @@ err_cb:
err_file:
if (ev_file)
- ib_uverbs_release_ucq(file, ev_file, obj);
+ ib_uverbs_release_ucq(attrs->ufile, ev_file, obj);
err:
uobj_alloc_abort(&obj->uobject);
@@ -1063,41 +1086,16 @@ err:
return ERR_PTR(ret);
}
-static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_ex_create_cq cmd_ex;
- struct ib_uverbs_create_cq_resp resp;
- struct ib_udata ucore;
- struct ib_udata uhw;
struct ib_ucq_object *obj;
+ int ret;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
- sizeof(cmd), sizeof(resp));
-
- ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1105,43 +1103,19 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(file, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), comp_channel) +
- sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
- NULL);
-
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return in_len;
-}
-
-static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
+ obj = create_cq(attrs, &cmd_ex);
+ return PTR_ERR_OR_ZERO(obj);
}
-int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_cq_resp resp;
struct ib_uverbs_ex_create_cq cmd;
struct ib_ucq_object *obj;
- int err;
-
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask)
return -EINVAL;
@@ -1149,52 +1123,36 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- obj = create_cq(file, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_cq_cb, NULL);
-
+ obj = create_cq(attrs, &cmd);
return PTR_ERR_OR_ZERO(obj);
}
-ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_resize_cq cmd;
struct ib_uverbs_resize_cq_resp resp = {};
- struct ib_udata udata;
struct ib_cq *cq;
int ret = -EINVAL;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
- ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+ ret = cq->device->resize_cq(cq, cmd.cqe, &attrs->driver_udata);
if (ret)
goto out;
resp.cqe = cq->cqe;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp.cqe))
- ret = -EFAULT;
-
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
uobj_put_obj_read(cq);
- return ret ? ret : in_len;
+ return ret;
}
static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
@@ -1227,9 +1185,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
return 0;
}
-ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_poll_cq cmd;
struct ib_uverbs_poll_cq_resp resp;
@@ -1239,15 +1195,16 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
struct ib_wc wc;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
- header_ptr = u64_to_user_ptr(cmd.response);
+ header_ptr = attrs->ucore.outbuf;
data_ptr = header_ptr + sizeof resp;
memset(&resp, 0, sizeof resp);
@@ -1271,24 +1228,24 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
goto out_put;
}
- ret = in_len;
+ ret = 0;
out_put:
uobj_put_obj_read(cq);
return ret;
}
-ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_req_notify_cq cmd;
struct ib_cq *cq;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
@@ -1297,22 +1254,22 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
uobj_put_obj_read(cq);
- return in_len;
+ return 0;
}
-ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_cq cmd;
struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj;
struct ib_ucq_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -1323,21 +1280,11 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static int create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_qp *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *udata),
- void *context)
+static int create_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_qp *cmd)
{
struct ib_uqp_object *obj;
struct ib_device *device;
@@ -1347,7 +1294,6 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
- char *buf;
struct ib_qp_init_attr attr = {};
struct ib_uverbs_ex_create_qp_resp resp;
int ret;
@@ -1358,7 +1304,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -1366,12 +1312,10 @@ static int create_qp(struct ib_uverbs_file *file,
obj->uevent.uobject.user_handle = cmd->user_handle;
mutex_init(&obj->mcast_lock);
- if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
- sizeof(cmd->rwq_ind_tbl_handle) &&
- (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
+ if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) {
ind_tbl = uobj_get_obj_read(rwq_ind_table,
UVERBS_OBJECT_RWQ_IND_TBL,
- cmd->rwq_ind_tbl_handle, file);
+ cmd->rwq_ind_tbl_handle, attrs);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1380,13 +1324,6 @@ static int create_qp(struct ib_uverbs_file *file,
attr.rwq_ind_tbl = ind_tbl;
}
- if (cmd_sz > sizeof(*cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(*cmd),
- cmd_sz - sizeof(*cmd))) {
- ret = -EOPNOTSUPP;
- goto err_put;
- }
-
if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
ret = -EINVAL;
goto err_put;
@@ -1397,7 +1334,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
- file);
+ attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -1417,7 +1354,7 @@ static int create_qp(struct ib_uverbs_file *file,
} else {
if (cmd->is_srq) {
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
- cmd->srq_handle, file);
+ cmd->srq_handle, attrs);
if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
goto err_put;
@@ -1428,7 +1365,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
rcq = uobj_get_obj_read(
cq, UVERBS_OBJECT_CQ,
- cmd->recv_cq_handle, file);
+ cmd->recv_cq_handle, attrs);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1439,11 +1376,11 @@ static int create_qp(struct ib_uverbs_file *file,
if (has_sq)
scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
- cmd->send_cq_handle, file);
+ cmd->send_cq_handle, attrs);
if (!ind_tbl)
rcq = rcq ?: scq;
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
- file);
+ attrs);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1453,7 +1390,7 @@ static int create_qp(struct ib_uverbs_file *file,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
+ attr.qp_context = attrs->ufile;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
@@ -1473,10 +1410,7 @@ static int create_qp(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
- sizeof(cmd->create_flags))
- attr.create_flags = cmd->create_flags;
-
+ attr.create_flags = cmd->create_flags;
if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
@@ -1498,18 +1432,10 @@ static int create_qp(struct ib_uverbs_file *file,
attr.source_qpn = cmd->source_qpn;
}
- buf = (void *)cmd + sizeof(*cmd);
- if (cmd_sz > sizeof(*cmd))
- if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
- cmd_sz - sizeof(*cmd) - 1))) {
- ret = -EINVAL;
- goto err_put;
- }
-
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = _ib_create_qp(device, pd, &attr, uhw,
+ qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
&obj->uevent.uobject);
if (IS_ERR(qp)) {
@@ -1557,11 +1483,9 @@ static int create_qp(struct ib_uverbs_file *file,
resp.base.max_recv_wr = attr.cap.max_recv_wr;
resp.base.max_send_wr = attr.cap.max_send_wr;
resp.base.max_inline_data = attr.cap.max_inline_data;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
-
- ret = cb(file, &resp, ucore);
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_cb;
@@ -1583,7 +1507,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_cb:
ib_destroy_qp(qp);
@@ -1605,39 +1529,15 @@ err_put:
return ret;
}
-static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_qp cmd;
struct ib_uverbs_ex_create_qp cmd_ex;
- struct ib_udata ucore;
- struct ib_udata uhw;
- ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
- int err;
-
- if (out_len < resp_size)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ int ret;
- ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
- sizeof(cmd), resp_size);
- ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + resp_size,
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - resp_size);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1654,42 +1554,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
cmd_ex.qp_type = cmd.qp_type;
cmd_ex.is_srq = cmd.is_srq;
- err = create_qp(file, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), is_srq) +
- sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
- NULL);
-
- if (err)
- return err;
-
- return in_len;
+ return create_qp(attrs, &cmd_ex);
}
-static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
+static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
-}
-
-int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
-{
- struct ib_uverbs_ex_create_qp_resp resp;
- struct ib_uverbs_ex_create_qp cmd = {0};
- int err;
-
- if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
- sizeof(cmd.comp_mask)))
- return -EINVAL;
+ struct ib_uverbs_ex_create_qp cmd;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
return -EINVAL;
@@ -1697,26 +1572,13 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- err = create_qp(file, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_qp_cb, NULL);
-
- if (err)
- return err;
-
- return 0;
+ return create_qp(attrs, &cmd);
}
-ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_open_qp cmd;
struct ib_uverbs_create_qp_resp resp;
- struct ib_udata udata;
struct ib_uqp_object *obj;
struct ib_xrcd *xrcd;
struct ib_uobject *uninitialized_var(xrcd_uobj);
@@ -1725,23 +1587,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
- xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err_put;
@@ -1754,7 +1609,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
+ attr.qp_context = attrs->ufile;
attr.qp_num = cmd.qpn;
attr.qp_type = cmd.qp_type;
@@ -1775,17 +1630,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
resp.qpn = qp->qp_num;
resp.qp_handle = obj->uevent.uobject.id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_destroy;
- }
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
qp->uobject = &obj->uevent.uobject;
uobj_put_read(xrcd_uobj);
- return uobj_alloc_commit(&obj->uevent.uobject, in_len);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_destroy:
ib_destroy_qp(qp);
@@ -1818,9 +1672,7 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
uverb_attr->port_num = rdma_ah_get_port_num(rdma_attr);
}
-ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_qp cmd;
struct ib_uverbs_query_qp_resp resp;
@@ -1829,8 +1681,9 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp_init_attr *init_attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
@@ -1839,7 +1692,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1886,14 +1739,13 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.max_inline_data = init_attr->cap.max_inline_data;
resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
kfree(attr);
kfree(init_attr);
- return ret ? ret : in_len;
+ return ret;
}
/* Remove ignored fields set in the attribute mask */
@@ -1933,8 +1785,8 @@ static void copy_ah_attr_from_uverbs(struct ib_device *dev,
rdma_ah_set_make_grd(rdma_attr, false);
}
-static int modify_qp(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
+static int modify_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_modify_qp *cmd)
{
struct ib_qp_attr *attr;
struct ib_qp *qp;
@@ -1944,7 +1796,8 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
+ attrs);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2081,7 +1934,7 @@ static int modify_qp(struct ib_uverbs_file *file,
ret = ib_modify_qp_with_udata(qp, attr,
modify_qp_mask(qp->qp_type,
cmd->base.attr_mask),
- udata);
+ &attrs->driver_udata);
release_qp:
uobj_put_obj_read(qp);
@@ -2091,80 +1944,64 @@ out:
return ret;
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_qp cmd = {};
- struct ib_udata udata;
+ struct ib_uverbs_ex_modify_qp cmd;
int ret;
- if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base));
+ if (ret)
+ return ret;
if (cmd.base.attr_mask &
~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
return -EOPNOTSUPP;
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd.base), NULL,
- in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len);
-
- ret = modify_qp(file, &cmd, &udata);
- if (ret)
- return ret;
-
- return in_len;
+ return modify_qp(attrs, &cmd);
}
-int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_qp cmd = {};
+ struct ib_uverbs_ex_modify_qp cmd;
+ struct ib_uverbs_ex_modify_qp_resp resp = {
+ .response_length = uverbs_response_length(attrs, sizeof(resp))
+ };
int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+
/*
* Last bit is reserved for extending the attr_mask by
* using another field.
*/
BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
- if (ucore->inlen < sizeof(cmd.base))
- return -EINVAL;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
- if (ret)
- return ret;
-
if (cmd.base.attr_mask &
~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
return -EOPNOTSUPP;
- if (ucore->inlen > sizeof(cmd)) {
- if (!ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
- }
-
- ret = modify_qp(file, &cmd, uhw);
+ ret = modify_qp(attrs, &cmd);
+ if (ret)
+ return ret;
- return ret;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_uobject *uobj;
struct ib_uqp_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2174,10 +2011,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
static void *alloc_wr(size_t wr_size, __u32 num_sge)
@@ -2190,9 +2024,7 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge)
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
}
-ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
@@ -2202,24 +2034,31 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_qp *qp;
int i, sg_ind;
int is_ud;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
size_t next_size;
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
- cmd.sge_count * sizeof (struct ib_uverbs_sge))
- return -EINVAL;
-
- if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
- return -EINVAL;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+ wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count);
+ if (IS_ERR(wqes))
+ return PTR_ERR(wqes);
+ sgls = uverbs_request_next_ptr(
+ &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge));
+ if (IS_ERR(sgls))
+ return PTR_ERR(sgls);
+ ret = uverbs_request_finish(&iter);
+ if (ret)
+ return ret;
user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
if (!user_wr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
goto out;
@@ -2227,8 +2066,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
sg_ind = 0;
last = NULL;
for (i = 0; i < cmd.wr_count; ++i) {
- if (copy_from_user(user_wr,
- buf + sizeof cmd + i * cmd.wqe_size,
+ if (copy_from_user(user_wr, wqes + i * cmd.wqe_size,
cmd.wqe_size)) {
ret = -EFAULT;
goto out_put;
@@ -2256,7 +2094,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
- user_wr->wr.ud.ah, file);
+ user_wr->wr.ud.ah, attrs);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2336,11 +2174,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(next_size, sizeof(struct ib_sge));
- if (copy_from_user(next->sg_list,
- buf + sizeof cmd +
- cmd.wr_count * cmd.wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto out_put;
}
@@ -2358,8 +2194,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out_put:
uobj_put_obj_read(qp);
@@ -2375,28 +2212,35 @@ out_put:
out:
kfree(user_wr);
- return ret ? ret : in_len;
+ return ret;
}
-static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
- int in_len,
- u32 wr_count,
- u32 sge_count,
- u32 wqe_size)
+static struct ib_recv_wr *
+ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
+ u32 wqe_size, u32 sge_count)
{
struct ib_uverbs_recv_wr *user_wr;
struct ib_recv_wr *wr = NULL, *last, *next;
int sg_ind;
int i;
int ret;
-
- if (in_len < wqe_size * wr_count +
- sge_count * sizeof (struct ib_uverbs_sge))
- return ERR_PTR(-EINVAL);
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
return ERR_PTR(-EINVAL);
+ wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count);
+ if (IS_ERR(wqes))
+ return ERR_CAST(wqes);
+ sgls = uverbs_request_next_ptr(
+ iter, sge_count * sizeof(struct ib_uverbs_sge));
+ if (IS_ERR(sgls))
+ return ERR_CAST(sgls);
+ ret = uverbs_request_finish(iter);
+ if (ret)
+ return ERR_PTR(ret);
+
user_wr = kmalloc(wqe_size, GFP_KERNEL);
if (!user_wr)
return ERR_PTR(-ENOMEM);
@@ -2404,7 +2248,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
sg_ind = 0;
last = NULL;
for (i = 0; i < wr_count; ++i) {
- if (copy_from_user(user_wr, buf + i * wqe_size,
+ if (copy_from_user(user_wr, wqes + i * wqe_size,
wqe_size)) {
ret = -EFAULT;
goto err;
@@ -2443,10 +2287,9 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(sizeof *next, sizeof (struct ib_sge));
- if (copy_from_user(next->sg_list,
- buf + wr_count * wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto err;
}
@@ -2470,29 +2313,30 @@ err:
return ERR_PTR(ret);
}
-ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_recv cmd;
struct ib_uverbs_post_recv_resp resp;
struct ib_recv_wr *wr, *next;
const struct ib_recv_wr *bad_wr;
struct ib_qp *qp;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
- if (!qp)
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (!qp) {
+ ret = -EINVAL;
goto out;
+ }
resp.bad_wr = 0;
ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
@@ -2506,9 +2350,9 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
}
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
-
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
next = wr->next;
@@ -2516,36 +2360,36 @@ out:
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_srq_recv cmd;
struct ib_uverbs_post_srq_recv_resp resp;
struct ib_recv_wr *wr, *next;
const struct ib_recv_wr *bad_wr;
struct ib_srq *srq;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
- if (!srq)
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (!srq) {
+ ret = -EINVAL;
goto out;
+ }
resp.bad_wr = 0;
- ret = srq->device->post_srq_recv ?
- srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP;
+ ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
uobj_put_obj_read(srq);
@@ -2556,8 +2400,9 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
@@ -2566,12 +2411,10 @@ out:
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_ah cmd;
struct ib_uverbs_create_ah_resp resp;
@@ -2580,21 +2423,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_ah *ah;
struct rdma_ah_attr attr = {};
int ret;
- struct ib_udata udata;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2603,7 +2438,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
goto err;
}
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2627,7 +2462,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
rdma_ah_set_ah_flags(&attr, 0);
}
- ah = rdma_create_user_ah(pd, &attr, &udata);
+ ah = rdma_create_user_ah(pd, &attr, &attrs->driver_udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
@@ -2639,13 +2474,12 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
resp.ah_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
rdma_destroy_ah(ah);
@@ -2658,21 +2492,19 @@ err:
return ret;
}
-ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_ah cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs);
}
-ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_attach_mcast cmd;
struct ib_qp *qp;
@@ -2680,10 +2512,11 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
struct ib_uverbs_mcast_entry *mcast;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
return -EINVAL;
@@ -2716,12 +2549,10 @@ out_put:
mutex_unlock(&obj->mcast_lock);
uobj_put_obj_read(qp);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_detach_mcast cmd;
struct ib_uqp_object *obj;
@@ -2730,10 +2561,11 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
int ret = -EINVAL;
bool found = false;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
return -EINVAL;
@@ -2759,7 +2591,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
out_put:
mutex_unlock(&obj->mcast_lock);
uobj_put_obj_read(qp);
- return ret ? ret : in_len;
+ return ret;
}
struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
@@ -2838,7 +2670,7 @@ void flow_resources_add(struct ib_uflow_resources *uflow_res,
}
EXPORT_SYMBOL(flow_resources_add);
-static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
+static int kern_spec_to_ib_spec_action(const struct uverbs_attr_bundle *attrs,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -2867,7 +2699,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
ib_spec->action.act = uobj_get_obj_read(flow_action,
UVERBS_OBJECT_FLOW_ACTION,
kern_spec->action.handle,
- ufile);
+ attrs);
if (!ib_spec->action.act)
return -EINVAL;
ib_spec->action.size =
@@ -2885,7 +2717,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
uobj_get_obj_read(counters,
UVERBS_OBJECT_COUNTERS,
kern_spec->flow_count.handle,
- ufile);
+ attrs);
if (!ib_spec->flow_count.counters)
return -EINVAL;
ib_spec->flow_count.size =
@@ -3066,7 +2898,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
kern_filter_sz, ib_spec);
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
+static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -3075,17 +2907,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
return -EINVAL;
if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
- return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
+ return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec,
uflow_res);
else
return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
}
-int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_wq cmd = {};
+ struct ib_uverbs_ex_create_wq cmd;
struct ib_uverbs_ex_create_wq_resp resp = {};
struct ib_uwq_object *obj;
int err = 0;
@@ -3093,43 +2923,27 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_pd *pd;
struct ib_wq *wq;
struct ib_wq_init_attr wq_init_attr = {};
- size_t required_cmd_sz;
- size_t required_resp_len;
struct ib_device *ib_dev;
- required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
- required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3138,20 +2952,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
- wq_init_attr.wq_context = file;
+ wq_init_attr.wq_context = attrs->ufile;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
- if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
- sizeof(cmd.create_flags)))
- wq_init_attr.create_flags = cmd.create_flags;
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- if (!pd->device->create_wq) {
- err = -EOPNOTSUPP;
- goto err_put_cq;
- }
- wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
+ wq = pd->device->create_wq(pd, &wq_init_attr, &attrs->driver_udata);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
goto err_put_cq;
@@ -3175,15 +2983,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
- resp.response_length = required_resp_len;
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
uobj_put_obj_read(pd);
uobj_put_obj_read(cq);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_copy:
ib_destroy_wq(wq);
@@ -3197,41 +3004,23 @@ err_uobj:
return err;
}
-int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_wq cmd = {};
+ struct ib_uverbs_ex_destroy_wq cmd;
struct ib_uverbs_ex_destroy_wq_resp resp = {};
struct ib_uobject *uobj;
struct ib_uwq_object *obj;
- size_t required_cmd_sz;
- size_t required_resp_len;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
- required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- resp.response_length = required_resp_len;
- uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3240,29 +3029,17 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- return ib_copy_to_udata(ucore, &resp, resp.response_length);
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_wq cmd = {};
+ struct ib_uverbs_ex_modify_wq cmd;
struct ib_wq *wq;
struct ib_wq_attr wq_attr = {};
- size_t required_cmd_sz;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -3272,7 +3049,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
if (!wq)
return -EINVAL;
@@ -3282,24 +3059,18 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
- if (!wq->device->modify_wq) {
- ret = -EOPNOTSUPP;
- goto out;
- }
- ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
-out:
+ ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask,
+ &attrs->driver_udata);
uobj_put_obj_read(wq);
return ret;
}
-int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_rwq_ind_table cmd = {};
+ struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
struct ib_uobject *uobj;
- int err = 0;
+ int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_wq **wqs = NULL;
@@ -3307,27 +3078,13 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_wq *wq = NULL;
int i, j, num_read_wqs;
u32 num_wq_handles;
- u32 expected_in_size;
- size_t required_cmd_sz_header;
- size_t required_resp_len;
+ struct uverbs_req_iter iter;
struct ib_device *ib_dev;
- required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
- required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
-
- if (ucore->inlen < required_cmd_sz_header)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf += required_cmd_sz_header;
- ucore->inlen -= required_cmd_sz_header;
-
if (cmd.comp_mask)
return -EOPNOTSUPP;
@@ -3335,26 +3092,17 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
return -EINVAL;
num_wq_handles = 1 << cmd.log_ind_tbl_size;
- expected_in_size = num_wq_handles * sizeof(__u32);
- if (num_wq_handles == 1)
- /* input size for wq handles is u64 aligned */
- expected_in_size += sizeof(__u32);
-
- if (ucore->inlen < expected_in_size)
- return -EINVAL;
-
- if (ucore->inlen > expected_in_size &&
- !ib_is_udata_cleared(ucore, expected_in_size,
- ucore->inlen - expected_in_size))
- return -EOPNOTSUPP;
-
wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
GFP_KERNEL);
if (!wqs_handles)
return -ENOMEM;
- err = ib_copy_from_udata(wqs_handles, ucore,
- num_wq_handles * sizeof(__u32));
+ err = uverbs_request_next(&iter, wqs_handles,
+ num_wq_handles * sizeof(__u32));
+ if (err)
+ goto err_free;
+
+ err = uverbs_request_finish(&iter);
if (err)
goto err_free;
@@ -3367,7 +3115,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
- wqs_handles[num_read_wqs], file);
+ wqs_handles[num_read_wqs], attrs);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3376,7 +3124,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto put_wqs;
@@ -3385,11 +3133,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
- if (!ib_dev->create_rwq_ind_table) {
- err = -EOPNOTSUPP;
- goto err_uobj;
- }
- rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
+ rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr,
+ &attrs->driver_udata);
if (IS_ERR(rwq_ind_tbl)) {
err = PTR_ERR(rwq_ind_tbl);
@@ -3408,10 +3153,9 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
- resp.response_length = required_resp_len;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
@@ -3420,7 +3164,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (j = 0; j < num_read_wqs; j++)
uobj_put_obj_read(wqs[j]);
- return uobj_alloc_commit(uobj, 0);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3435,25 +3179,12 @@ err_free:
return err;
}
-int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- int ret;
- size_t required_cmd_sz;
-
- required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
+ struct ib_uverbs_ex_destroy_rwq_ind_table cmd;
+ int ret;
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -3461,12 +3192,10 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
- cmd.ind_tbl_handle, file, 0);
+ cmd.ind_tbl_handle, attrs);
}
-int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
@@ -3477,24 +3206,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_uflow_resources *uflow_res;
struct ib_uverbs_flow_spec_hdr *kern_spec;
- int err = 0;
+ struct uverbs_req_iter iter;
+ int err;
void *ib_spec;
int i;
struct ib_device *ib_dev;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- if (ucore->outlen < sizeof(resp))
- return -ENOSPC;
-
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf += sizeof(cmd);
- ucore->inlen -= sizeof(cmd);
-
if (cmd.comp_mask)
return -EINVAL;
@@ -3512,8 +3233,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
return -EINVAL;
- if (cmd.flow_attr.size > ucore->inlen ||
- cmd.flow_attr.size >
+ if (cmd.flow_attr.size >
(cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
return -EINVAL;
@@ -3528,21 +3248,25 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
return -ENOMEM;
*kern_flow_attr = cmd.flow_attr;
- err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore,
- cmd.flow_attr.size);
+ err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs,
+ cmd.flow_attr.size);
if (err)
goto err_free_attr;
} else {
kern_flow_attr = &cmd.flow_attr;
}
- uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
+ err = uverbs_request_finish(&iter);
+ if (err)
+ goto err_free_attr;
+
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3553,11 +3277,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
goto err_put;
}
- if (!qp->device->create_flow) {
- err = -EOPNOTSUPP;
- goto err_put;
- }
-
flow_attr = kzalloc(struct_size(flow_attr, flows,
cmd.flow_attr.num_of_specs), GFP_KERNEL);
if (!flow_attr) {
@@ -3584,7 +3303,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
cmd.flow_attr.size >= kern_spec->size;
i++) {
err = kern_spec_to_ib_spec(
- file, (struct ib_uverbs_flow_spec *)kern_spec,
+ attrs, (struct ib_uverbs_flow_spec *)kern_spec,
ib_spec, uflow_res);
if (err)
goto err_free;
@@ -3602,8 +3321,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
goto err_free;
}
- flow_id = qp->device->create_flow(qp, flow_attr,
- IB_FLOW_DOMAIN_USER, uhw);
+ flow_id = qp->device->create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER,
+ &attrs->driver_udata);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
@@ -3615,8 +3334,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
- err = ib_copy_to_udata(ucore,
- &resp, sizeof(resp));
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
@@ -3624,7 +3342,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return uobj_alloc_commit(uobj, 0);
+ return uobj_alloc_commit(uobj);
err_copy:
if (!qp->device->destroy_flow(flow_id))
atomic_dec(&qp->usecnt);
@@ -3642,28 +3360,22 @@ err_free_attr:
return err;
}
-int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_flow cmd;
int ret;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EINVAL;
- return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
- 0);
+ return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs);
}
-static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
+static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
@@ -3676,7 +3388,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
int ret;
struct ib_device *ib_dev;
- obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -3686,7 +3398,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
- file);
+ attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
@@ -3704,21 +3416,21 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (ib_srq_has_cq(cmd->srq_type)) {
attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
- cmd->cq_handle, file);
+ cmd->cq_handle, attrs);
if (!attr.ext.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
}
attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = file;
+ attr.srq_context = attrs->ufile;
attr.srq_type = cmd->srq_type;
attr.attr.max_wr = cmd->max_wr;
attr.attr.max_sge = cmd->max_sge;
@@ -3763,11 +3475,9 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user(u64_to_user_ptr(cmd->response),
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
if (cmd->srq_type == IB_SRQT_XRC)
uobj_put_read(xrcd_uobj);
@@ -3776,7 +3486,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
uobj_put_obj_read(attr.ext.cq);
uobj_put_obj_read(pd);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_copy:
ib_destroy_srq(srq);
@@ -3799,21 +3509,15 @@ err:
return ret;
}
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_xsrq xcmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&xcmd, 0, sizeof(xcmd));
xcmd.response = cmd.response;
@@ -3824,77 +3528,48 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
xcmd.max_sge = cmd.max_sge;
xcmd.srq_limit = cmd.srq_limit;
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- ret = __uverbs_create_xsrq(file, &xcmd, &udata);
- if (ret)
- return ret;
-
- return in_len;
+ return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_xsrq cmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- ret = __uverbs_create_xsrq(file, &cmd, &udata);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- return in_len;
+ return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_modify_srq cmd;
- struct ib_udata udata;
struct ib_srq *srq;
struct ib_srq_attr attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (!srq)
return -EINVAL;
attr.max_wr = cmd.max_wr;
attr.srq_limit = cmd.srq_limit;
- ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
+ ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask,
+ &attrs->driver_udata);
uobj_put_obj_read(srq);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_srq cmd;
struct ib_uverbs_query_srq_resp resp;
@@ -3902,13 +3577,11 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (!srq)
return -EINVAL;
@@ -3925,25 +3598,22 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
resp.max_sge = attr.max_sge;
resp.srq_limit = attr.srq_limit;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
struct ib_uevent_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3953,35 +3623,24 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_query_device_resp resp = { {0} };
+ struct ib_uverbs_ex_query_device_resp resp = {};
struct ib_uverbs_ex_query_device cmd;
struct ib_device_attr attr = {0};
struct ib_ucontext *ucontext;
struct ib_device *ib_dev;
int err;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
ib_dev = ucontext->device;
- if (!ib_dev->query_device)
- return -EOPNOTSUPP;
-
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
@@ -3991,20 +3650,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- resp.response_length = offsetof(typeof(resp), odp_caps);
-
- if (ucore->outlen < resp.response_length)
- return -ENOSPC;
-
- err = ib_dev->query_device(ib_dev, &attr, uhw);
+ err = ib_dev->query_device(ib_dev, &attr, &attrs->driver_udata);
if (err)
return err;
copy_query_dev_fields(ucontext, &resp.base, &attr);
- if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
- goto end;
-
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
resp.odp_caps.general_caps = attr.odp_caps.general_caps;
resp.odp_caps.per_transport_caps.rc_odp_caps =
@@ -4014,99 +3665,39 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.odp_caps.per_transport_caps.ud_odp_caps =
attr.odp_caps.per_transport_caps.ud_odp_caps;
#endif
- resp.response_length += sizeof(resp.odp_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
- goto end;
resp.timestamp_mask = attr.timestamp_mask;
- resp.response_length += sizeof(resp.timestamp_mask);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
- goto end;
-
resp.hca_core_clock = attr.hca_core_clock;
- resp.response_length += sizeof(resp.hca_core_clock);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
- goto end;
-
resp.device_cap_flags_ex = attr.device_cap_flags;
- resp.response_length += sizeof(resp.device_cap_flags_ex);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
- goto end;
-
resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
resp.rss_caps.max_rwq_indirection_tables =
attr.rss_caps.max_rwq_indirection_tables;
resp.rss_caps.max_rwq_indirection_table_size =
attr.rss_caps.max_rwq_indirection_table_size;
-
- resp.response_length += sizeof(resp.rss_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
- goto end;
-
resp.max_wq_type_rq = attr.max_wq_type_rq;
- resp.response_length += sizeof(resp.max_wq_type_rq);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
- goto end;
-
resp.raw_packet_caps = attr.raw_packet_caps;
- resp.response_length += sizeof(resp.raw_packet_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
- goto end;
-
resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
resp.tm_caps.max_ops = attr.tm_caps.max_ops;
resp.tm_caps.max_sge = attr.tm_caps.max_sge;
resp.tm_caps.flags = attr.tm_caps.flags;
- resp.response_length += sizeof(resp.tm_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps))
- goto end;
-
resp.cq_moderation_caps.max_cq_moderation_count =
attr.cq_caps.max_cq_moderation_count;
resp.cq_moderation_caps.max_cq_moderation_period =
attr.cq_caps.max_cq_moderation_period;
- resp.response_length += sizeof(resp.cq_moderation_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size))
- goto end;
-
resp.max_dm_size = attr.max_dm_size;
- resp.response_length += sizeof(resp.max_dm_size);
-end:
- err = ib_copy_to_udata(ucore, &resp, resp.response_length);
- return err;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_cq cmd = {};
+ struct ib_uverbs_ex_modify_cq cmd;
struct ib_cq *cq;
- size_t required_cmd_sz;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), reserved) +
- sizeof(cmd.reserved);
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- /* sanity checks */
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -4116,7 +3707,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
if (cmd.attr_mask > IB_CQ_MODERATE)
return -EOPNOTSUPP;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
@@ -4126,3 +3717,381 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
return ret;
}
+
+/*
+ * Describe the input structs for write(). Some write methods have an input
+ * only struct, most have an input and output. If the struct has an output then
+ * the 'response' u64 must be the first field in the request structure.
+ *
+ * If udata is present then both the request and response structs have a
+ * trailing driver_data flex array. In this case the size of the base struct
+ * cannot be changed.
+ */
+#define offsetof_after(_struct, _member) \
+ (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member))
+
+#define UAPI_DEF_WRITE_IO(req, resp) \
+ .write.has_resp = 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
+ BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \
+ sizeof(u64)), \
+ .write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
+
+#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req)
+
+#define UAPI_DEF_WRITE_UDATA_IO(req, resp) \
+ UAPI_DEF_WRITE_IO(req, resp), \
+ .write.has_udata = \
+ 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req)) + \
+ BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) != \
+ sizeof(resp))
+
+#define UAPI_DEF_WRITE_UDATA_I(req) \
+ UAPI_DEF_WRITE_I(req), \
+ .write.has_udata = \
+ 1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req))
+
+/*
+ * The _EX versions are for use with WRITE_EX and allow the last struct member
+ * to be specified. Buffers that do not include that member will be rejected.
+ */
+#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \
+ .write.has_resp = 1, \
+ .write.req_size = offsetof_after(req, req_last_member), \
+ .write.resp_size = offsetof_after(resp, resp_last_member)
+
+#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \
+ .write.req_size = offsetof_after(req, req_last_member)
+
+const struct uapi_definition uverbs_def_write_intf[] = {
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_AH,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH,
+ ib_uverbs_create_ah,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_ah,
+ struct ib_uverbs_create_ah_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_ah)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_AH,
+ ib_uverbs_destroy_ah,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
+ ib_uverbs_create_comp_channel,
+ UAPI_DEF_WRITE_IO(
+ struct ib_uverbs_create_comp_channel,
+ struct ib_uverbs_create_comp_channel_resp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_CQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ,
+ ib_uverbs_create_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_cq,
+ struct ib_uverbs_create_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ ib_uverbs_destroy_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq,
+ struct ib_uverbs_destroy_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POLL_CQ,
+ ib_uverbs_poll_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq,
+ struct ib_uverbs_poll_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(poll_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+ ib_uverbs_req_notify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ,
+ ib_uverbs_resize_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_resize_cq,
+ struct ib_uverbs_resize_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(resize_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ ib_uverbs_ex_create_cq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq,
+ reserved,
+ struct ib_uverbs_ex_create_cq_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_CQ,
+ ib_uverbs_ex_modify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_DEVICE,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT,
+ ib_uverbs_get_context,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_get_context,
+ struct ib_uverbs_get_context_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_DEVICE,
+ ib_uverbs_query_device,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device,
+ struct ib_uverbs_query_device_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ ib_uverbs_query_port,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port,
+ struct ib_uverbs_query_port_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_port)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_QUERY_DEVICE,
+ ib_uverbs_ex_query_device,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_query_device,
+ reserved,
+ struct ib_uverbs_ex_query_device_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(query_device)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_FLOW,
+ ib_uverbs_ex_create_flow,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow,
+ flow_attr,
+ struct ib_uverbs_create_flow_resp,
+ flow_handle),
+ UAPI_DEF_METHOD_NEEDS_FN(create_flow)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+ ib_uverbs_ex_destroy_flow,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MR,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR,
+ ib_uverbs_dereg_mr,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr),
+ UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REG_MR,
+ ib_uverbs_reg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr,
+ struct ib_uverbs_reg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REREG_MR,
+ ib_uverbs_rereg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
+ struct ib_uverbs_rereg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MW,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_MW,
+ ib_uverbs_alloc_mw,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw,
+ struct ib_uverbs_alloc_mw_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_MW,
+ ib_uverbs_dealloc_mw,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_PD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ ib_uverbs_alloc_pd,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd,
+ struct ib_uverbs_alloc_pd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ ib_uverbs_dealloc_pd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_QP,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ATTACH_MCAST,
+ ib_uverbs_attach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP,
+ ib_uverbs_create_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_qp,
+ struct ib_uverbs_create_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_QP,
+ ib_uverbs_destroy_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp,
+ struct ib_uverbs_destroy_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DETACH_MCAST,
+ ib_uverbs_detach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ ib_uverbs_modify_qp,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_RECV,
+ ib_uverbs_post_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv,
+ struct ib_uverbs_post_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SEND,
+ ib_uverbs_post_send,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send,
+ struct ib_uverbs_post_send_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_send)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_QP,
+ ib_uverbs_query_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp,
+ struct ib_uverbs_query_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_QP,
+ ib_uverbs_ex_create_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp,
+ comp_mask,
+ struct ib_uverbs_ex_create_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_QP,
+ ib_uverbs_ex_modify_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp,
+ base,
+ struct ib_uverbs_ex_modify_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+ ib_uverbs_ex_create_rwq_ind_table,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_create_rwq_ind_table,
+ log_ind_tbl_size,
+ struct ib_uverbs_ex_create_rwq_ind_table_resp,
+ ind_tbl_num),
+ UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL,
+ ib_uverbs_ex_destroy_rwq_ind_table,
+ UAPI_DEF_WRITE_I(
+ struct ib_uverbs_ex_destroy_rwq_ind_table),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_WQ,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_WQ,
+ ib_uverbs_ex_create_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq,
+ max_sge,
+ struct ib_uverbs_ex_create_wq_resp,
+ wqn),
+ UAPI_DEF_METHOD_NEEDS_FN(create_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_WQ,
+ ib_uverbs_ex_destroy_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq,
+ wq_handle,
+ struct ib_uverbs_ex_destroy_wq_resp,
+ reserved),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+ ib_uverbs_ex_modify_wq,
+ UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq,
+ curr_wq_state),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_wq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ,
+ ib_uverbs_create_srq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_srq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ,
+ ib_uverbs_create_xsrq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_xsrq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_SRQ,
+ ib_uverbs_destroy_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq,
+ struct ib_uverbs_destroy_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_SRQ,
+ ib_uverbs_modify_srq,
+ UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ ib_uverbs_post_srq_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv,
+ struct ib_uverbs_post_srq_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_SRQ,
+ ib_uverbs_query_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq,
+ struct ib_uverbs_query_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_srq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ ib_uverbs_close_xrcd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
+ ib_uverbs_open_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_qp,
+ struct ib_uverbs_create_qp_resp)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD,
+ ib_uverbs_open_xrcd,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_xrcd,
+ struct ib_uverbs_open_xrcd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))),
+
+ {},
+};
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index b0e493e8d860..e643a43dce8d 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -404,8 +404,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
static int ib_uverbs_run_method(struct bundle_priv *pbundle,
unsigned int num_attrs)
{
- int (*handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int (*handler)(struct uverbs_attr_bundle *attrs);
size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
unsigned int i;
@@ -436,6 +435,9 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
pbundle->method_elm->key_bitmap_len)))
return -EINVAL;
+ if (pbundle->method_elm->has_udata)
+ create_udata(&pbundle->bundle, &pbundle->bundle.driver_udata);
+
if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
struct uverbs_obj_attr *destroy_attr =
&pbundle->bundle.attrs[destroy_bkey].obj_attr;
@@ -445,10 +447,10 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
return ret;
__clear_bit(destroy_bkey, pbundle->uobj_finalize);
- ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+ ret = handler(&pbundle->bundle);
uobj_put_destroy(destroy_attr->uobject);
} else {
- ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+ ret = handler(&pbundle->bundle);
}
/*
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 6d373f5515b7..96a5f89bbb75 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -74,64 +74,6 @@ static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
static DEFINE_IDA(uverbs_ida);
-
-static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len) = {
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
- [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
- [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
- [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
- [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
- [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
- [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
- [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
- [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
- [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
- [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
- [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
- [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
- [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
-};
-
-static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw) = {
- [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
- [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
- [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
- [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
- [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
- [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
- [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
- [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
- [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
- [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq,
-};
-
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
@@ -139,7 +81,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
* Must be called with the ufile->device->disassociate_srcu held, and the lock
* must be held until use of the ucontext is finished.
*/
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
{
/*
* We do not hold the hw_destroy_rwsem lock for this flow, instead
@@ -157,7 +99,7 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
return ucontext;
}
-EXPORT_SYMBOL(ib_uverbs_get_ucontext);
+EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
int uverbs_dealloc_mw(struct ib_mw *mw)
{
@@ -646,51 +588,19 @@ err_put_refs:
return filp;
}
-static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command,
- bool extended)
-{
- if (!extended)
- return ufile->uverbs_cmd_mask & BIT_ULL(command);
-
- return ufile->uverbs_ex_cmd_mask & BIT_ULL(command);
-}
-
-static bool verify_command_idx(u32 command, bool extended)
-{
- if (extended)
- return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
- uverbs_ex_cmd_table[command];
-
- return command < ARRAY_SIZE(uverbs_cmd_table) &&
- uverbs_cmd_table[command];
-}
-
-static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
- u32 *command, bool *extended)
-{
- if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
- IB_USER_VERBS_CMD_COMMAND_MASK))
- return -EINVAL;
-
- *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
- *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
-
- if (!verify_command_idx(*command, *extended))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
- struct ib_uverbs_ex_cmd_hdr *ex_hdr,
- size_t count, bool extended)
+ struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
+ const struct uverbs_api_write_method *method_elm)
{
- if (extended) {
+ if (method_elm->is_ex) {
count -= sizeof(*hdr) + sizeof(*ex_hdr);
if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
return -EINVAL;
+ if (hdr->in_words * 8 < method_elm->req_size)
+ return -ENOSPC;
+
if (ex_hdr->cmd_hdr_reserved)
return -EINVAL;
@@ -698,6 +608,9 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
if (!hdr->out_words && !ex_hdr->provider_out_words)
return -EINVAL;
+ if (hdr->out_words * 8 < method_elm->resp_size)
+ return -ENOSPC;
+
if (!access_ok(VERIFY_WRITE,
u64_to_user_ptr(ex_hdr->response),
(hdr->out_words + ex_hdr->provider_out_words) * 8))
@@ -714,6 +627,24 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
if (hdr->in_words * 4 != count)
return -EINVAL;
+ if (count < method_elm->req_size + sizeof(hdr)) {
+ /*
+ * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
+ * with a 16 byte write instead of 24. Old kernels didn't
+ * check the size so they allowed this. Now that the size is
+ * checked provide a compatibility work around to not break
+ * those userspaces.
+ */
+ if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
+ count == 16) {
+ hdr->in_words = 6;
+ return 0;
+ }
+ return -ENOSPC;
+ }
+ if (hdr->out_words * 4 < method_elm->resp_size)
+ return -ENOSPC;
+
return 0;
}
@@ -721,11 +652,12 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
+ const struct uverbs_api_write_method *method_elm;
+ struct uverbs_api *uapi = file->device->uapi;
struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_uverbs_cmd_hdr hdr;
- bool extended;
+ struct uverbs_attr_bundle bundle;
int srcu_key;
- u32 command;
ssize_t ret;
if (!ib_safe_file_access(filp)) {
@@ -740,57 +672,92 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- ret = process_hdr(&hdr, &command, &extended);
- if (ret)
- return ret;
+ method_elm = uapi_get_method(uapi, hdr.command);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
- if (extended) {
+ if (method_elm->is_ex) {
if (count < (sizeof(hdr) + sizeof(ex_hdr)))
return -EINVAL;
if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
return -EFAULT;
}
- ret = verify_hdr(&hdr, &ex_hdr, count, extended);
+ ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
if (ret)
return ret;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- if (!verify_command_mask(file, command, extended)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
-
buf += sizeof(hdr);
- if (!extended) {
- ret = uverbs_cmd_table[command](file, buf,
- hdr.in_words * 4,
- hdr.out_words * 4);
- } else {
- struct ib_udata ucore;
- struct ib_udata uhw;
+ bundle.ufile = file;
+ if (!method_elm->is_ex) {
+ size_t in_len = hdr.in_words * 4 - sizeof(hdr);
+ size_t out_len = hdr.out_words * 4;
+ u64 response = 0;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.inlen =
+ in_len - method_elm->req_size;
+ in_len = method_elm->req_size;
+ if (bundle.driver_udata.inlen)
+ bundle.driver_udata.inbuf = buf + in_len;
+ else
+ bundle.driver_udata.inbuf = NULL;
+ } else {
+ memset(&bundle.driver_udata, 0,
+ sizeof(bundle.driver_udata));
+ }
+
+ if (method_elm->has_resp) {
+ /*
+ * The macros check that if has_resp is set
+ * then the command request structure starts
+ * with a '__aligned u64 response' member.
+ */
+ ret = get_user(response, (const u64 *)buf);
+ if (ret)
+ goto out_unlock;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.outlen =
+ out_len - method_elm->resp_size;
+ out_len = method_elm->resp_size;
+ if (bundle.driver_udata.outlen)
+ bundle.driver_udata.outbuf =
+ u64_to_user_ptr(response +
+ out_len);
+ else
+ bundle.driver_udata.outbuf = NULL;
+ }
+ } else {
+ bundle.driver_udata.outlen = 0;
+ bundle.driver_udata.outbuf = NULL;
+ }
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.ucore, buf, u64_to_user_ptr(response),
+ in_len, out_len);
+ } else {
buf += sizeof(ex_hdr);
- ib_uverbs_init_udata_buf_or_null(&ucore, buf,
+ ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
u64_to_user_ptr(ex_hdr.response),
hdr.in_words * 8, hdr.out_words * 8);
- ib_uverbs_init_udata_buf_or_null(&uhw,
- buf + ucore.inlen,
- u64_to_user_ptr(ex_hdr.response) + ucore.outlen,
- ex_hdr.provider_in_words * 8,
- ex_hdr.provider_out_words * 8);
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.driver_udata, buf + bundle.ucore.inlen,
+ u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
- ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
- ret = (ret) ? : count;
}
-out:
+ ret = method_elm->handler(&bundle);
+out_unlock:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- return ret;
+ return (ret) ? : count;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -801,7 +768,7 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
int srcu_key;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext_file(file);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
goto out;
@@ -1102,9 +1069,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
- file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
- file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;
-
setup_ufile_idr_uobject(file);
return nonseekable_open(inode, filp);
@@ -1224,7 +1188,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
{
struct uverbs_api *uapi;
- uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
+ uapi = uverbs_alloc_api(device);
if (IS_ERR(uapi))
return PTR_ERR(uapi);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 203cc96ac6f5..063aff9e7a04 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -210,8 +210,7 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
return 0;
};
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
{
return 0;
}
@@ -262,25 +261,28 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
-DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
- &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
- &UVERBS_OBJECT(UVERBS_OBJECT_PD),
- &UVERBS_OBJECT(UVERBS_OBJECT_MR),
- &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
- &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_QP),
- &UVERBS_OBJECT(UVERBS_OBJECT_AH),
- &UVERBS_OBJECT(UVERBS_OBJECT_MW),
- &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
- &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
- &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
- &UVERBS_OBJECT(UVERBS_OBJECT_DM),
- &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
-
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
- return &uverbs_default_objects;
-}
+const struct uapi_definition uverbs_def_obj_intf[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index a0ffdcf9a51c..8835bad5c6dd 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -48,7 +48,7 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
@@ -82,7 +82,7 @@ err_create_counters:
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_counters_read_attr read_attr = {};
const struct uverbs_attr *uattr;
@@ -149,3 +149,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
+
+const struct uapi_definition uverbs_def_obj_counters[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 5b5f2052cd52..859518eab583 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -58,13 +58,12 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_ucq_object *obj = container_of(
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
typeof(*obj), uobject);
struct ib_device *ib_dev = obj->uobject.context->device;
- struct ib_udata uhw;
int ret;
u64 user_handle;
struct ib_cq_init_attr attr = {};
@@ -101,7 +100,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
uverbs_uobject_get(ev_file_uobj);
}
- if (attr.comp_vector >= file->device->num_comp_vectors) {
+ if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
ret = -EINVAL;
goto err_event_file;
}
@@ -111,10 +110,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
INIT_LIST_HEAD(&obj->comp_list);
INIT_LIST_HEAD(&obj->async_list);
- /* Temporary, only until drivers get the new uverbs_attr_bundle */
- create_udata(attrs, &uhw);
-
- cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_event_file;
@@ -173,7 +170,7 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_UHW());
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj =
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
@@ -207,3 +204,9 @@ DECLARE_UVERBS_NAMED_OBJECT(
&UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
#endif
);
+
+const struct uapi_definition uverbs_def_obj_cq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index edc3ff7733d4..658261b8f08e 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -46,9 +46,8 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
return dm->device->dealloc_dm(dm);
}
-static int
-UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
{
struct ib_dm_alloc_attr attr = {};
struct ib_uobject *uobj =
@@ -109,3 +108,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
&UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
&UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
+
+const struct uapi_definition uverbs_def_obj_dm[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index cb9486ad5c67..e4d01fb5335d 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -223,7 +223,6 @@ struct ib_flow_action_esp_attr {
#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
static int parse_flow_action_esp(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs,
struct ib_flow_action_esp_attr *esp_attr,
bool is_modify)
@@ -305,7 +304,7 @@ static int parse_flow_action_esp(struct ib_device *ib_dev,
}
static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
@@ -317,7 +316,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
if (!ib_dev->create_flow_action_esp)
return -EOPNOTSUPP;
- ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false);
+ ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
if (ret)
return ret;
@@ -333,7 +332,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
}
static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
@@ -344,8 +343,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
if (!action->device->modify_flow_action_esp)
return -EOPNOTSUPP;
- ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr,
- true);
+ ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
if (ret)
return ret;
@@ -438,3 +436,10 @@ DECLARE_UVERBS_NAMED_OBJECT(
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
+const struct uapi_definition uverbs_def_obj_flow_action[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index cf02e774303e..70ea48cfc047 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -40,7 +40,7 @@ static int uverbs_free_mr(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_dm_mr_attr attr = {};
struct ib_uobject *uobj =
@@ -147,3 +147,9 @@ DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_MR,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
+
+const struct uapi_definition uverbs_def_obj_mr[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
+ UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 86f3fc5e04b4..0136c1d78a0f 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -8,6 +8,11 @@
#include "rdma_core.h"
#include "uverbs.h"
+static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
{
void *elm;
@@ -26,6 +31,70 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
return elm;
}
+static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key,
+ size_t alloc_size, bool *exists)
+{
+ void *elm;
+
+ elm = uapi_add_elm(uapi, key, alloc_size);
+ if (!IS_ERR(elm)) {
+ *exists = false;
+ return elm;
+ }
+
+ if (elm != ERR_PTR(-EEXIST))
+ return elm;
+
+ elm = radix_tree_lookup(&uapi->radix, key);
+ if (WARN_ON(!elm))
+ return ERR_PTR(-EINVAL);
+ *exists = true;
+ return elm;
+}
+
+static int uapi_create_write(struct uverbs_api *uapi,
+ struct ib_device *ibdev,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 *cur_method_key)
+{
+ struct uverbs_api_write_method *method_elm;
+ u32 method_key = obj_key;
+ bool exists;
+
+ if (def->write.is_ex)
+ method_key |= uapi_key_write_ex_method(def->write.command_num);
+ else
+ method_key |= uapi_key_write_method(def->write.command_num);
+
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex)))
+ return -EINVAL;
+
+ method_elm->is_ex = def->write.is_ex;
+ method_elm->handler = def->func_write;
+ if (def->write.is_ex)
+ method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask &
+ BIT_ULL(def->write.command_num));
+ else
+ method_elm->disabled = !(ibdev->uverbs_cmd_mask &
+ BIT_ULL(def->write.command_num));
+
+ if (!def->write.is_ex && def->func_write) {
+ method_elm->has_udata = def->write.has_udata;
+ method_elm->has_resp = def->write.has_resp;
+ method_elm->req_size = def->write.req_size;
+ method_elm->resp_size = def->write.resp_size;
+ }
+
+ *cur_method_key = method_key;
+ return 0;
+}
+
static int uapi_merge_method(struct uverbs_api *uapi,
struct uverbs_api_object *obj_elm, u32 obj_key,
const struct uverbs_method_def *method,
@@ -34,23 +103,21 @@ static int uapi_merge_method(struct uverbs_api *uapi,
u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
struct uverbs_api_ioctl_method *method_elm;
unsigned int i;
+ bool exists;
if (!method->attrs)
return 0;
- method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm));
- if (IS_ERR(method_elm)) {
- if (method_elm != ERR_PTR(-EEXIST))
- return PTR_ERR(method_elm);
-
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ if (exists) {
/*
* This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
*/
if (WARN_ON(method->handler))
return -EINVAL;
- method_elm = radix_tree_lookup(&uapi->radix, method_key);
- if (WARN_ON(!method_elm))
- return -EINVAL;
} else {
WARN_ON(!method->handler);
rcu_assign_pointer(method_elm->handler, method->handler);
@@ -98,72 +165,182 @@ static int uapi_merge_method(struct uverbs_api *uapi,
return 0;
}
-static int uapi_merge_tree(struct uverbs_api *uapi,
- const struct uverbs_object_tree_def *tree,
- bool is_driver)
+static int uapi_merge_obj_tree(struct uverbs_api *uapi,
+ const struct uverbs_object_def *obj,
+ bool is_driver)
{
- unsigned int i, j;
+ struct uverbs_api_object *obj_elm;
+ unsigned int i;
+ u32 obj_key;
+ bool exists;
int rc;
- if (!tree->objects)
+ obj_key = uapi_key_obj(obj->id);
+ obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+
+ if (obj->type_attrs) {
+ if (WARN_ON(obj_elm->type_attrs))
+ return -EINVAL;
+
+ obj_elm->id = obj->id;
+ obj_elm->type_attrs = obj->type_attrs;
+ obj_elm->type_class = obj->type_attrs->type_class;
+ /*
+ * Today drivers are only permitted to use idr_class
+ * types. They cannot use FD types because we currently have
+ * no way to revoke the fops pointer after device
+ * disassociation.
+ */
+ if (WARN_ON(is_driver &&
+ obj->type_attrs->type_class != &uverbs_idr_class))
+ return -EINVAL;
+ }
+
+ if (!obj->methods)
return 0;
- for (i = 0; i != tree->num_objects; i++) {
- const struct uverbs_object_def *obj = (*tree->objects)[i];
- struct uverbs_api_object *obj_elm;
- u32 obj_key;
+ for (i = 0; i != obj->num_methods; i++) {
+ const struct uverbs_method_def *method = (*obj->methods)[i];
- if (!obj)
+ if (!method)
continue;
- obj_key = uapi_key_obj(obj->id);
- obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm));
- if (IS_ERR(obj_elm)) {
- if (obj_elm != ERR_PTR(-EEXIST))
- return PTR_ERR(obj_elm);
+ rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+ is_driver);
+ if (rc)
+ return rc;
+ }
- /* This occurs when a driver uses ADD_UVERBS_METHODS */
- if (WARN_ON(obj->type_attrs))
- return -EINVAL;
- obj_elm = radix_tree_lookup(&uapi->radix, obj_key);
- if (WARN_ON(!obj_elm))
+ return 0;
+}
+
+static int uapi_disable_elm(struct uverbs_api *uapi,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 method_key)
+{
+ bool exists;
+
+ if (def->scope == UAPI_SCOPE_OBJECT) {
+ struct uverbs_api_object *obj_elm;
+
+ obj_elm = uapi_add_get_elm(
+ uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ obj_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ uapi_key_is_ioctl_method(method_key)) {
+ struct uverbs_api_ioctl_method *method_elm;
+
+ method_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*method_elm), &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ method_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ (uapi_key_is_write_method(method_key) ||
+ uapi_key_is_write_ex_method(method_key))) {
+ struct uverbs_api_write_method *write_elm;
+
+ write_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*write_elm), &exists);
+ if (IS_ERR(write_elm))
+ return PTR_ERR(write_elm);
+ write_elm->disabled = 1;
+ return 0;
+ }
+
+ WARN_ON(true);
+ return -EINVAL;
+}
+
+static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev,
+ const struct uapi_definition *def_list,
+ bool is_driver)
+{
+ const struct uapi_definition *def = def_list;
+ u32 cur_obj_key = UVERBS_API_KEY_ERR;
+ u32 cur_method_key = UVERBS_API_KEY_ERR;
+ bool exists;
+ int rc;
+
+ if (!def_list)
+ return 0;
+
+ for (;; def++) {
+ switch ((enum uapi_definition_kind)def->kind) {
+ case UAPI_DEF_CHAIN:
+ rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_CHAIN_OBJ_TREE:
+ if (WARN_ON(def->object_start.object_id !=
+ def->chain_obj_tree->id))
return -EINVAL;
- } else {
- obj_elm->type_attrs = obj->type_attrs;
- if (obj->type_attrs) {
- obj_elm->type_class =
- obj->type_attrs->type_class;
- /*
- * Today drivers are only permitted to use
- * idr_class types. They cannot use FD types
- * because we currently have no way to revoke
- * the fops pointer after device
- * disassociation.
- */
- if (WARN_ON(is_driver &&
- obj->type_attrs->type_class !=
- &uverbs_idr_class))
- return -EINVAL;
- }
- }
- if (!obj->methods)
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree,
+ is_driver);
+ if (rc)
+ return rc;
continue;
- for (j = 0; j != obj->num_methods; j++) {
- const struct uverbs_method_def *method =
- (*obj->methods)[j];
- if (!method)
+ case UAPI_DEF_END:
+ return 0;
+
+ case UAPI_DEF_IS_SUPPORTED_DEV_FN: {
+ void **ibdev_fn = (void *)ibdev + def->needs_fn_offset;
+
+ if (*ibdev_fn)
continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
- rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
- is_driver);
+ case UAPI_DEF_IS_SUPPORTED_FUNC:
+ if (def->func_is_supported(ibdev))
+ continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
if (rc)
return rc;
+ continue;
+
+ case UAPI_DEF_OBJECT_START: {
+ struct uverbs_api_object *obj_elm;
+
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ obj_elm = uapi_add_get_elm(uapi, cur_obj_key,
+ sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ continue;
}
- }
- return 0;
+ case UAPI_DEF_WRITE:
+ rc = uapi_create_write(
+ uapi, ibdev, def, cur_obj_key, &cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
+ WARN_ON(true);
+ return -EINVAL;
+ }
}
static int
@@ -186,13 +363,16 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
u32 attr_bkey = uapi_bkey_attr(attr_key);
u8 type = elm->spec.type;
- if (uapi_key_attr_to_method(iter.index) !=
- uapi_key_attr_to_method(method_key))
+ if (uapi_key_attr_to_ioctl_method(iter.index) !=
+ uapi_key_attr_to_ioctl_method(method_key))
break;
if (elm->spec.mandatory)
__set_bit(attr_bkey, method_elm->attr_mandatory);
+ if (elm->spec.is_udata)
+ method_elm->has_udata = true;
+
if (type == UVERBS_ATTR_TYPE_IDR ||
type == UVERBS_ATTR_TYPE_FD) {
u8 access = elm->spec.u.obj.access;
@@ -229,9 +409,13 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
static int uapi_finalize(struct uverbs_api *uapi)
{
+ const struct uverbs_api_write_method **data;
+ unsigned long max_write_ex = 0;
+ unsigned long max_write = 0;
struct radix_tree_iter iter;
void __rcu **slot;
int rc;
+ int i;
radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
struct uverbs_api_ioctl_method *method_elm =
@@ -243,29 +427,208 @@ static int uapi_finalize(struct uverbs_api *uapi)
if (rc)
return rc;
}
+
+ if (uapi_key_is_write_method(iter.index))
+ max_write = max(max_write,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ if (uapi_key_is_write_ex_method(iter.index))
+ max_write_ex =
+ max(max_write_ex,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ }
+
+ uapi->notsupp_method.handler = ib_uverbs_notsupp;
+ uapi->num_write = max_write + 1;
+ uapi->num_write_ex = max_write_ex + 1;
+ data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
+ sizeof(*uapi->write_methods), GFP_KERNEL);
+ for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
+ data[i] = &uapi->notsupp_method;
+ uapi->write_methods = data;
+ uapi->write_ex_methods = data + uapi->num_write;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_write_method(iter.index))
+ uapi->write_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
+ if (uapi_key_is_write_ex_method(iter.index))
+ uapi->write_ex_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
}
return 0;
}
-void uverbs_destroy_api(struct uverbs_api *uapi)
+static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last)
{
struct radix_tree_iter iter;
void __rcu **slot;
- if (!uapi)
- return;
-
- radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) {
+ if (iter.index > last)
+ return;
kfree(rcu_dereference_protected(*slot, true));
radix_tree_iter_delete(&uapi->radix, &iter, slot);
}
+}
+
+static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key)
+{
+ uapi_remove_range(uapi, obj_key,
+ obj_key | UVERBS_API_METHOD_KEY_MASK |
+ UVERBS_API_ATTR_KEY_MASK);
+}
+
+static void uapi_remove_method(struct uverbs_api *uapi, u32 method_key)
+{
+ uapi_remove_range(uapi, method_key,
+ method_key | UVERBS_API_ATTR_KEY_MASK);
+}
+
+
+static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec)
+{
+ if (spec->type == UVERBS_ATTR_TYPE_IDR ||
+ spec->type == UVERBS_ATTR_TYPE_FD)
+ return spec->u.obj.obj_type;
+ if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY)
+ return spec->u2.objs_arr.obj_type;
+ return UVERBS_API_KEY_ERR;
+}
+
+static void uapi_key_okay(u32 key)
+{
+ unsigned int count = 0;
+
+ if (uapi_key_is_object(key))
+ count++;
+ if (uapi_key_is_ioctl_method(key))
+ count++;
+ if (uapi_key_is_write_method(key))
+ count++;
+ if (uapi_key_is_write_ex_method(key))
+ count++;
+ if (uapi_key_is_attr(key))
+ count++;
+ WARN(count != 1, "Bad count %d key=%x", count, key);
+}
+
+static void uapi_finalize_disable(struct uverbs_api *uapi)
+{
+ struct radix_tree_iter iter;
+ u32 starting_key = 0;
+ bool scan_again = false;
+ void __rcu **slot;
+
+again:
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) {
+ uapi_key_okay(iter.index);
+
+ if (uapi_key_is_object(iter.index)) {
+ struct uverbs_api_object *obj_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (obj_elm->disabled) {
+ /* Have to check all the attrs again */
+ scan_again = true;
+ starting_key = iter.index;
+ uapi_remove_object(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ starting_key = iter.index;
+ uapi_remove_method(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_write_method(iter.index) ||
+ uapi_key_is_write_ex_method(iter.index)) {
+ struct uverbs_api_write_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ kfree(method_elm);
+ radix_tree_iter_delete(&uapi->radix, &iter, slot);
+ }
+ continue;
+ }
+
+ if (uapi_key_is_attr(iter.index)) {
+ struct uverbs_api_attr *attr_elm =
+ rcu_dereference_protected(*slot, true);
+ const struct uverbs_api_object *tmp_obj;
+ u32 obj_key;
+
+ /*
+ * If the method has a mandatory object handle
+ * attribute which relies on an object which is not
+ * present then the entire method is uncallable.
+ */
+ if (!attr_elm->spec.mandatory)
+ continue;
+ obj_key = uapi_get_obj_id(&attr_elm->spec);
+ if (obj_key == UVERBS_API_KEY_ERR)
+ continue;
+ tmp_obj = uapi_get_object(uapi, obj_key);
+ if (IS_ERR(tmp_obj)) {
+ if (PTR_ERR(tmp_obj) == -ENOMSG)
+ continue;
+ } else {
+ if (!tmp_obj->disabled)
+ continue;
+ }
+
+ starting_key = iter.index;
+ uapi_remove_method(
+ uapi,
+ iter.index & (UVERBS_API_OBJ_KEY_MASK |
+ UVERBS_API_METHOD_KEY_MASK));
+ goto again;
+ }
+
+ WARN_ON(false);
+ }
+
+ if (!scan_again)
+ return;
+ scan_again = false;
+ starting_key = 0;
+ goto again;
+}
+
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+ if (!uapi)
+ return;
+
+ uapi_remove_range(uapi, 0, U32_MAX);
+ kfree(uapi->write_methods);
kfree(uapi);
}
-struct uverbs_api *uverbs_alloc_api(
- const struct uverbs_object_tree_def *const *driver_specs,
- enum rdma_driver_id driver_id)
+static const struct uapi_definition uverbs_core_api[] = {
+ UAPI_DEF_CHAIN(uverbs_def_obj_counters),
+ UAPI_DEF_CHAIN(uverbs_def_obj_cq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+ UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
+ UAPI_DEF_CHAIN(uverbs_def_obj_intf),
+ UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+ UAPI_DEF_CHAIN(uverbs_def_write_intf),
+ {},
+};
+
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev)
{
struct uverbs_api *uapi;
int rc;
@@ -275,18 +638,16 @@ struct uverbs_api *uverbs_alloc_api(
return ERR_PTR(-ENOMEM);
INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
- uapi->driver_id = driver_id;
+ uapi->driver_id = ibdev->driver_id;
- rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false);
+ rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false);
+ if (rc)
+ goto err;
+ rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true);
if (rc)
goto err;
- for (; driver_specs && *driver_specs; driver_specs++) {
- rc = uapi_merge_tree(uapi, *driver_specs, true);
- if (rc)
- goto err;
- }
-
+ uapi_finalize_disable(uapi);
rc = uapi_finalize(uapi);
if (rc)
goto err;
@@ -294,8 +655,9 @@ struct uverbs_api *uverbs_alloc_api(
return uapi;
err:
if (rc != -ENOMEM)
- pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
- rc);
+ dev_err(&ibdev->dev,
+ "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+ rc);
uverbs_destroy_api(uapi);
return ERR_PTR(rc);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 178899e3ce73..fb2fc0c7ecfb 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -141,6 +141,10 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
case IB_RATE_100_GBPS: return 40;
case IB_RATE_200_GBPS: return 80;
case IB_RATE_300_GBPS: return 120;
+ case IB_RATE_28_GBPS: return 11;
+ case IB_RATE_50_GBPS: return 20;
+ case IB_RATE_400_GBPS: return 160;
+ case IB_RATE_600_GBPS: return 240;
default: return -1;
}
}
@@ -166,6 +170,10 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
case 40: return IB_RATE_100_GBPS;
case 80: return IB_RATE_200_GBPS;
case 120: return IB_RATE_300_GBPS;
+ case 11: return IB_RATE_28_GBPS;
+ case 20: return IB_RATE_50_GBPS;
+ case 160: return IB_RATE_400_GBPS;
+ case 240: return IB_RATE_600_GBPS;
default: return IB_RATE_PORT_CURRENT;
}
}
@@ -191,6 +199,10 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
case IB_RATE_100_GBPS: return 103125;
case IB_RATE_200_GBPS: return 206250;
case IB_RATE_300_GBPS: return 309375;
+ case IB_RATE_28_GBPS: return 28125;
+ case IB_RATE_50_GBPS: return 53125;
+ case IB_RATE_400_GBPS: return 425000;
+ case IB_RATE_600_GBPS: return 637500;
default: return -1;
}
}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index dcb4bba522ba..df4f7a3f043d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -291,13 +291,12 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
if (!wq->sq)
goto err3;
- wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+ wq->queue = dma_zalloc_coherent(&(rdev_p->rnic_info.pdev->dev),
depth * sizeof(union t3_wr),
&(wq->dma_addr), GFP_KERNEL);
if (!wq->queue)
goto err4;
- memset(wq->queue, 0, depth * sizeof(union t3_wr));
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
if (!kernel_domain)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 615413bd3e8d..659175c9ae91 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2795,7 +2795,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
break;
case MPA_REQ_SENT:
(void)stop_ep_timer(ep);
- if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
+ if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
+ (mpa_rev == 2 && ep->tried_with_mpa_v1))
connect_reply_upcall(ep, -ECONNRESET);
else {
/*
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 13478f3b7057..5a8030bd4208 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2564,13 +2564,12 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
T4_RQT_ENTRY_SHIFT;
- wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+ wq->queue = dma_zalloc_coherent(&rdev->lldi.pdev->dev,
wq->memsize, &wq->dma_addr,
GFP_KERNEL);
if (!wq->queue)
goto err_free_rqtpool;
- memset(wq->queue, 0, wq->memsize);
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index ff790390c91a..3ce9dc8c3463 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -34,6 +34,7 @@ hfi1-y := \
ruc.o \
sdma.o \
sysfs.o \
+ tid_rdma.o \
trace.o \
uc.o \
ud.o \
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9b20479dc710..385c33745c9f 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
static void log_physical_state(struct hfi1_pportdata *ppd, u32 state);
static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
@@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
break;
ppd->port_error_action = 0;
- ppd->host_link_state = HLS_DN_POLL;
if (quick_linkup) {
/* quick linkup does not go into polling */
ret = do_quick_linkup(dd);
} else {
ret1 = set_physical_link_state(dd, PLS_POLLING);
+ if (!ret1)
+ ret1 = wait_phys_link_out_of_offline(ppd,
+ 3000);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to transition to Polling link state, return 0x%x\n",
@@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret = -EINVAL;
}
}
+
+ /*
+ * Change the host link state after requesting DC8051 to
+ * change its physical state so that we can ignore any
+ * interrupt with stale LNI(XX) error, which will not be
+ * cleared until DC8051 transitions to Polling state.
+ */
+ ppd->host_link_state = HLS_DN_POLL;
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
/*
@@ -12927,6 +12939,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
return read_state;
}
+/*
+ * wait_phys_link_out_of_offline - wait for any out of offline state
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any out of offline physical link
+ * state change to occur.
+ * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if ((read_state & 0xF0) != PLS_OFFLINE)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n",
+ read_state, msecs);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, read_state);
+ return read_state;
+}
+
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index c6163a347e93..c0800ea5a3f8 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -935,6 +935,10 @@
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018)
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32
+#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull
#define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028)
#define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull
#define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020)
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 7108d4d92259..40d3cfb58bd1 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -136,18 +136,21 @@
HFI1_CAP_ALLOW_PERM_JKEY | \
HFI1_CAP_STATIC_RATE_CTRL | \
HFI1_CAP_PRINT_UNIMPL | \
- HFI1_CAP_TID_UNMAP)
+ HFI1_CAP_TID_UNMAP | \
+ HFI1_CAP_OPFN)
/*
* A set of capability bits that are "global" and are not allowed to be
* set in the user bitmask.
*/
#define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \
- HFI1_CAP_USE_SDMA_HEAD | \
- HFI1_CAP_EXTENDED_PSN | \
- HFI1_CAP_PRINT_UNIMPL | \
- HFI1_CAP_NO_INTEGRITY | \
- HFI1_CAP_PKEY_CHECK) << \
- HFI1_CAP_USER_SHIFT)
+ HFI1_CAP_USE_SDMA_HEAD | \
+ HFI1_CAP_EXTENDED_PSN | \
+ HFI1_CAP_PRINT_UNIMPL | \
+ HFI1_CAP_NO_INTEGRITY | \
+ HFI1_CAP_PKEY_CHECK | \
+ HFI1_CAP_TID_RDMA | \
+ HFI1_CAP_OPFN) << \
+ HFI1_CAP_USER_SHIFT)
/*
* Set of capabilities that need to be enabled for kernel context in
* order to be allowed for user contexts, as well.
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 9f992ae36c89..0a557795563c 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds);
DEBUGFS_SEQ_FILE_OPEN(rcds)
DEBUGFS_FILE_OPS(rcds);
+static void *_pios_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd;
+ struct hfi1_devdata *dd;
+
+ ibd = (struct hfi1_ibdev *)s->private;
+ dd = dd_from_dev(ibd);
+ if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+ return NULL;
+ return pos;
+}
+
+static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ ++*pos;
+ if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+ return NULL;
+ return pos;
+}
+
+static void _pios_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _pios_seq_show(struct seq_file *s, void *v)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct send_context_info *sci;
+ loff_t *spos = v;
+ loff_t i = *spos;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sc_lock, flags);
+ sci = &dd->send_contexts[i];
+ if (sci && sci->type != SC_USER && sci->allocated && sci->sc)
+ seqfile_dump_sci(s, i, sci);
+ spin_unlock_irqrestore(&dd->sc_lock, flags);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(pios);
+DEBUGFS_SEQ_FILE_OPEN(pios)
+DEBUGFS_FILE_OPS(pios);
+
/* read the per-device counters */
static ssize_t dev_counters_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd);
+ DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index a41f85558312..a8ad70730203 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
[HFI1_PKT_TYPE_16B] = &return_cnp_16B
};
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp)
+/**
+ * hfi1_process_ecn_slowpath - Process FECN or BECN bits
+ * @qp: The packet's destination QP
+ * @pkt: The packet itself.
+ * @prescan: Is the caller the RXQ prescan
+ *
+ * Process the packet's FECN or BECN bits. By now, the packet
+ * has already been evaluated whether processing of those bit should
+ * be done.
+ * The significance of the @prescan argument is that if the caller
+ * is the RXQ prescan, a CNP will be send out instead of waiting for the
+ * normal packet processing to send an ACK with BECN set (or a CNP).
+ */
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool prescan)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = pkt->grh;
- u32 rqpn = 0, bth1;
+ u32 rqpn = 0;
u16 pkey;
u32 rlid, slid, dlid = 0;
- u8 hdr_type, sc, svc_type;
- bool is_mcast = false;
+ u8 hdr_type, sc, svc_type, opcode;
+ bool is_mcast = false, ignore_fecn = false, do_cnp = false,
+ fecn, becn;
/* can be called from prescan */
if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
- is_mcast = hfi1_is_16B_mcast(dlid);
pkey = hfi1_16B_get_pkey(pkt->hdr);
sc = hfi1_16B_get_sc(pkt->hdr);
dlid = hfi1_16B_get_dlid(pkt->hdr);
slid = hfi1_16B_get_slid(pkt->hdr);
+ is_mcast = hfi1_is_16B_mcast(dlid);
+ opcode = ib_bth_get_opcode(ohdr);
hdr_type = HFI1_PKT_TYPE_16B;
+ fecn = hfi1_16B_get_fecn(pkt->hdr);
+ becn = hfi1_16B_get_becn(pkt->hdr);
} else {
- is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
pkey = ib_bth_get_pkey(ohdr);
sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
- dlid = ib_get_dlid(pkt->hdr);
+ dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) :
+ ppd->lid;
slid = ib_get_slid(pkt->hdr);
+ is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
+ opcode = ib_bth_get_opcode(ohdr);
hdr_type = HFI1_PKT_TYPE_9B;
+ fecn = ib_bth_get_fecn(ohdr);
+ becn = ib_bth_get_becn(ohdr);
}
switch (qp->ibqp.qp_type) {
case IB_QPT_UD:
- dlid = ppd->lid;
rlid = slid;
rqpn = ib_get_sqpn(pkt->ohdr);
svc_type = IB_CC_SVCTYPE_UD;
@@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
svc_type = IB_CC_SVCTYPE_RC;
break;
default:
- return;
+ return false;
}
- bth1 = be32_to_cpu(ohdr->bth[1]);
+ ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) ||
+ (opcode == IB_OPCODE_RC_ACKNOWLEDGE);
+ /*
+ * ACKNOWLEDGE packets do not get a CNP but this will be
+ * guarded by ignore_fecn above.
+ */
+ do_cnp = prescan ||
+ (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST &&
+ opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE);
+
/* Call appropriate CNP handler */
- if (do_cnp && (bth1 & IB_FECN_SMASK))
+ if (!ignore_fecn && do_cnp && fecn)
hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey,
dlid, rlid, sc, grh);
- if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
- u32 lqpn = bth1 & RVT_QPN_MASK;
+ if (becn) {
+ u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
-
+ return !ignore_fecn && fecn;
}
struct ps_mdata {
@@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
- int is_ecn = 0;
u8 lnh;
if (ps_done(&mdata, rhf, rcd))
@@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet)
goto next; /* just in case */
}
- bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
-
- if (!is_ecn)
+ if (!hfi1_may_ecn(packet))
goto next;
+ bth1 = be32_to_cpu(packet->ohdr->bth[1]);
qpn = bth1 & RVT_QPN_MASK;
rcu_read_lock();
qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
@@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
goto next;
}
- process_ecn(qp, packet, true);
+ hfi1_process_ecn_slowpath(qp, packet, true);
rcu_read_unlock();
/* turn off BECN, FECN */
@@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet)
if ((!(hfi1_is_16B_mcast(packet->dlid))) &&
(packet->dlid !=
opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) {
- if (packet->dlid != ppd->lid)
+ if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid)
return -EINVAL;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 1401b6ea4a28..ca572ad6061c 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1802,13 +1802,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
return &rcd->ppd->ibport_data;
}
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp);
-static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp)
+/**
+ * hfi1_may_ecn - Check whether FECN or BECN processing should be done
+ * @pkt: the packet to be evaluated
+ *
+ * Check whether the FECN or BECN bits in the packet's header are
+ * enabled, depending on packet type.
+ *
+ * This function only checks for FECN and BECN bits. Additional checks
+ * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to
+ * ensure correct handling.
+ */
+static inline bool hfi1_may_ecn(struct hfi1_packet *pkt)
{
- bool becn;
- bool fecn;
+ bool fecn, becn;
if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
fecn = hfi1_16B_get_fecn(pkt->hdr);
@@ -1817,10 +1824,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
fecn = ib_bth_get_fecn(pkt->ohdr);
becn = ib_bth_get_becn(pkt->ohdr);
}
- if (unlikely(fecn || becn)) {
- hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return fecn;
- }
+ return fecn || becn;
+}
+
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool prescan);
+static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt)
+{
+ bool do_work;
+
+ do_work = hfi1_may_ecn(pkt);
+ if (unlikely(do_work))
+ return hfi1_process_ecn_slowpath(qp, pkt, false);
return false;
}
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 9ab50d2308dc..dd5a5c030066 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
spin_lock_init(&sc->alloc_lock);
spin_lock_init(&sc->release_lock);
spin_lock_init(&sc->credit_ctrl_lock);
+ seqlock_init(&sc->waitlock);
INIT_LIST_HEAD(&sc->piowait);
INIT_WORK(&sc->halt_work, sc_halted);
init_waitqueue_head(&sc->halt_wait);
@@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
static void sc_piobufavail(struct send_context *sc)
{
struct hfi1_devdata *dd = sc->dd;
- struct hfi1_ibdev *dev = &dd->verbs_dev;
struct list_head *list;
struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
struct rvt_qp *qp;
@@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc)
* could end up with QPs on the wait list with the interrupt
* disabled.
*/
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ write_seqlock_irqsave(&sc->waitlock, flags);
while (!list_empty(list)) {
struct iowait *wait;
@@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc)
if (!list_empty(list))
hfi1_sc_wantpiobuf_intr(sc, 1);
}
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(&sc->waitlock, flags);
/* Wake up the most starved one first */
if (n)
@@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd)
kfree(dd->cr_base);
dd->cr_base = NULL;
}
+
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+ struct send_context_info *sci)
+{
+ struct send_context *sc = sci->sc;
+ u64 reg;
+
+ seq_printf(s, "SCI %u: type %u base %u credits %u\n",
+ i, sci->type, sci->base, sci->credits);
+ seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n",
+ sc->flags, sc->sw_index, sc->hw_context, sc->group);
+ seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n",
+ sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail);
+ seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n",
+ sc->fill, sc->free, sc->fill_wrap, sc->alloc_free);
+ seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n",
+ sc->credit_intr_count, sc->credit_ctrl);
+ reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS));
+ seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n",
+ (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >>
+ CR_COUNTER_SHIFT,
+ (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) &
+ SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK),
+ reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK));
+}
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index aaf372c3e5d6..c9a58b642bdd 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -127,6 +127,8 @@ struct send_context {
volatile __le64 *hw_free; /* HW free counter */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
+ seqlock_t waitlock;
+
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
u32 credit_intr_count; /* count of credit intr users */
u64 credit_ctrl; /* cache for credit control */
@@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
void seg_pio_copy_end(struct pio_buf *pbuf);
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+ struct send_context_info *sci);
+
#endif /* _PIO_H */
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 6f3bc4dab858..e32fbfe029bc 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -368,20 +368,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp)
static void qp_pio_drain(struct rvt_qp *qp)
{
- struct hfi1_ibdev *dev;
struct hfi1_qp_priv *priv = qp->priv;
if (!priv->s_sendcontext)
return;
- dev = to_idev(qp->ibqp.device);
while (iowait_pio_pending(&priv->s_iowait)) {
- write_seqlock_irq(&dev->iowait_lock);
+ write_seqlock_irq(&priv->s_sendcontext->waitlock);
hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
- write_sequnlock_irq(&dev->iowait_lock);
+ write_sequnlock_irq(&priv->s_sendcontext->waitlock);
iowait_pio_drain(&priv->s_iowait);
- write_seqlock_irq(&dev->iowait_lock);
+ write_seqlock_irq(&priv->s_sendcontext->waitlock);
hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
- write_sequnlock_irq(&dev->iowait_lock);
+ write_sequnlock_irq(&priv->s_sendcontext->waitlock);
}
}
@@ -452,7 +450,6 @@ static int iowait_sleep(
struct hfi1_qp_priv *priv;
unsigned long flags;
int ret = 0;
- struct hfi1_ibdev *dev;
qp = tx->qp;
priv = qp->priv;
@@ -465,9 +462,8 @@ static int iowait_sleep(
* buffer and undoing the side effects of the copy.
*/
/* Make a common routine? */
- dev = &sde->dd->verbs_dev;
list_add_tail(&stx->list, &wait->tx_head);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
if (sdma_progress(sde, seq, stx))
goto eagain;
if (list_empty(&priv->s_iowait.list)) {
@@ -478,11 +474,11 @@ static int iowait_sleep(
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
iowait_queue(pkts_sent, &priv->s_iowait,
&sde->dmawait);
- priv->s_iowait.lock = &dev->iowait_lock;
+ priv->s_iowait.lock = &sde->waitlock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
@@ -492,7 +488,7 @@ static int iowait_sleep(
}
return ret;
eagain:
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
spin_unlock_irqrestore(&qp->s_lock, flags);
list_del_init(&stx->list);
return -EAGAIN;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 188aa4f686a0..be603f35d7e4 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
+ rvt_qp_wqe_unreserve(qp, wqe);
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
@@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
u32 s_last;
rvt_put_swqe(wqe);
+ rvt_qp_wqe_unreserve(qp, wqe);
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
@@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
struct ib_reth *reth;
unsigned long flags;
int ret;
- bool is_fecn = false;
- bool copy_last = false;
+ bool copy_last = false, fecn;
u32 rkey;
u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
@@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (hfi1_ruc_check_hdr(ibp, packet))
return;
- is_fecn = process_ecn(qp, packet, false);
+ fecn = process_ecn(qp, packet);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
rc_rcv_resp(packet);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2347,11 +2346,11 @@ send_last:
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2413,11 +2412,11 @@ send_last:
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2430,16 +2429,9 @@ send_last:
qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
- if (psn & IB_BTH_REQ_ACK) {
- if (packet->numpkt == 0) {
- rc_cancel_ack(qp);
- goto send_ack;
- }
- if (qp->r_adefered >= HFI1_PSN_CREDIT) {
- rc_cancel_ack(qp);
- goto send_ack;
- }
- if (unlikely(is_fecn)) {
+ if (psn & IB_BTH_REQ_ACK || fecn) {
+ if (packet->numpkt == 0 || fecn ||
+ qp->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
@@ -2480,7 +2472,7 @@ nack_acc:
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
- hfi1_send_rc_ack(packet, is_fecn);
+ hfi1_send_rc_ack(packet, fecn);
}
void hfi1_rc_hdrerr(
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 891d2386d1ca..b84356e1a4c1 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
seqlock_init(&sde->head_lock);
spin_lock_init(&sde->senddmactrl_lock);
spin_lock_init(&sde->flushlist_lock);
+ seqlock_init(&sde->waitlock);
/* insure there is always a zero bit */
sde->ahg_bits = 0xfffffffe00000000ULL;
@@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
- struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0;
#ifdef CONFIG_SDMA_VERBOSITY
@@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
#endif
do {
- seq = read_seqbegin(&dev->iowait_lock);
+ seq = read_seqbegin(&sde->waitlock);
if (!list_empty(&sde->dmawait)) {
/* at least one item */
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
/* Harvest waiters wanting DMA descriptors */
list_for_each_entry_safe(
wait,
@@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
list_del_init(&wait->list);
waits[n++] = wait;
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
break;
}
- } while (read_seqretry(&dev->iowait_lock, seq));
+ } while (read_seqretry(&sde->waitlock, seq));
/* Schedule the most starved one first */
if (n)
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 6dc63d7c5685..1e2e40f79cb2 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -382,6 +382,7 @@ struct sdma_engine {
u64 progress_int_cnt;
/* private: */
+ seqlock_t waitlock;
struct list_head dmawait;
/* CONFIG SDMA for now, just blindly duplicate */
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
new file mode 100644
index 000000000000..da1ecb68a928
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#include "hfi.h"
+#include "verbs.h"
+#include "tid_rdma.h"
+
+/**
+ * qp_to_rcd - determine the receive context used by a qp
+ * @qp - the qp
+ *
+ * This routine returns the receive context associated
+ * with a a qp's qpn.
+ *
+ * Returns the context.
+ */
+static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
+ struct rvt_qp *qp)
+{
+ struct hfi1_ibdev *verbs_dev = container_of(rdi,
+ struct hfi1_ibdev,
+ rdi);
+ struct hfi1_devdata *dd = container_of(verbs_dev,
+ struct hfi1_devdata,
+ verbs_dev);
+ unsigned int ctxt;
+
+ if (qp->ibqp.qp_num == 0)
+ ctxt = 0;
+ else
+ ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
+ (dd->n_krcv_queues - 1)) + 1;
+
+ return dd->rcd[ctxt];
+}
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+
+ qpriv->rcd = qp_to_rcd(rdi, qp);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
new file mode 100644
index 000000000000..6fcd3adcdcc3
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#ifndef HFI1_TID_RDMA_H
+#define HFI1_TID_RDMA_H
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr);
+
+#endif /* HFI1_TID_RDMA_H */
+
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 6aca0c5a7f97..6ba47037c424 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -321,7 +321,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
if (hfi1_ruc_check_hdr(ibp, packet))
return;
- process_ecn(qp, packet, true);
+ process_ecn(qp, packet);
psn = ib_bth_get_psn(ohdr);
/* Compare the PSN verses the expected PSN. */
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 4baa8f4d49de..88242fe95eaa 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -51,6 +51,7 @@
#include "hfi.h"
#include "mad.h"
#include "verbs_txreq.h"
+#include "trace_ibhdrs.h"
#include "qp.h"
/* We support only two types - 9B and 16B for now */
@@ -656,18 +657,19 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
u32 bth0, plen, vl, hwords = 7;
u16 len;
u8 l4;
- struct hfi1_16b_header hdr;
+ struct hfi1_opa_header hdr;
struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 nwords;
+ hdr.hdr_type = HFI1_PKT_TYPE_16B;
/* Populate length */
nwords = ((hfi1_get_16b_padding(hwords << 2, 0) +
SIZE_OF_LT) >> 2) + SIZE_OF_CRC;
if (old_grh) {
- struct ib_grh *grh = &hdr.u.l.grh;
+ struct ib_grh *grh = &hdr.opah.u.l.grh;
grh->version_tclass_flow = old_grh->version_tclass_flow;
grh->paylen = cpu_to_be16(
@@ -675,11 +677,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
grh->hop_limit = 0xff;
grh->sgid = old_grh->dgid;
grh->dgid = old_grh->sgid;
- ohdr = &hdr.u.l.oth;
+ ohdr = &hdr.opah.u.l.oth;
l4 = OPA_16B_L4_IB_GLOBAL;
hwords += sizeof(struct ib_grh) / sizeof(u32);
} else {
- ohdr = &hdr.u.oth;
+ ohdr = &hdr.opah.u.oth;
l4 = OPA_16B_L4_IB_LOCAL;
}
@@ -693,7 +695,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
/* Convert dwords to flits */
len = (hwords + nwords) >> 1;
- hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5);
+ hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5);
plen = 2 /* PBC */ + hwords + nwords;
pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
@@ -701,9 +703,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf)
+ if (pbuf) {
+ trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
+ }
}
}
@@ -715,14 +719,15 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
- struct ib_header hdr;
+ struct hfi1_opa_header hdr;
struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ hdr.hdr_type = HFI1_PKT_TYPE_9B;
if (old_grh) {
- struct ib_grh *grh = &hdr.u.l.grh;
+ struct ib_grh *grh = &hdr.ibh.u.l.grh;
grh->version_tclass_flow = old_grh->version_tclass_flow;
grh->paylen = cpu_to_be16(
@@ -730,11 +735,11 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
grh->hop_limit = 0xff;
grh->sgid = old_grh->dgid;
grh->dgid = old_grh->sgid;
- ohdr = &hdr.u.l.oth;
+ ohdr = &hdr.ibh.u.l.oth;
lrh0 = HFI1_LRH_GRH;
hwords += sizeof(struct ib_grh) / sizeof(u32);
} else {
- ohdr = &hdr.u.oth;
+ ohdr = &hdr.ibh.u.oth;
lrh0 = HFI1_LRH_BTH;
}
@@ -746,16 +751,18 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
- hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
+ hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
plen = 2 /* PBC */ + hwords;
pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
vl = sc_to_vlt(ppd->dd, sc5);
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf)
+ if (pbuf) {
+ trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
+ }
}
}
@@ -912,7 +919,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
}
- process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
+ process_ecn(qp, packet);
/*
* Get the number of bytes the message was padded by
* and drop incomplete packets.
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 3f0aadccd9f6..e5e7fad09f32 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -130,7 +130,6 @@ static int defer_packet_queue(
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
- struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
@@ -144,10 +143,10 @@ static int defer_packet_queue(
* it is supposed to be enqueued.
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
if (list_empty(&pq->busy.list))
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
return -EAGAIN;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 48e11e510358..28f754d2a844 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -765,7 +765,6 @@ static int pio_wait(struct rvt_qp *qp,
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_devdata *dd = sc->dd;
- struct hfi1_ibdev *dev = &dd->verbs_dev;
unsigned long flags;
int ret = 0;
@@ -777,7 +776,7 @@ static int pio_wait(struct rvt_qp *qp,
*/
spin_lock_irqsave(&qp->s_lock, flags);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sc->waitlock);
list_add_tail(&ps->s_txreq->txreq.list,
&ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
@@ -790,14 +789,14 @@ static int pio_wait(struct rvt_qp *qp,
was_empty = list_empty(&sc->piowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
- priv->s_iowait.lock = &dev->iowait_lock;
+ priv->s_iowait.lock = &sc->waitlock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sc->waitlock);
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
@@ -919,6 +918,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (slen > len)
slen = len;
+ if (slen > ss->sge.sge_length)
+ slen = ss->sge.sge_length;
rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
@@ -1704,6 +1705,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+ dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init;
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 64c9054db5f3..1ad0b14bdb3c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -71,6 +71,7 @@ struct hfi1_devdata;
struct hfi1_packet;
#include "iowait.h"
+#include "tid_rdma.h"
#define HFI1_MAX_RDMA_ATOMIC 16
@@ -156,6 +157,7 @@ struct hfi1_qp_priv {
struct hfi1_ahg_info *s_ahg; /* ahg info for next header */
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
+ struct hfi1_ctxtdata *rcd; /* QP's receive context */
u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait;
struct rvt_qp *owner;
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index c9876d9e3cb9..a922db58be14 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -816,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
- chip_sdma_engines(dd), dd->num_vnic_contexts);
+ dd->num_sdma, dd->num_vnic_contexts);
if (!netdev)
return ERR_PTR(-ENOMEM);
rn = netdev_priv(netdev);
vinfo = opa_vnic_dev_priv(netdev);
vinfo->dd = dd;
- vinfo->num_tx_q = chip_sdma_engines(dd);
+ vinfo->num_tx_q = dd->num_sdma;
vinfo->num_rx_q = dd->num_vnic_contexts;
vinfo->netdev = netdev;
rn->free_rdma_netdev = hfi1_vnic_free_rn;
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index 97bd940a056a..1f81c480e028 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -57,7 +57,6 @@
#define HFI1_VNIC_TXREQ_NAME_LEN 32
#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
-#define HFI1_VNIC_SDMA_RETRY_COUNT 1
/*
* struct vnic_txreq - VNIC transmit descriptor
@@ -67,7 +66,6 @@
* @pad: pad buffer
* @plen: pad length
* @pbc_val: pbc value
- * @retry_count: tx retry count
*/
struct vnic_txreq {
struct sdma_txreq txreq;
@@ -77,8 +75,6 @@ struct vnic_txreq {
unsigned char pad[HFI1_VNIC_MAX_PAD];
u16 plen;
__le64 pbc_val;
-
- u32 retry_count;
};
static void vnic_sdma_complete(struct sdma_txreq *txreq,
@@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
ret = build_vnic_tx_desc(sde, tx, pbc);
if (unlikely(ret))
goto free_desc;
- tx->retry_count = 0;
ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
&tx->txreq, vnic_sdma->pkts_sent);
@@ -237,18 +232,17 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
{
struct hfi1_vnic_sdma *vnic_sdma =
container_of(wait->iow, struct hfi1_vnic_sdma, wait);
- struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
- struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
- if (sdma_progress(sde, seq, txreq))
- if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
- return -EAGAIN;
+ write_seqlock(&sde->waitlock);
+ if (sdma_progress(sde, seq, txreq)) {
+ write_sequnlock(&sde->waitlock);
+ return -EAGAIN;
+ }
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
- write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
return -EBUSY;
}
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index cf03404b9d58..004c88b32e13 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
- hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o
obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
hns-roce-hw-v1-objs := hns_roce_hw_v1.o
obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 46f65f9f59d0..6300033a448f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -239,6 +239,8 @@ err_free:
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_srq_table(hr_dev);
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
hns_roce_cleanup_mr_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 9549ae51a0dd..927701df5eff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -120,6 +120,10 @@ enum {
HNS_ROCE_CMD_SQD2RTS_QP = 0x20,
HNS_ROCE_CMD_2RST_QP = 0x21,
HNS_ROCE_CMD_QUERY_QP = 0x22,
+ HNS_ROCE_CMD_SW2HW_SRQ = 0x70,
+ HNS_ROCE_CMD_MODIFY_SRQC = 0x72,
+ HNS_ROCE_CMD_QUERY_SRQC = 0x73,
+ HNS_ROCE_CMD_HW2SW_SRQ = 0x74,
};
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 93d4b4ec002d..f4c92a7ac1ce 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -376,9 +376,6 @@
#define ROCEE_RX_CMQ_TAIL_REG 0x07024
#define ROCEE_RX_CMQ_HEAD_REG 0x07028
-#define ROCEE_VF_MB_CFG0_REG 0x40
-#define ROCEE_VF_MB_STATUS_REG 0x58
-
#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index d39bdfdb5de9..779dd4c409cb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -111,6 +111,9 @@
#define PAGES_SHIFT_24 24
#define PAGES_SHIFT_32 32
+#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
+#define SRQ_DB_REG 0x230
+
enum {
HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
@@ -196,6 +199,7 @@ enum {
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
+ HNS_ROCE_CAP_FLAG_SRQ = BIT(5),
HNS_ROCE_CAP_FLAG_MW = BIT(7),
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
@@ -204,6 +208,8 @@ enum {
enum hns_roce_mtt_type {
MTT_TYPE_WQE,
MTT_TYPE_CQE,
+ MTT_TYPE_SRQWQE,
+ MTT_TYPE_IDX
};
enum {
@@ -339,6 +345,10 @@ struct hns_roce_mr_table {
struct hns_roce_hem_table mtpt_table;
struct hns_roce_buddy mtt_cqe_buddy;
struct hns_roce_hem_table mtt_cqe_table;
+ struct hns_roce_buddy mtt_srqwqe_buddy;
+ struct hns_roce_hem_table mtt_srqwqe_table;
+ struct hns_roce_buddy mtt_idx_buddy;
+ struct hns_roce_hem_table mtt_idx_table;
};
struct hns_roce_wq {
@@ -429,9 +439,37 @@ struct hns_roce_cq {
struct completion free;
};
+struct hns_roce_idx_que {
+ struct hns_roce_buf idx_buf;
+ int entry_sz;
+ u32 buf_size;
+ struct ib_umem *umem;
+ struct hns_roce_mtt mtt;
+ u64 *bitmap;
+};
+
struct hns_roce_srq {
struct ib_srq ibsrq;
- int srqn;
+ void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
+ unsigned long srqn;
+ int max;
+ int max_gs;
+ int wqe_shift;
+ void __iomem *db_reg_l;
+
+ atomic_t refcount;
+ struct completion free;
+
+ struct hns_roce_buf buf;
+ u64 *wrid;
+ struct ib_umem *umem;
+ struct hns_roce_mtt mtt;
+ struct hns_roce_idx_que idx_que;
+ spinlock_t lock;
+ int head;
+ int tail;
+ u16 wqe_ctr;
+ struct mutex mutex;
};
struct hns_roce_uar_table {
@@ -453,6 +491,12 @@ struct hns_roce_cq_table {
struct hns_roce_hem_table table;
};
+struct hns_roce_srq_table {
+ struct hns_roce_bitmap bitmap;
+ struct xarray xa;
+ struct hns_roce_hem_table table;
+};
+
struct hns_roce_raq_table {
struct hns_roce_buf_list *e_raq_buf;
};
@@ -603,6 +647,12 @@ struct hns_roce_aeqe {
} qp_event;
struct {
+ __le32 srq;
+ u32 rsv0;
+ u32 rsv1;
+ } srq_event;
+
+ struct {
__le32 cq;
u32 rsv0;
u32 rsv1;
@@ -679,7 +729,12 @@ struct hns_roce_caps {
u32 max_extend_sg;
int num_qps; /* 256k */
int reserved_qps;
+ u32 max_srq_sg;
+ int num_srqs;
u32 max_wqes; /* 16k */
+ u32 max_srqs;
+ u32 max_srq_wrs;
+ u32 max_srq_sges;
u32 max_sq_desc_sz; /* 64 */
u32 max_rq_desc_sz; /* 64 */
u32 max_srq_desc_sz;
@@ -690,12 +745,16 @@ struct hns_roce_caps {
int min_cqes;
u32 min_wqes;
int reserved_cqs;
+ int reserved_srqs;
+ u32 max_srqwqes;
int num_aeq_vectors; /* 1 */
int num_comp_vectors;
int num_other_vectors;
int num_mtpts;
u32 num_mtt_segs;
u32 num_cqe_segs;
+ u32 num_srqwqe_segs;
+ u32 num_idx_segs;
int reserved_mrws;
int reserved_uars;
int num_pds;
@@ -709,6 +768,8 @@ struct hns_roce_caps {
int irrl_entry_sz;
int trrl_entry_sz;
int cqc_entry_sz;
+ int srqc_entry_sz;
+ int idx_entry_sz;
u32 pbl_ba_pg_sz;
u32 pbl_buf_pg_sz;
u32 pbl_hop_num;
@@ -737,6 +798,12 @@ struct hns_roce_caps {
u32 cqe_ba_pg_sz;
u32 cqe_buf_pg_sz;
u32 cqe_hop_num;
+ u32 srqwqe_ba_pg_sz;
+ u32 srqwqe_buf_pg_sz;
+ u32 srqwqe_hop_num;
+ u32 idx_ba_pg_sz;
+ u32 idx_buf_pg_sz;
+ u32 idx_hop_num;
u32 eqe_ba_pg_sz;
u32 eqe_buf_pg_sz;
u32 eqe_hop_num;
@@ -805,6 +872,17 @@ struct hns_roce_hw {
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
+ void (*write_srqc)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
+ void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
+ dma_addr_t dma_handle_wqe,
+ dma_addr_t dma_handle_idx);
+ int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+ int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+ int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
};
struct hns_roce_dev {
@@ -839,6 +917,7 @@ struct hns_roce_dev {
struct hns_roce_uar_table uar_table;
struct hns_roce_mr_table mr_table;
struct hns_roce_cq_table cq_table;
+ struct hns_roce_srq_table srq_table;
struct hns_roce_qp_table qp_table;
struct hns_roce_eq_table eq_table;
@@ -951,12 +1030,14 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
@@ -1011,6 +1092,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt, struct ib_umem *umem);
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq);
+
struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
@@ -1052,6 +1141,7 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index f6faefed96e8..4cdbcafa5915 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
(hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
(hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
(hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
- (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT))
+ (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
+ (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
+ (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
return true;
return false;
@@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
mhop->ba_l0_num = mhop->bt_chunk_size / 8;
mhop->hop_num = hr_dev->caps.cqe_hop_num;
break;
+ case HEM_TYPE_SRQWQE:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->hop_num = hr_dev->caps.srqwqe_hop_num;
+ break;
+ case HEM_TYPE_IDX:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->hop_num = hr_dev->caps.idx_hop_num;
+ break;
default:
dev_err(dev, "Table %d not support multi-hop addressing!\n",
table->type);
@@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
bt_chunk_size = buf_chunk_size;
hop_num = hr_dev->caps.cqe_hop_num;
break;
+ case HEM_TYPE_SRQWQE:
+ buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+ + PAGE_SHIFT);
+ bt_chunk_size = buf_chunk_size;
+ hop_num = hr_dev->caps.srqwqe_hop_num;
+ break;
+ case HEM_TYPE_IDX:
+ buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+ + PAGE_SHIFT);
+ bt_chunk_size = buf_chunk_size;
+ hop_num = hr_dev->caps.idx_hop_num;
+ break;
default:
dev_err(dev,
"Table %d not support to init hem table here!\n",
@@ -1041,6 +1071,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
+ if ((hr_dev->caps.num_idx_segs))
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_idx_table);
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table);
+ if (hr_dev->caps.srqc_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->srq_table.table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index e8850d59e780..a650278c6fbd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -48,6 +48,8 @@ enum {
/* UNMAP HEM */
HEM_TYPE_MTT,
HEM_TYPE_CQE,
+ HEM_TYPE_SRQWQE,
+ HEM_TYPE_IDX,
HEM_TYPE_IRRL,
HEM_TYPE_TRRL,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 3beb1523e17c..5007fe7af2f9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1082,6 +1082,33 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
return 0;
}
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
+ int vf_id)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_vf_switch *swt;
+ int ret;
+
+ swt = (struct hns_roce_vf_switch *)desc.data;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
+ swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+ roce_set_field(swt->fun_id,
+ VF_SWITCH_DATA_FUN_ID_VF_ID_M,
+ VF_SWITCH_DATA_FUN_ID_VF_ID_S,
+ vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+ desc.flag =
+ cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
+ desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc[2];
@@ -1269,6 +1296,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
+ if (hr_dev->pci_dev->revision == 0x21) {
+ ret = hns_roce_set_vf_switch_param(hr_dev, 0);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "Set function switch param fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
@@ -1276,11 +1312,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
+ caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
+ caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
+ caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM;
caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM;
@@ -1289,6 +1328,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
+ caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+ caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
@@ -1299,8 +1340,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ;
caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
+ caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ;
caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
+ caps->idx_entry_sz = 4;
caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE;
caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
caps->reserved_lkey = 0;
@@ -1308,6 +1351,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = 1;
caps->reserved_uars = 0;
caps->reserved_cqs = 0;
+ caps->reserved_srqs = 0;
caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
caps->qpc_ba_pg_sz = 0;
@@ -1331,6 +1375,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->cqe_ba_pg_sz = 0;
caps->cqe_buf_pg_sz = 0;
caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
+ caps->srqwqe_ba_pg_sz = 0;
+ caps->srqwqe_buf_pg_sz = 0;
+ caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM;
+ caps->idx_ba_pg_sz = 0;
+ caps->idx_buf_pg_sz = 0;
+ caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM;
caps->eqe_ba_pg_sz = 0;
caps->eqe_buf_pg_sz = 0;
caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
@@ -1354,8 +1404,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
+ caps->max_srqs = HNS_ROCE_V2_MAX_SRQ;
+ caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
+ caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
+
if (hr_dev->pci_dev->revision == 0x21)
- caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
+ HNS_ROCE_CAP_FLAG_SRQ;
ret = hns_roce_v2_set_bt(hr_dev);
if (ret)
@@ -1587,30 +1642,62 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
hns_roce_free_link_table(hr_dev, &priv->tsq);
}
+static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_mbox_status *mb_st =
+ (struct hns_roce_mbox_status *)desc.data;
+ enum hns_roce_cmd_return_status status;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true);
+
+ status = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (status)
+ return status;
+
+ return cpu_to_le32(mb_st->mb_status_hw_run);
+}
+
static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
{
- u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
+ u32 status = hns_roce_query_mbox_status(hr_dev);
return status >> HNS_ROCE_HW_RUN_BIT_SHIFT;
}
static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev)
{
- u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
+ u32 status = hns_roce_query_mbox_status(hr_dev);
return status & HNS_ROCE_HW_MB_STATUS_MASK;
}
+static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param,
+ u64 out_param, u32 in_modifier, u8 op_modifier,
+ u16 op, u16 token, int event)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
+
+ mb->in_param_l = cpu_to_le64(in_param);
+ mb->in_param_h = cpu_to_le64(in_param) >> 32;
+ mb->out_param_l = cpu_to_le64(out_param);
+ mb->out_param_h = cpu_to_le64(out_param) >> 32;
+ mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op);
+ mb->token_event_en = cpu_to_le32(event << 16 | token);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, u32 in_modifier, u8 op_modifier,
u16 op, u16 token, int event)
{
struct device *dev = hr_dev->dev;
- u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base +
- ROCEE_VF_MB_CFG0_REG);
unsigned long end;
- u32 val0 = 0;
- u32 val1 = 0;
+ int ret;
end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies;
while (hns_roce_v2_cmd_pending(hr_dev)) {
@@ -1622,27 +1709,12 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
cond_resched();
}
- roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK,
- HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier);
- roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK,
- HNS_ROCE_VF_MB4_CMD_SHIFT, op);
- roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK,
- HNS_ROCE_VF_MB5_EVENT_SHIFT, event);
- roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK,
- HNS_ROCE_VF_MB5_TOKEN_SHIFT, token);
-
- writeq(in_param, hcr + 0);
- writeq(out_param, hcr + 2);
-
- /* Memory barrier */
- wmb();
-
- writel(val0, hcr + 4);
- writel(val1, hcr + 5);
-
- mmiowb();
+ ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier,
+ op_modifier, op, token, event);
+ if (ret)
+ dev_err(dev, "Post mailbox fail(%d)\n", ret);
- return 0;
+ return ret;
}
static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
@@ -2007,6 +2079,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
return get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
}
+static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+{
+ return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift);
+}
+
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+{
+ u32 bitmap_num;
+ int bit_num;
+
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ bitmap_num = wqe_index / (sizeof(u64) * 8);
+ bit_num = wqe_index % (sizeof(u64) * 8);
+ srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
+ srq->tail++;
+
+ spin_unlock(&srq->lock);
+}
+
static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
{
*hr_cq->set_ci_db = cons_index & 0xffffff;
@@ -2018,6 +2111,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
struct hns_roce_v2_cqe *cqe, *dest;
u32 prod_index;
int nfreed = 0;
+ int wqe_index;
u8 owner_bit;
for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
@@ -2035,7 +2129,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M,
V2_CQE_BYTE_16_LCL_QPN_S) &
HNS_ROCE_V2_CQE_QPN_MASK) == qpn) {
- /* In v1 engine, not support SRQ */
+ if (srq &&
+ roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) {
+ wqe_index = roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S);
+ hns_roce_free_srq_wqe(srq, wqe_index);
+ }
++nfreed;
} else if (nfreed) {
dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
@@ -2212,6 +2312,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
struct hns_roce_qp **cur_qp, struct ib_wc *wc)
{
+ struct hns_roce_srq *srq = NULL;
struct hns_roce_dev *hr_dev;
struct hns_roce_v2_cqe *cqe;
struct hns_roce_qp *hr_qp;
@@ -2254,6 +2355,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->qp = &(*cur_qp)->ibqp;
wc->vendor_err = 0;
+ if (is_send) {
+ wq = &(*cur_qp)->sq;
+ if ((*cur_qp)->sq_signal_bits) {
+ /*
+ * If sg_signal_bit is 1,
+ * firstly tail pointer updated to wqe
+ * which current cqe correspond to
+ */
+ wqe_ctr = (u16)roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S);
+ wq->tail += (wqe_ctr - (u16)wq->tail) &
+ (wq->wqe_cnt - 1);
+ }
+
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ } else if ((*cur_qp)->ibqp.srq) {
+ srq = to_hr_srq((*cur_qp)->ibqp.srq);
+ wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S));
+ wc->wr_id = srq->wrid[wqe_ctr];
+ hns_roce_free_srq_wqe(srq, wqe_ctr);
+ } else {
+ /* Update tail pointer, record wr_id */
+ wq = &(*cur_qp)->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ }
+
status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
V2_CQE_BYTE_4_STATUS_S);
switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
@@ -2373,23 +2505,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->status = IB_WC_GENERAL_ERR;
break;
}
-
- wq = &(*cur_qp)->sq;
- if ((*cur_qp)->sq_signal_bits) {
- /*
- * If sg_signal_bit is 1,
- * firstly tail pointer updated to wqe
- * which current cqe correspond to
- */
- wqe_ctr = (u16)roce_get_field(cqe->byte_4,
- V2_CQE_BYTE_4_WQE_INDX_M,
- V2_CQE_BYTE_4_WQE_INDX_S);
- wq->tail += (wqe_ctr - (u16)wq->tail) &
- (wq->wqe_cnt - 1);
- }
-
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
} else {
/* RQ correspond to CQE */
wc->byte_len = le32_to_cpu(cqe->byte_cnt);
@@ -2434,11 +2549,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
return -EAGAIN;
}
- /* Update tail pointer, record wr_id */
- wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
-
wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M,
V2_CQE_BYTE_32_SL_S);
wc->src_qp = (u8)roce_get_field(cqe->byte_32,
@@ -2747,6 +2857,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+ (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
ilog2((unsigned int)hr_qp->rq.wqe_cnt));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3088,6 +3200,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+ (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
ilog2((unsigned int)hr_qp->rq.wqe_cnt));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3601,6 +3715,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return 0;
}
+static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+
+ if ((cur_state != IB_QPS_RESET &&
+ (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) ||
+ ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) &&
+ (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) ||
+ (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS))
+ return true;
+
+ return false;
+
+}
+
static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
@@ -3626,6 +3755,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
*/
memset(qpc_mask, 0xff, sizeof(*qpc_mask));
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ memset(qpc_mask, 0, sizeof(*qpc_mask));
modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -3641,21 +3771,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
qpc_mask);
if (ret)
goto out;
- } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) {
+ } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) {
/* Nothing */
;
} else {
@@ -3789,6 +3905,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
+ roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
+ ibqp->srq ? 1 : 0);
+ roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
+ V2_QPC_BYTE_108_INV_CREDIT_S, 0);
+
/* Every status migrate must change state */
roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, new_state);
@@ -4074,7 +4195,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
hns_roce_free_db(hr_dev, &hr_qp->rdb);
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hr_qp->rq.wqe_cnt) {
kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
kfree(hr_qp->rq_inl_buf.wqe_list);
}
@@ -4384,6 +4506,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
int aeqe_found = 0;
int event_type;
int sub_type;
+ u32 srqn;
u32 qpn;
u32 cqn;
@@ -4406,6 +4529,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
cqn = roce_get_field(aeqe->event.cq_event.cq,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+ srqn = roce_get_field(aeqe->event.srq_event.srq,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -4413,13 +4539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
hns_roce_qp_event(hr_dev, qpn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ hns_roce_srq_event(hr_dev, srqn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
@@ -4964,13 +5091,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
size = (eq->entries - eqe_alloc) * eq->eqe_size;
}
- eq->buf[i] = dma_alloc_coherent(dev, size,
+ eq->buf[i] = dma_zalloc_coherent(dev, size,
&(eq->buf_dma[i]),
GFP_KERNEL);
if (!eq->buf[i])
goto err_dma_alloc_buf;
- memset(eq->buf[i], 0, size);
*(eq->bt_l0 + i) = eq->buf_dma[i];
eq_buf_cnt++;
@@ -5000,13 +5126,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
size = (eq->entries - eqe_alloc)
* eq->eqe_size;
}
- eq->buf[idx] = dma_alloc_coherent(dev, size,
+ eq->buf[idx] = dma_zalloc_coherent(dev, size,
&(eq->buf_dma[idx]),
GFP_KERNEL);
if (!eq->buf[idx])
goto err_dma_alloc_buf;
- memset(eq->buf[idx], 0, size);
*(eq->bt_l1[i] + j) = eq->buf_dma[idx];
eq_buf_cnt++;
@@ -5116,7 +5241,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
goto free_cmd_mbox;
}
- eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
+ eq->buf_list->buf = dma_zalloc_coherent(dev, buf_chk_sz,
&(eq->buf_list->map),
GFP_KERNEL);
if (!eq->buf_list->buf) {
@@ -5124,7 +5249,6 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
goto err_alloc_buf;
}
- memset(eq->buf_list->buf, 0, buf_chk_sz);
} else {
ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
if (ret) {
@@ -5332,6 +5456,284 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
destroy_workqueue(hr_dev->irq_workq);
}
+static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
+ u32 cqn, void *mb_buf, u64 *mtts_wqe,
+ u64 *mtts_idx, dma_addr_t dma_handle_wqe,
+ dma_addr_t dma_handle_idx)
+{
+ struct hns_roce_srq_context *srq_context;
+
+ srq_context = mb_buf;
+ memset(srq_context, 0, sizeof(*srq_context));
+
+ roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
+ SRQC_BYTE_4_SRQ_ST_S, 1);
+
+ roce_set_field(srq_context->byte_4_srqn_srqst,
+ SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
+ SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
+ (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ hr_dev->caps.srqwqe_hop_num));
+ roce_set_field(srq_context->byte_4_srqn_srqst,
+ SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
+ ilog2(srq->max));
+
+ roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
+ SRQC_BYTE_4_SRQN_S, srq->srqn);
+
+ roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+ roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
+ SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
+
+ srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
+
+ roce_set_field(srq_context->byte_24_wqe_bt_ba,
+ SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
+ SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
+ cpu_to_le32(dma_handle_wqe >> 35));
+
+ roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
+ SRQC_BYTE_28_PD_S, pdn);
+ roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
+ SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
+ fls(srq->max_gs - 1));
+
+ srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3);
+ srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba);
+ roce_set_field(srq_context->rsv_idx_bt_ba,
+ SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
+ SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
+ cpu_to_le32(dma_handle_idx >> 35));
+
+ srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT);
+ srq_context->idx_cur_blk_addr =
+ cpu_to_le32(srq_context->idx_cur_blk_addr);
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
+ SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
+ cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT)));
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
+ SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
+ hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ hr_dev->caps.idx_hop_num);
+
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
+ SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
+ hr_dev->caps.idx_ba_pg_sz);
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
+ SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
+ hr_dev->caps.idx_buf_pg_sz);
+
+ srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
+ srq_context->idx_nxt_blk_addr =
+ cpu_to_le32(srq_context->idx_nxt_blk_addr);
+ roce_set_field(srq_context->rsv_idxnxtblkaddr,
+ SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
+ SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
+ cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT)));
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
+ cqn);
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
+ SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
+ hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET);
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
+ SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
+ hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET);
+
+ roce_set_bit(srq_context->db_record_addr_record_en,
+ SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+}
+
+static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_srq_context *srqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ if (srq_attr_mask & IB_SRQ_LIMIT) {
+ if (srq_attr->srq_limit >= srq->max)
+ return -EINVAL;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+
+ memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+
+ roce_set_field(srq_context->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
+ roce_set_field(srqc_mask->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
+ HNS_ROCE_CMD_MODIFY_SRQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "MODIFY SRQ Failed to cmd mailbox.\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int limit_wl;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
+ HNS_ROCE_CMD_QUERY_SRQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ if (ret) {
+ dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n");
+ goto out;
+ }
+
+ limit_wl = roce_get_field(srq_context->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S);
+
+ attr->srq_limit = limit_wl;
+ attr->max_wr = srq->max - 1;
+ attr->max_sge = srq->max_gs;
+
+ memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int find_empty_entry(struct hns_roce_idx_que *idx_que)
+{
+ int bit_num;
+ int i;
+
+ /* bitmap[i] is set zero if all bits are allocated */
+ for (i = 0; idx_que->bitmap[i] == 0; ++i)
+ ;
+ bit_num = ffs(idx_que->bitmap[i]);
+ idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
+
+ return i * sizeof(u64) * 8 + (bit_num - 1);
+}
+
+static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
+ int cur_idx, int wqe_idx)
+{
+ unsigned int *addr;
+
+ addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf,
+ cur_idx * idx_que->entry_sz);
+ *addr = wqe_idx;
+}
+
+static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct hns_roce_v2_db srq_db;
+ unsigned long flags;
+ int ret = 0;
+ int wqe_idx;
+ void *wqe;
+ int nreq;
+ int ind;
+ int i;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ ind = srq->head & (srq->max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(wr->num_sge > srq->max_gs)) {
+ ret = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ if (unlikely(srq->head == srq->tail)) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ wqe_idx = find_empty_entry(&srq->idx_que);
+ fill_idx_queue(&srq->idx_que, ind, wqe_idx);
+ wqe = get_srq_wqe(srq, wqe_idx);
+ dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
+ dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
+ dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
+ }
+
+ if (i < srq->max_gs) {
+ dseg->len = 0;
+ dseg->lkey = cpu_to_le32(0x100);
+ dseg->addr = 0;
+ }
+
+ srq->wrid[wqe_idx] = wr->wr_id;
+ ind = (ind + 1) & (srq->max - 1);
+ }
+
+ if (likely(nreq)) {
+ srq->head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
+ srq_db.parameter = srq->head;
+
+ hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l);
+
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return ret;
+}
+
static const struct hns_roce_hw hns_roce_hw_v2 = {
.cmq_init = hns_roce_v2_cmq_init,
.cmq_exit = hns_roce_v2_cmq_exit,
@@ -5359,6 +5761,10 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.poll_cq = hns_roce_v2_poll_cq,
.init_eq = hns_roce_v2_init_eq_table,
.cleanup_eq = hns_roce_v2_cleanup_eq_table,
+ .write_srqc = hns_roce_v2_write_srqc,
+ .modify_srq = hns_roce_v2_modify_srq,
+ .query_srq = hns_roce_v2_query_srq,
+ .post_srq_recv = hns_roce_v2_post_srq_recv,
};
static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 8bc820635bbd..b72d0443c835 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -46,10 +46,16 @@
#define HNS_ROCE_V2_MAX_QP_NUM 0x2000
#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000
+#define HNS_ROCE_V2_MAX_SRQ 0x100000
+#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100
#define HNS_ROCE_V2_MAX_CQ_NUM 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
+#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
+#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V2_UAR_NUM 256
@@ -61,6 +67,8 @@
#define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000
#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_PD_NUM 0x1000000
#define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128
#define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128
@@ -71,6 +79,7 @@
#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48
#define HNS_ROCE_V2_CQC_ENTRY_SZ 64
+#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64
#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
@@ -84,8 +93,10 @@
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
#define HNS_ROCE_CQE_HOP_NUM 1
+#define HNS_ROCE_SRQWQE_HOP_NUM 1
#define HNS_ROCE_PBL_HOP_NUM 2
#define HNS_ROCE_EQE_HOP_NUM 2
+#define HNS_ROCE_IDX_HOP_NUM 1
#define HNS_ROCE_V2_GID_INDEX_NUM 256
@@ -113,6 +124,8 @@
((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
(step_idx == 1 && hop_num == 1) || \
(step_idx == 2 && hop_num == 2))
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT)
#define CMD_CSQ_DESC_NUM 1024
#define CMD_CRQ_DESC_NUM 1024
@@ -213,7 +226,10 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
+ HNS_ROCE_OPC_POST_MB = 0x8504,
+ HNS_ROCE_OPC_QUERY_MB_ST = 0x8505,
HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
+ HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
enum {
@@ -325,6 +341,90 @@ struct hns_roce_v2_cq_context {
#define V2_CQC_BYTE_64_SE_CQE_IDX_S 0
#define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
+struct hns_roce_srq_context {
+ __le32 byte_4_srqn_srqst;
+ __le32 byte_8_limit_wl;
+ __le32 byte_12_xrcd;
+ __le32 byte_16_pi_ci;
+ __le32 wqe_bt_ba;
+ __le32 byte_24_wqe_bt_ba;
+ __le32 byte_28_rqws_pd;
+ __le32 idx_bt_ba;
+ __le32 rsv_idx_bt_ba;
+ __le32 idx_cur_blk_addr;
+ __le32 byte_44_idxbufpgsz_addr;
+ __le32 idx_nxt_blk_addr;
+ __le32 rsv_idxnxtblkaddr;
+ __le32 byte_56_xrc_cqn;
+ __le32 db_record_addr_record_en;
+ __le32 db_record_addr;
+};
+
+#define SRQC_BYTE_4_SRQ_ST_S 0
+#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)
+
+#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2
+#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2)
+
+#define SRQC_BYTE_4_SRQ_SHIFT_S 4
+#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4)
+
+#define SRQC_BYTE_4_SRQN_S 8
+#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8)
+
+#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0
+#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0)
+
+#define SRQC_BYTE_12_SRQ_XRCD_S 0
+#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0)
+
+#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0
+#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0)
+
+#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0
+#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16)
+
+#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0
+#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0)
+
+#define SRQC_BYTE_28_PD_S 0
+#define SRQC_BYTE_28_PD_M GENMASK(23, 0)
+
+#define SRQC_BYTE_28_RQWS_S 24
+#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24)
+
+#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0
+#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0)
+
+#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0
+#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0)
+
+#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22
+#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22)
+
+#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24
+#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24)
+
+#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28
+#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28)
+
+#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0
+#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0)
+
+#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0
+#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0)
+
+#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24
+#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24)
+
+#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28
+#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28)
+
+#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0
+
+#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1
+#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1)
+
enum{
V2_MPT_ST_VALID = 0x1,
V2_MPT_ST_FREE = 0x2,
@@ -1289,6 +1389,36 @@ struct hns_roce_vf_res_b {
#define VF_RES_B_DATA_3_VF_SL_NUM_S 16
#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
+struct hns_roce_vf_switch {
+ __le32 rocee_sel;
+ __le32 fun_id;
+ __le32 cfg;
+ __le32 resv1;
+ __le32 resv2;
+ __le32 resv3;
+};
+
+#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
+#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
+
+#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
+#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
+#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
+
+struct hns_roce_post_mbox {
+ __le32 in_param_l;
+ __le32 in_param_h;
+ __le32 out_param_l;
+ __le32 out_param_h;
+ __le32 cmd_tag;
+ __le32 token_event_en;
+};
+
+struct hns_roce_mbox_status {
+ __le32 mb_status_hw_run;
+ __le32 rsv[5];
+};
+
struct hns_roce_cfg_bt_attr {
__le32 vf_qpc_cfg;
__le32 vf_srqc_cfg;
@@ -1372,18 +1502,6 @@ struct hns_roce_cmq_desc {
#define HNS_ROCE_HW_RUN_BIT_SHIFT 31
#define HNS_ROCE_HW_MB_STATUS_MASK 0xFF
-#define HNS_ROCE_VF_MB4_TAG_MASK 0xFFFFFF00
-#define HNS_ROCE_VF_MB4_TAG_SHIFT 8
-
-#define HNS_ROCE_VF_MB4_CMD_MASK 0xFF
-#define HNS_ROCE_VF_MB4_CMD_SHIFT 0
-
-#define HNS_ROCE_VF_MB5_EVENT_MASK 0x10000
-#define HNS_ROCE_VF_MB5_EVENT_SHIFT 16
-
-#define HNS_ROCE_VF_MB5_TOKEN_MASK 0xFFFF
-#define HNS_ROCE_VF_MB5_TOKEN_SHIFT 0
-
struct hns_roce_v2_cmq_ring {
dma_addr_t desc_dma_addr;
struct hns_roce_cmq_desc *desc;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1b3ee514f2ef..65ba43cee810 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -220,6 +220,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ props->max_srq = hr_dev->caps.max_srqs;
+ props->max_srq_wr = hr_dev->caps.max_srq_wrs;
+ props->max_srq_sge = hr_dev->caps.max_srq_sges;
+ }
return 0;
}
@@ -541,6 +546,21 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->map_mr_sg = hns_roce_map_mr_sg;
}
+ /* SRQ */
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ib_dev->create_srq = hns_roce_create_srq;
+ ib_dev->modify_srq = hr_dev->hw->modify_srq;
+ ib_dev->query_srq = hr_dev->hw->query_srq;
+ ib_dev->destroy_srq = hns_roce_destroy_srq;
+ ib_dev->post_srq_recv = hr_dev->hw->post_srq_recv;
+ ib_dev->uverbs_cmd_mask |=
+ (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+ }
+
/* OTHERS */
ib_dev->get_port_immutable = hns_roce_port_immutable;
ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
@@ -646,8 +666,58 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
goto err_unmap_trrl;
}
+ if (hr_dev->caps.srqc_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
+ HEM_TYPE_SRQC,
+ hr_dev->caps.srqc_entry_sz,
+ hr_dev->caps.num_srqs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init SRQ context memory, aborting.\n");
+ goto err_unmap_cq;
+ }
+ }
+
+ if (hr_dev->caps.num_srqwqe_segs) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table,
+ HEM_TYPE_SRQWQE,
+ hr_dev->caps.mtt_entry_sz,
+ hr_dev->caps.num_srqwqe_segs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init MTT srqwqe memory, aborting.\n");
+ goto err_unmap_srq;
+ }
+ }
+
+ if (hr_dev->caps.num_idx_segs) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_idx_table,
+ HEM_TYPE_IDX,
+ hr_dev->caps.idx_entry_sz,
+ hr_dev->caps.num_idx_segs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init MTT idx memory, aborting.\n");
+ goto err_unmap_srqwqe;
+ }
+ }
+
return 0;
+err_unmap_srqwqe:
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table);
+
+err_unmap_srq:
+ if (hr_dev->caps.srqc_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
+
+err_unmap_cq:
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+
err_unmap_trrl:
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
@@ -727,8 +797,21 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
goto err_cq_table_free;
}
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ret = hns_roce_init_srq_table(hr_dev);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init share receive queue table.\n");
+ goto err_qp_table_free;
+ }
+ }
+
return 0;
+err_qp_table_free:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_qp_table(hr_dev);
+
err_cq_table_free:
hns_roce_cleanup_cq_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 521ad2aa3a4e..ee5991bd4171 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -184,12 +184,27 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
struct hns_roce_buddy *buddy;
int ret;
- if (mtt_type == MTT_TYPE_WQE) {
+ switch (mtt_type) {
+ case MTT_TYPE_WQE:
buddy = &mr_table->mtt_buddy;
table = &mr_table->mtt_table;
- } else {
+ break;
+ case MTT_TYPE_CQE:
buddy = &mr_table->mtt_cqe_buddy;
table = &mr_table->mtt_cqe_table;
+ break;
+ case MTT_TYPE_SRQWQE:
+ buddy = &mr_table->mtt_srqwqe_buddy;
+ table = &mr_table->mtt_srqwqe_table;
+ break;
+ case MTT_TYPE_IDX:
+ buddy = &mr_table->mtt_idx_buddy;
+ table = &mr_table->mtt_idx_table;
+ break;
+ default:
+ dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n",
+ mtt_type);
+ return -EINVAL;
}
ret = hns_roce_buddy_alloc(buddy, order, seg);
@@ -242,18 +257,40 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
if (mtt->order < 0)
return;
- if (mtt->mtt_type == MTT_TYPE_WQE) {
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
mtt->order);
hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
- } else {
+ break;
+ case MTT_TYPE_CQE:
hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
mtt->order);
hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ case MTT_TYPE_SRQWQE:
+ hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
+ mtt->order);
+ hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ case MTT_TYPE_IDX:
+ hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
+ mtt->order);
+ hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "Unsupport mtt type %d, clean mtt failed\n",
+ mtt->mtt_type);
+ break;
}
}
EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
@@ -713,10 +750,26 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
u32 bt_page_size;
u32 i;
- if (mtt->mtt_type == MTT_TYPE_WQE)
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
+ table = &hr_dev->mr_table.mtt_table;
bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- else
+ break;
+ case MTT_TYPE_CQE:
+ table = &hr_dev->mr_table.mtt_cqe_table;
bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_SRQWQE:
+ table = &hr_dev->mr_table.mtt_srqwqe_table;
+ bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_IDX:
+ table = &hr_dev->mr_table.mtt_idx_table;
+ bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+ break;
+ default:
+ return -EINVAL;
+ }
/* All MTTs must fit in the same page */
if (start_index / (bt_page_size / sizeof(u64)) !=
@@ -726,11 +779,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
return -EINVAL;
- if (mtt->mtt_type == MTT_TYPE_WQE)
- table = &hr_dev->mr_table.mtt_table;
- else
- table = &hr_dev->mr_table.mtt_cqe_table;
-
mtts = hns_roce_table_find(hr_dev, table,
mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
&dma_handle);
@@ -759,10 +807,25 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
if (mtt->order < 0)
return -EINVAL;
- if (mtt->mtt_type == MTT_TYPE_WQE)
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- else
+ break;
+ case MTT_TYPE_CQE:
bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_SRQWQE:
+ bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_IDX:
+ bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "Unsupport mtt type %d, write mtt failed\n",
+ mtt->mtt_type);
+ return -EINVAL;
+ }
while (npages > 0) {
chunk = min_t(int, bt_page_size / sizeof(u64), npages);
@@ -828,8 +891,31 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
if (ret)
goto err_buddy_cqe;
}
+
+ if (hr_dev->caps.num_srqwqe_segs) {
+ ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
+ ilog2(hr_dev->caps.num_srqwqe_segs));
+ if (ret)
+ goto err_buddy_srqwqe;
+ }
+
+ if (hr_dev->caps.num_idx_segs) {
+ ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
+ ilog2(hr_dev->caps.num_idx_segs));
+ if (ret)
+ goto err_buddy_idx;
+ }
+
return 0;
+err_buddy_idx:
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
+
+err_buddy_srqwqe:
+ if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
+ hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
+
err_buddy_cqe:
hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
@@ -842,6 +928,10 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+ if (hr_dev->caps.num_idx_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
@@ -897,8 +987,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
u32 bt_page_size;
u32 n;
- order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
- hr_dev->caps.cqe_ba_pg_sz;
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
+ order = hr_dev->caps.mtt_ba_pg_sz;
+ break;
+ case MTT_TYPE_CQE:
+ order = hr_dev->caps.cqe_ba_pg_sz;
+ break;
+ case MTT_TYPE_SRQWQE:
+ order = hr_dev->caps.srqwqe_ba_pg_sz;
+ break;
+ case MTT_TYPE_IDX:
+ order = hr_dev->caps.idx_ba_pg_sz;
+ break;
+ default:
+ dev_err(dev, "Unsupport mtt type %d, write mtt failed\n",
+ mtt->mtt_type);
+ return -EINVAL;
+ }
+
bt_page_size = 1 << (order + PAGE_SHIFT);
pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
@@ -1021,14 +1128,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_umem;
}
} else {
- int pbl_size = 1;
+ u64 pbl_size = 1;
bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
pbl_size *= bt_size;
if (n > pbl_size) {
dev_err(dev,
- " MR len %lld err. MR page num is limited to %d!\n",
+ " MR len %lld err. MR page num is limited to %lld!\n",
length, pbl_size);
ret = -EINVAL;
goto err_umem;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 5ebf481a39d9..52d2b299b3be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -280,7 +280,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
EXPORT_SYMBOL_GPL(hns_roce_release_range_qp);
static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap, int is_user, int has_srq,
+ struct ib_qp_cap *cap, int is_user, int has_rq,
struct hns_roce_qp *hr_qp)
{
struct device *dev = hr_dev->dev;
@@ -294,14 +294,12 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
return -EINVAL;
}
- /* If srq exit, set zero for relative number of rq */
- if (has_srq) {
- if (cap->max_recv_wr) {
- dev_dbg(dev, "srq no need config max_recv_wr\n");
- return -EINVAL;
- }
-
- hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0;
+ /* If srq exist, set zero for relative number of rq */
+ if (!has_rq) {
+ hr_qp->rq.wqe_cnt = 0;
+ hr_qp->rq.max_gs = 0;
+ cap->max_recv_wr = 0;
+ cap->max_recv_sge = 0;
} else {
if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
@@ -563,13 +561,14 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR);
ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject,
- !!init_attr->srq, hr_qp);
+ hns_roce_qp_has_rq(init_attr), hr_qp);
if (ret) {
dev_err(dev, "hns_roce_set_rq_size failed\n");
goto err_out;
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hns_roce_qp_has_rq(init_attr)) {
/* allocate recv inline buf */
hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
sizeof(struct hns_roce_rinl_wqe),
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
new file mode 100644
index 000000000000..463df60094e8
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018 Hisilicon Limited.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/hns-abi.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_srq *srq;
+
+ xa_lock(&srq_table->xa);
+ srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
+ if (srq)
+ atomic_inc(&srq->refcount);
+ xa_unlock(&srq_table->xa);
+
+ if (!srq) {
+ dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+ return;
+ }
+
+ srq->event(srq, event_type);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+}
+EXPORT_SYMBOL_GPL(hns_roce_srq_event);
+
+static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
+ enum hns_roce_event event_type)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq *ibsrq = &srq->ibsrq;
+ struct ib_event event;
+
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ event_type, srq->srqn);
+ return;
+ }
+
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+ }
+}
+
+static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long srq_num)
+{
+ return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
+ HNS_ROCE_CMD_SW2HW_SRQ,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long srq_num)
+{
+ return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
+ mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd,
+ struct hns_roce_mtt *hr_mtt, u64 db_rec_addr,
+ struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_cmd_mailbox *mailbox;
+ dma_addr_t dma_handle_wqe;
+ dma_addr_t dma_handle_idx;
+ u64 *mtts_wqe;
+ u64 *mtts_idx;
+ int ret;
+
+ /* Get the physical address of srq buf */
+ mtts_wqe = hns_roce_table_find(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table,
+ srq->mtt.first_seg,
+ &dma_handle_wqe);
+ if (!mtts_wqe) {
+ dev_err(hr_dev->dev,
+ "SRQ alloc.Failed to find srq buf addr.\n");
+ return -EINVAL;
+ }
+
+ /* Get physical address of idx que buf */
+ mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table,
+ srq->idx_que.mtt.first_seg,
+ &dma_handle_idx);
+ if (!mtts_idx) {
+ dev_err(hr_dev->dev,
+ "SRQ alloc.Failed to find idx que buf addr.\n");
+ return -EINVAL;
+ }
+
+ ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
+ if (ret == -1) {
+ dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n");
+ return -ENOMEM;
+ }
+
+ ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+ if (ret)
+ goto err_out;
+
+ ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ if (ret)
+ goto err_put;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto err_xa;
+ }
+
+ hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
+ mtts_wqe, mtts_idx, dma_handle_wqe,
+ dma_handle_idx);
+
+ ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret)
+ goto err_xa;
+
+ atomic_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+ return ret;
+
+err_xa:
+ xa_erase(&srq_table->xa, srq->srqn);
+
+err_put:
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+
+err_out:
+ hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+ return ret;
+}
+
+void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ int ret;
+
+ ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+ if (ret)
+ dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n",
+ ret, srq->srqn);
+
+ xa_erase(&srq_table->xa, srq->srqn);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+ hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+}
+
+static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
+ u32 page_shift)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 bitmap_num;
+ int i;
+
+ bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64));
+
+ idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL);
+ if (!idx_que->bitmap)
+ return -ENOMEM;
+
+ bitmap_num = bitmap_num / (8 * sizeof(u64));
+
+ idx_que->buf_size = srq->idx_que.buf_size;
+
+ if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
+ &idx_que->idx_buf, page_shift)) {
+ kfree(idx_que->bitmap);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < bitmap_num; i++)
+ idx_que->bitmap[i] = ~(0UL);
+
+ return 0;
+}
+
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_srq *srq;
+ int srq_desc_size;
+ int srq_buf_size;
+ u32 page_shift;
+ int ret = 0;
+ u32 npages;
+ u32 cqn;
+
+ /* Check the actual SRQ wqe and SRQ sge num */
+ if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
+ srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+ return ERR_PTR(-EINVAL);
+
+ srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+
+ srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
+ srq->max_gs = srq_init_attr->attr.max_sge;
+
+ srq_desc_size = max(16, 16 * srq->max_gs);
+
+ srq->wqe_shift = ilog2(srq_desc_size);
+
+ srq_buf_size = srq->max * srq_desc_size;
+
+ srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
+ srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz;
+ srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
+ srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
+
+ if (udata) {
+ struct hns_roce_ib_create_srq ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ret = -EFAULT;
+ goto err_srq;
+ }
+
+ srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ srq_buf_size, 0, 0);
+ if (IS_ERR(srq->umem)) {
+ ret = PTR_ERR(srq->umem);
+ goto err_srq;
+ }
+
+ if (hr_dev->caps.srqwqe_buf_pg_sz) {
+ npages = (ib_umem_page_count(srq->umem) +
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+ page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, npages,
+ page_shift,
+ &srq->mtt);
+ } else
+ ret = hns_roce_mtt_init(hr_dev,
+ ib_umem_page_count(srq->umem),
+ srq->umem->page_shift,
+ &srq->mtt);
+ if (ret)
+ goto err_buf;
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
+ if (ret)
+ goto err_srq_mtt;
+
+ /* config index queue BA */
+ srq->idx_que.umem = ib_umem_get(pd->uobject->context,
+ ucmd.que_addr,
+ srq->idx_que.buf_size, 0, 0);
+ if (IS_ERR(srq->idx_que.umem)) {
+ dev_err(hr_dev->dev,
+ "ib_umem_get error for index queue\n");
+ goto err_srq_mtt;
+ }
+
+ if (hr_dev->caps.idx_buf_pg_sz) {
+ npages = (ib_umem_page_count(srq->idx_que.umem) +
+ (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.idx_buf_pg_sz);
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, npages,
+ page_shift, &srq->idx_que.mtt);
+ } else {
+ ret = hns_roce_mtt_init(hr_dev,
+ ib_umem_page_count(srq->idx_que.umem),
+ srq->idx_que.umem->page_shift,
+ &srq->idx_que.mtt);
+ }
+
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "hns_roce_mtt_init error for idx que\n");
+ goto err_idx_mtt;
+ }
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
+ srq->idx_que.umem);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "hns_roce_ib_umem_write_mtt error for idx que\n");
+ goto err_idx_buf;
+ }
+ } else {
+ page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
+ (1 << page_shift) * 2,
+ &srq->buf, page_shift)) {
+ ret = -ENOMEM;
+ goto err_srq;
+ }
+
+ srq->head = 0;
+ srq->tail = srq->max - 1;
+
+ ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
+ srq->buf.page_shift, &srq->mtt);
+ if (ret)
+ goto err_buf;
+
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+ if (ret)
+ goto err_srq_mtt;
+
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_create_idx_que(pd, srq, page_shift);
+ if (ret) {
+ dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n",
+ ret);
+ goto err_srq_mtt;
+ }
+
+ /* Init mtt table for idx_que */
+ ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
+ srq->idx_que.idx_buf.page_shift,
+ &srq->idx_que.mtt);
+ if (ret)
+ goto err_create_idx;
+
+ /* Write buffer address into the mtt table */
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
+ &srq->idx_que.idx_buf);
+ if (ret)
+ goto err_idx_buf;
+
+ srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid) {
+ ret = -ENOMEM;
+ goto err_idx_buf;
+ }
+ }
+
+ cqn = ib_srq_has_cq(srq_init_attr->srq_type) ?
+ to_hr_cq(srq_init_attr->ext.cq)->cqn : 0;
+
+ srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+
+ ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0,
+ &srq->mtt, 0, srq);
+ if (ret)
+ goto err_wrid;
+
+ srq->event = hns_roce_ib_srq_event;
+ srq->ibsrq.ext.xrc.srq_num = srq->srqn;
+
+ if (pd->uobject) {
+ if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
+ ret = -EFAULT;
+ goto err_wrid;
+ }
+ }
+
+ return &srq->ibsrq;
+
+err_wrid:
+ kvfree(srq->wrid);
+
+err_idx_buf:
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_idx_mtt:
+ if (udata)
+ ib_umem_release(srq->idx_que.umem);
+
+err_create_idx:
+ hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+ &srq->idx_que.idx_buf);
+ kfree(srq->idx_que.bitmap);
+
+err_srq_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_buf:
+ if (udata)
+ ib_umem_release(srq->umem);
+ else
+ hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+
+err_srq:
+ kfree(srq);
+ return ERR_PTR(ret);
+}
+
+int hns_roce_destroy_srq(struct ib_srq *ibsrq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+
+ hns_roce_srq_free(hr_dev, srq);
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+ if (ibsrq->uobject) {
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+ ib_umem_release(srq->idx_que.umem);
+ ib_umem_release(srq->umem);
+ } else {
+ kvfree(srq->wrid);
+ hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
+ &srq->buf);
+ }
+
+ kfree(srq);
+
+ return 0;
+}
+
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+
+ xa_init(&srq_table->xa);
+
+ return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs,
+ hr_dev->caps.num_srqs - 1,
+ hr_dev->caps.reserved_srqs, 0);
+}
+
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 102875872bea..a773d1edf7fd 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2722,25 +2722,6 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
}
/**
- * i40iw_get_vector_affinity - report IRQ affinity mask
- * @ibdev: IB device
- * @comp_vector: completion vector index
- */
-static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev,
- int comp_vector)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_msix_vector *msix_vec;
-
- if (iwdev->msix_shared)
- msix_vec = &iwdev->iw_msixtbl[comp_vector];
- else
- msix_vec = &iwdev->iw_msixtbl[comp_vector + 1];
-
- return irq_get_affinity_mask(msix_vec->irq);
-}
-
-/**
* i40iw_init_rdma_device - initialization of iwarp device
* @iwdev: iwarp device
*/
@@ -2832,7 +2813,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
iwibdev->ibdev.post_send = i40iw_post_send;
iwibdev->ibdev.post_recv = i40iw_post_recv;
- iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity;
return iwibdev;
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0def2323459c..b73b5fa1822a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2352,6 +2352,32 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
event == NETDEV_UP || event == NETDEV_CHANGE))
update_qps_port = port;
+ if (dev == iboe->netdevs[port - 1] &&
+ (event == NETDEV_UP || event == NETDEV_DOWN)) {
+ enum ib_port_state port_state;
+ struct ib_event ibev = { };
+
+ if (ib_get_cached_port_state(&ibdev->ib_dev, port,
+ &port_state))
+ continue;
+
+ if (event == NETDEV_UP &&
+ (port_state != IB_PORT_ACTIVE ||
+ iboe->last_port_state[port - 1] != IB_PORT_DOWN))
+ continue;
+ if (event == NETDEV_DOWN &&
+ (port_state != IB_PORT_DOWN ||
+ iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
+ continue;
+ iboe->last_port_state[port - 1] = port_state;
+
+ ibev.device = &ibdev->ib_dev;
+ ibev.element.port_num = port;
+ ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
+ IB_EVENT_PORT_ERR;
+ ib_dispatch_event(&ibev);
+ }
+
}
spin_unlock_bh(&iboe->lock);
@@ -2710,6 +2736,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (i = 0; i < ibdev->num_ports; ++i) {
mutex_init(&ibdev->counters_table[i].mutex);
INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+ iboe->last_port_state[i] = IB_PORT_DOWN;
}
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 8850dfc3826d..5cb52424912e 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -519,6 +519,7 @@ struct mlx4_ib_iboe {
atomic64_t mac[MLX4_MAX_PORTS];
struct notifier_block nb;
struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
+ enum ib_port_state last_port_state[MLX4_MAX_PORTS];
};
struct pkey_mgt {
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 752bdd536130..ea1f3a081b05 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -353,16 +353,12 @@ err:
static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
{
- char base_name[9];
-
- /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
- strlcpy(name, pci_name(dev->dev->persist->pdev), max);
- strncpy(base_name, name, 8); /*till xxxx:yy:*/
- base_name[8] = '\0';
- /* with no ARI only 3 last bits are used so when the fn is higher than 8
+ /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n
+ * with no ARI only 3 last bits are used so when the fn is higher than 8
* need to add it to the dev num, so count in the last number will be
* modulo 8 */
- sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
+ snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev),
+ i / 8, i % 8);
}
struct mlx4_port {
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index b8e4b15e2674..33f5adb14e4e 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,6 +1,8 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
+ srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
+ cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 7d769b5538b4..0b99f7d0630d 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -35,6 +35,7 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
+#include "srq.h"
static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
@@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_core_srq *msrq = NULL;
if (qp->ibqp.xrcd) {
- msrq = mlx5_core_get_srq(dev->mdev,
- be32_to_cpu(cqe->srqn));
+ msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
srq = to_mibsrq(msrq);
} else {
srq = to_msrq(qp->ibqp.srq);
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 61aab7c0c513..5271469aad10 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -9,6 +9,7 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/ib_umem.h>
+#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
@@ -40,29 +41,37 @@ struct devx_umem_reg_cmd {
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
};
-static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
+static struct mlx5_ib_ucontext *
+devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
{
- return to_mucontext(ib_uverbs_get_ucontext(file));
+ return to_mucontext(ib_uverbs_get_ucontext(attrs));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
u64 general_obj_types;
- void *hdr;
+ void *hdr, *uctx;
int err;
u16 uid;
+ u32 cap = 0;
hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
+ uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types);
if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) ||
!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
return -EINVAL;
+ if (is_user && capable(CAP_NET_RAW) &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+ cap |= MLX5_UCTX_CAP_RAW_TX;
+
MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
+ MLX5_SET(uctx, uctx, cap, cap);
err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
if (err)
@@ -106,6 +115,21 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
}
}
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ return true;
+ }
+
+ return false;
+}
+
/*
* As the obj_id in the firmware is not globally unique the object type
* must be considered upon checking for a valid object id.
@@ -116,7 +140,7 @@ static u64 get_enc_obj_id(u16 opcode, u32 obj_id)
return ((u64)opcode << 32) | obj_id;
}
-static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
+static u64 devx_get_obj_id(const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
u64 obj_id;
@@ -290,6 +314,8 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
MLX5_GET(query_dct_in, in, dctn));
break;
case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(query_xrq_in, in, xrqn));
break;
@@ -316,17 +342,107 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
MLX5_GET(drain_dct_in, in, dctn));
break;
case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(arm_xrq_in, in, xrqn));
break;
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ obj_id = get_enc_obj_id
+ (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
+ MLX5_GET(query_packet_reformat_context_in,
+ in, packet_reformat_id));
+ break;
default:
+ obj_id = 0;
+ }
+
+ return obj_id;
+}
+
+static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in)
+{
+ u64 obj_id = devx_get_obj_id(in);
+
+ if (!obj_id)
return false;
+
+ switch (uobj_get_object_id(uobj)) {
+ case UVERBS_OBJECT_CQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ to_mcq(uobj->object)->mcq.cqn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_SRQ:
+ {
+ struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
+ struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+ u16 opcode;
+
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
+ break;
+ case MLX5_RES_XRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRQ;
+ break;
+ default:
+ if (!dev->mdev->issi)
+ opcode = MLX5_CMD_OP_CREATE_SRQ;
+ else
+ opcode = MLX5_CMD_OP_CREATE_RMP;
+ }
+
+ return get_enc_obj_id(opcode,
+ to_msrq(uobj->object)->msrq.srqn) ==
+ obj_id;
}
- if (obj_id == obj->obj_id)
- return true;
+ case UVERBS_OBJECT_QP:
+ {
+ struct mlx5_ib_qp *qp = to_mqp(uobj->object);
+ enum ib_qp_type qp_type = qp->ibqp.qp_type;
+
+ if (qp_type == IB_QPT_RAW_PACKET ||
+ (qp->flags & MLX5_IB_QP_UNDERLAY)) {
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp =
+ &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+
+ return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ rq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ sq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ rq->tirn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ sq->tisn) == obj_id);
+ }
+
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ qp->dct.mdct.mqp.qpn) == obj_id;
+
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ qp->ibqp.qp_num) == obj_id;
+ }
- return false;
+ case UVERBS_OBJECT_WQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ to_mrwq(uobj->object)->core_qp.qpn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_RWQ_IND_TBL:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ to_mrwq_ind_table(uobj->object)->rqtn) ==
+ obj_id;
+
+ case MLX5_IB_OBJECT_DEVX_OBJ:
+ return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
+
+ default:
+ return false;
+ }
}
static void devx_set_umem_valid(const void *in)
@@ -494,6 +610,7 @@ static bool devx_is_obj_modify_cmd(const void *in)
case MLX5_CMD_OP_DRAIN_DCT:
case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
return true;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
{
@@ -535,6 +652,9 @@ static bool devx_is_obj_query_cmd(const void *in)
case MLX5_CMD_OP_QUERY_XRC_SRQ:
case MLX5_CMD_OP_QUERY_DCT:
case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
return true;
default:
return false;
@@ -572,15 +692,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
if (!c->devx_uid)
return -EINVAL;
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
return c->devx_uid;
}
static bool devx_is_general_cmd(void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ if (opcode >= MLX5_CMD_OP_GENERAL_START &&
+ opcode < MLX5_CMD_OP_GENERAL_END)
+ return true;
+
switch (opcode) {
case MLX5_CMD_OP_QUERY_HCA_CAP:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -603,7 +724,7 @@ static bool devx_is_general_cmd(void *in)
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
@@ -616,7 +737,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
return -EFAULT;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -653,14 +774,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
* queue or arm its CQ for event generation), no further harm is expected.
*/
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
u32 user_idx;
s32 dev_idx;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -681,7 +802,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
@@ -693,7 +814,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
int err;
int uid;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -908,7 +1029,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -970,7 +1091,7 @@ obj_free:
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -978,7 +1099,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
- struct devx_obj *obj = uobj->object;
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
void *cmd_out;
int err;
int uid;
@@ -990,7 +1111,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
if (!devx_is_obj_modify_cmd(cmd_in))
return -EINVAL;
- if (!devx_is_valid_obj_id(obj, cmd_in))
+ if (!devx_is_valid_obj_id(uobj, cmd_in))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1000,7 +1121,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
devx_set_umem_valid(cmd_in);
- err = mlx5_cmd_exec(obj->mdev, cmd_in,
+ err = mlx5_cmd_exec(mdev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
cmd_out, cmd_out_len);
if (err)
@@ -1011,7 +1132,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -1019,10 +1140,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
- struct devx_obj *obj = uobj->object;
void *cmd_out;
int err;
int uid;
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
@@ -1031,7 +1152,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
if (!devx_is_obj_query_cmd(cmd_in))
return -EINVAL;
- if (!devx_is_valid_obj_id(obj, cmd_in))
+ if (!devx_is_valid_obj_id(uobj, cmd_in))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1039,7 +1160,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
return PTR_ERR(cmd_out);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- err = mlx5_cmd_exec(obj->mdev, cmd_in,
+ err = mlx5_cmd_exec(mdev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
cmd_out, cmd_out_len);
if (err)
@@ -1125,7 +1246,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct devx_umem_reg_cmd cmd;
struct devx_umem *obj;
@@ -1139,9 +1260,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
if (!c->devx_uid)
return -EINVAL;
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
if (!obj)
return -ENOMEM;
@@ -1277,7 +1395,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
- MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_IDR_ANY_OBJECT,
UVERBS_ACCESS_WRITE,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
@@ -1293,7 +1411,7 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_QUERY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
- MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_IDR_ANY_OBJECT,
UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
@@ -1323,12 +1441,23 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
-DECLARE_UVERBS_OBJECT_TREE(devx_objects,
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
-
-const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void)
+static bool devx_is_supported(struct ib_device *device)
{
- return &devx_objects;
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return !dev->rep && MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_UCTX;
}
+
+const struct uapi_definition mlx5_ib_devx_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index f86cdcafdafc..b73756bd38bd 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -60,7 +60,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_ib_flow_handler *flow_handler;
@@ -77,6 +77,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
int len, ret, i;
+ u32 counter_id = 0;
if (!capable(CAP_NET_RAW))
return -EPERM;
@@ -92,10 +93,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
return -EINVAL;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS &&
- (dest_devx || dest_qp))
- return -EINVAL;
-
if (dest_devx) {
devx_obj = uverbs_attr_get_obj(
attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
@@ -128,9 +125,22 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+ if (len) {
+ devx_obj = arr_flow_actions[0]->object;
+
+ if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id))
+ return -EINVAL;
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ }
if (dev->rep)
return -ENOTSUPP;
+ if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ return -EINVAL;
+
cmd_in = uverbs_attr_get_alloced_ptr(
attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
inlen = uverbs_attr_get_len(attrs,
@@ -164,6 +174,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
}
flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+ counter_id,
cmd_in, inlen,
dest_id, dest_type);
if (IS_ERR(flow_handler)) {
@@ -194,7 +205,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
@@ -313,7 +324,6 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
@@ -435,7 +445,6 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
@@ -526,7 +535,11 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_OPTIONAL),
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
UVERBS_ATTR_TYPE(u32),
- UA_OPTIONAL));
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ, 1, 1,
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_DESTROY_FLOW,
@@ -610,16 +623,10 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
-DECLARE_UVERBS_OBJECT_TREE(flow_objects,
- &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER));
-
-int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
-{
- int i = 0;
-
- root[i++] = &flow_objects;
- root[i++] = &mlx5_ib_fs;
- root[i++] = &mlx5_ib_flow_actions;
-
- return i;
-}
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_FLOW_MATCHER),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW, &mlx5_ib_fs),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_actions),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 584ff2ea7810..8a682d86d634 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -4,6 +4,7 @@
*/
#include "ib_rep.h"
+#include "srq.h"
static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
@@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 32a9e9228b13..a2735f246d5c 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -568,6 +568,10 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
+ props->port_cap_flags2 =
+ be16_to_cpup((__be16 *)(out_mad->data + 60));
+
/* Check if extended speeds (EDR/FDR/...) are supported */
if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
ext_active_speed = out_mad->data[62] >> 4;
@@ -579,6 +583,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
case 2:
props->active_speed = 32; /* EDR */
break;
+ case 4:
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP)
+ props->active_speed = IB_SPEED_HDR;
+ break;
}
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 3569fda07e07..5edd8d5f151f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -60,6 +60,7 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
+#include "srq.h"
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
@@ -82,10 +83,13 @@ static char mlx5_version[] =
struct mlx5_ib_event_work {
struct work_struct work;
- struct mlx5_core_dev *dev;
- void *context;
- enum mlx5_dev_event event;
- unsigned long param;
+ union {
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_multiport_info *mpi;
+ };
+ bool is_slave;
+ unsigned int event;
+ void *param;
};
enum {
@@ -1014,6 +1018,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, cqe_128_always))
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
+ if (MLX5_CAP_GEN(mdev, qp_packet_based))
+ resp.flags |=
+ MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
}
if (field_avail(typeof(resp), sw_parsing_caps,
@@ -1216,6 +1223,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
props->subnet_timeout = rep->subnet_timeout;
props->init_type_reply = rep->init_type_reply;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
+ props->port_cap_flags2 = rep->cap_mask2;
+
err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
if (err)
goto out;
@@ -1752,7 +1762,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
#endif
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- err = mlx5_ib_devx_create(dev);
+ err = mlx5_ib_devx_create(dev, true);
if (err < 0)
goto out_uars;
context->devx_uid = err;
@@ -3706,7 +3716,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_act *flow_act,
- void *cmd_in, int inlen)
+ void *cmd_in, int inlen,
+ int dst_num)
{
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_spec *spec;
@@ -3728,7 +3739,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
handler->rule = mlx5_add_flow_rules(ft, spec,
- flow_act, dst, 1);
+ flow_act, dst, dst_num);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -3791,12 +3802,14 @@ struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_act *flow_act,
+ u32 counter_id,
void *cmd_in, int inlen, int dest_id,
int dest_type)
{
struct mlx5_flow_destination *dst;
struct mlx5_ib_flow_prio *ft_prio;
struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
bool mcast;
int err;
@@ -3806,7 +3819,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
@@ -3820,20 +3833,28 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
}
if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst->type = dest_type;
- dst->tir_num = dest_id;
+ dst[dst_num].type = dest_type;
+ dst[dst_num].tir_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
} else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst->ft_num = dest_id;
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num].ft_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
} else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
}
+ dst_num++;
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter_id = counter_id;
+ dst_num++;
+ }
+
handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
- cmd_in, inlen);
+ cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
@@ -4226,6 +4247,63 @@ static void delay_drop_handler(struct work_struct *work)
mutex_unlock(&delay_drop->lock);
}
+static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+ struct ib_event *ibev)
+{
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
+ break;
+ default: /* do nothing */
+ return;
+ }
+}
+
+static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+ struct ib_event *ibev)
+{
+ u8 port = (eqe->data.port.port >> 4) & 0xf;
+
+ ibev->element.port_num = port;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+ /* In RoCE, port up/down events are handled in
+ * mlx5_netdev_event().
+ */
+ if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+ IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
+ ibev->event = IB_EVENT_LID_CHANGE;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+ ibev->event = IB_EVENT_PKEY_CHANGE;
+ schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+ ibev->event = IB_EVENT_GID_CHANGE;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+ ibev->event = IB_EVENT_CLIENT_REREGISTER;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static void mlx5_ib_handle_event(struct work_struct *_work)
{
struct mlx5_ib_event_work *work =
@@ -4233,65 +4311,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
struct mlx5_ib_dev *ibdev;
struct ib_event ibev;
bool fatal = false;
- u8 port = (u8)work->param;
- if (mlx5_core_is_mp_slave(work->dev)) {
- ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
+ if (work->is_slave) {
+ ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
if (!ibdev)
goto out;
} else {
- ibdev = work->context;
+ ibdev = work->dev;
}
switch (work->event) {
case MLX5_DEV_EVENT_SYS_ERROR:
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
+ ibev.element.port_num = (u8)(unsigned long)work->param;
fatal = true;
break;
-
- case MLX5_DEV_EVENT_PORT_UP:
- case MLX5_DEV_EVENT_PORT_DOWN:
- case MLX5_DEV_EVENT_PORT_INITIALIZED:
- /* In RoCE, port up/down events are handled in
- * mlx5_netdev_event().
- */
- if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
- IB_LINK_LAYER_ETHERNET)
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ if (handle_port_change(ibdev, work->param, &ibev))
goto out;
-
- ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
- break;
-
- case MLX5_DEV_EVENT_LID_CHANGE:
- ibev.event = IB_EVENT_LID_CHANGE;
- break;
-
- case MLX5_DEV_EVENT_PKEY_CHANGE:
- ibev.event = IB_EVENT_PKEY_CHANGE;
- schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
- break;
-
- case MLX5_DEV_EVENT_GUID_CHANGE:
- ibev.event = IB_EVENT_GID_CHANGE;
- break;
-
- case MLX5_DEV_EVENT_CLIENT_REREG:
- ibev.event = IB_EVENT_CLIENT_REREGISTER;
break;
- case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
- schedule_work(&ibdev->delay_drop.delay_drop_work);
- goto out;
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ handle_general_event(ibdev, work->param, &ibev);
+ /* fall through */
default:
goto out;
}
- ibev.device = &ibdev->ib_dev;
- ibev.element.port_num = port;
+ ibev.device = &ibdev->ib_dev;
- if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
- mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
+ if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
+ mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
goto out;
}
@@ -4304,22 +4354,43 @@ out:
kfree(work);
}
-static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
- enum mlx5_dev_event event, unsigned long param)
+static int mlx5_ib_event(struct notifier_block *nb,
+ unsigned long event, void *param)
{
struct mlx5_ib_event_work *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&work->work, mlx5_ib_handle_event);
- work->dev = dev;
+ work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
+ work->is_slave = false;
work->param = param;
- work->context = context;
work->event = event;
queue_work(mlx5_ib_event_wq, &work->work);
+
+ return NOTIFY_OK;
+}
+
+static int mlx5_ib_event_slave_port(struct notifier_block *nb,
+ unsigned long event, void *param)
+{
+ struct mlx5_ib_event_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return NOTIFY_DONE;
+
+ INIT_WORK(&work->work, mlx5_ib_handle_event);
+ work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
+ work->is_slave = true;
+ work->param = param;
+ work->event = event;
+ queue_work(mlx5_ib_event_wq, &work->work);
+
+ return NOTIFY_OK;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -5325,14 +5396,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev)
mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}
-static const struct cpumask *
-mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
-}
-
/* The mlx5_ib_multiport_mutex should be held when calling this function */
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
@@ -5350,6 +5413,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
spin_unlock(&port->mp.mpi_lock);
return;
}
+
+ if (mpi->mdev_events.notifier_call)
+ mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+ mpi->mdev_events.notifier_call = NULL;
+
mpi->ibdev = NULL;
spin_unlock(&port->mp.mpi_lock);
@@ -5405,6 +5473,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
ibdev->port[port_num].mp.mpi = mpi;
mpi->ibdev = ibdev;
+ mpi->mdev_events.notifier_call = NULL;
spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
@@ -5422,6 +5491,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
goto unbind;
}
+ mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
+ mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
+
err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
if (err)
goto unbind;
@@ -5551,30 +5623,17 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
enum mlx5_ib_uapi_flow_action_flags));
-static int populate_specs_root(struct mlx5_ib_dev *dev)
-{
- const struct uverbs_object_tree_def **trees = dev->driver_trees;
- size_t num_trees = 0;
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- trees[num_trees++] = &mlx5_ib_flow_action;
-
- if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
- trees[num_trees++] = &mlx5_ib_dm;
-
- if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
- MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
- trees[num_trees++] = mlx5_ib_get_devx_tree();
-
- num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
-
- WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
- trees[num_trees] = NULL;
- dev->ib_dev.driver_specs = trees;
+static const struct uapi_definition mlx5_ib_defs[] = {
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+ UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
+#endif
- return 0;
-}
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_action),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ {}
+};
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
@@ -5694,8 +5753,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->ib_dev.phys_port_cnt = dev->num_ports;
- dev->ib_dev.num_comp_vectors =
- dev->mdev->priv.eq_table.num_comp_vectors;
+ dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
dev->ib_dev.dev.parent = &mdev->pdev->dev;
mutex_init(&dev->cap_mask_mutex);
@@ -5838,7 +5896,6 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
- dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params;
@@ -5881,14 +5938,22 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
- dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+ dev->ib_dev.create_flow_action_esp =
+ mlx5_ib_create_flow_action_esp;
+ dev->ib_dev.modify_flow_action_esp =
+ mlx5_ib_modify_flow_action_esp;
+ }
dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
- dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
dev->ib_dev.create_counters = mlx5_ib_create_counters;
dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
dev->ib_dev.read_counters = mlx5_ib_read_counters;
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ dev->ib_dev.driver_def = mlx5_ib_defs;
+
err = init_node_data(dev);
if (err)
return err;
@@ -6034,6 +6099,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
+void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_odp_cleanup_one(dev);
+}
+
int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
@@ -6096,11 +6166,6 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
-{
- return populate_specs_root(dev);
-}
-
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
const char *name;
@@ -6152,6 +6217,34 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
mlx5_ib_unregister_vport_reps(dev);
}
+static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
+{
+ dev->mdev_events.notifier_call = mlx5_ib_event;
+ mlx5_notifier_register(dev->mdev, &dev->mdev_events);
+ return 0;
+}
+
+static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
+}
+
+static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
+{
+ int uid;
+
+ uid = mlx5_ib_devx_create(dev, false);
+ if (uid > 0)
+ dev->devx_whitelist_uid = uid;
+
+ return 0;
+}
+static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (dev->devx_whitelist_uid)
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+}
+
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
@@ -6163,8 +6256,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
profile->stage[stage].cleanup(dev);
}
- if (dev->devx_whitelist_uid)
- mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
ib_dealloc_device((struct ib_device *)dev);
}
@@ -6173,7 +6264,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
{
int err;
int i;
- int uid;
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
@@ -6183,10 +6273,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
}
}
- uid = mlx5_ib_devx_create(dev);
- if (uid > 0)
- dev->devx_whitelist_uid = uid;
-
dev->profile = profile;
dev->ib_active = true;
@@ -6214,12 +6300,18 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
mlx5_ib_stage_odp_init,
- NULL),
+ mlx5_ib_stage_odp_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
@@ -6235,9 +6327,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+ mlx5_ib_stage_devx_init,
+ mlx5_ib_stage_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -6265,9 +6357,15 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
@@ -6280,9 +6378,6 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -6388,10 +6483,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
- .event = mlx5_ib_event,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- .pfault = mlx5_ib_pfault,
-#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b651a7a6fde9..7145f512f948 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -41,7 +41,6 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/mlx5/fs.h>
#include <linux/types.h>
#include <linux/mlx5/transobj.h>
@@ -50,6 +49,8 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include "srq.h"
+
#define mlx5_ib_dbg(_dev, format, arg...) \
dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
__LINE__, current->pid, ##arg)
@@ -257,6 +258,7 @@ enum mlx5_ib_rq_flags {
};
struct mlx5_ib_wq {
+ struct mlx5_frag_buf_ctrl fbc;
u64 *wrid;
u32 *wr_data;
struct wr_list *w_list;
@@ -275,7 +277,7 @@ struct mlx5_ib_wq {
unsigned tail;
u16 cur_post;
u16 last_poll;
- void *qend;
+ void *cur_edge;
};
enum mlx5_ib_wq_flags {
@@ -460,6 +462,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_UNDERLAY = 1 << 10,
MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11,
MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12,
+ MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13,
};
struct mlx5_umr_wr {
@@ -523,6 +526,7 @@ struct mlx5_ib_srq {
struct mlx5_core_srq msrq;
struct mlx5_frag_buf buf;
struct mlx5_db db;
+ struct mlx5_frag_buf_ctrl fbc;
u64 *wrid;
/* protect SRQ hanlding
*/
@@ -540,7 +544,6 @@ struct mlx5_ib_srq {
struct mlx5_ib_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
- u16 uid;
};
enum mlx5_ib_mtt_access_flags {
@@ -774,14 +777,16 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_CAPS,
MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
+ MLX5_IB_STAGE_SRQ,
MLX5_IB_STAGE_DEVICE_RESOURCES,
+ MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_COUNTERS,
MLX5_IB_STAGE_CONG_DEBUGFS,
MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
- MLX5_IB_STAGE_SPECS,
+ MLX5_IB_STAGE_WHITELIST_UID,
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
@@ -806,6 +811,7 @@ struct mlx5_ib_multiport_info {
struct list_head list;
struct mlx5_ib_dev *ibdev;
struct mlx5_core_dev *mdev;
+ struct notifier_block mdev_events;
struct completion unref_comp;
u64 sys_image_guid;
u32 mdev_refcnt;
@@ -880,10 +886,19 @@ struct mlx5_ib_lb_state {
bool enabled;
};
+struct mlx5_ib_pf_eq {
+ struct mlx5_ib_dev *dev;
+ struct mlx5_eq *core;
+ struct work_struct work;
+ spinlock_t lock; /* Pagefaults spinlock */
+ struct workqueue_struct *wq;
+ mempool_t *pool;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
- const struct uverbs_object_tree_def *driver_trees[7];
struct mlx5_core_dev *mdev;
+ struct notifier_block mdev_events;
struct mlx5_roce roce[MLX5_MAX_PORTS];
int num_ports;
/* serialize update of capability mask
@@ -902,6 +917,8 @@ struct mlx5_ib_dev {
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
u64 odp_max_size;
+ struct mlx5_ib_pf_eq odp_pf_eq;
+
/*
* Sleepable RCU that prevents destruction of MRs while they are still
* being used by a page fault handler.
@@ -927,6 +944,7 @@ struct mlx5_ib_dev {
u64 sys_image_guid;
struct mlx5_memic memic;
u16 devx_whitelist_uid;
+ struct mlx5_srq_table srq_table;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1158,9 +1176,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
-void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
- struct mlx5_pagefault *pfault);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
@@ -1175,6 +1192,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
@@ -1250,32 +1268,29 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
+extern const struct uapi_definition mlx5_ib_devx_defs[];
+extern const struct uapi_definition mlx5_ib_flow_defs[];
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
- int dest_id, int dest_type);
+ struct mlx5_flow_act *flow_act, u32 counter_id,
+ void *cmd_in, int inlen, int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id);
int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
#else
static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+ bool is_user) { return -EOPNOTSUPP; }
static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
-static inline const struct uverbs_object_tree_def *
-mlx5_ib_get_devx_tree(void) { return NULL; }
static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
int *dest_type)
{
return false;
}
-static inline int
-mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
-{
- return 0;
-}
static inline void
mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
{
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 2cc3d69ab6f6..4ead8c0fff5a 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -37,6 +37,46 @@
#include "mlx5_ib.h"
#include "cmd.h"
+#include <linux/mlx5/eq.h>
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+ u32 bytes_committed;
+ u32 token;
+ u8 event_subtype;
+ u8 type;
+ union {
+ /* Initiator or send message responder pagefault details. */
+ struct {
+ /* Received packet size, only valid for responders. */
+ u32 packet_size;
+ /*
+ * Number of resource holding WQE, depends on type.
+ */
+ u32 wq_num;
+ /*
+ * WQE index. Refers to either the send queue or
+ * receive queue, according to event_subtype.
+ */
+ u16 wqe_index;
+ } wqe;
+ /* RDMA responder pagefault details */
+ struct {
+ u32 r_key;
+ /*
+ * Received packet size, minimal size page fault
+ * resolution required for forward progress.
+ */
+ u32 packet_size;
+ u32 rdma_op_len;
+ u64 rdma_va;
+ } rdma;
+ };
+
+ struct mlx5_ib_pf_eq *eq;
+ struct work_struct work;
+};
+
#define MAX_PREFETCH_LEN (4*1024*1024U)
/* Timeout in ms to wait for an active mmu notifier to complete when handling
@@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
{
int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
pfault->wqe.wq_num : pfault->token;
- int ret = mlx5_core_page_fault_resume(dev->mdev,
- pfault->token,
- wq_num,
- pfault->type,
- error);
- if (ret)
- mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
- wq_num);
+ u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { };
+ u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { };
+ int err;
+
+ MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
+ MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type);
+ MLX5_SET(page_fault_resume_in, in, token, pfault->token);
+ MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
+ MLX5_SET(page_fault_resume_in, in, error, !!error);
+
+ err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
+ wq_num, err);
}
static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
@@ -607,8 +653,8 @@ out:
if (!wait_for_completion_timeout(
&odp->notifier_completion,
timeout)) {
- mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
- current_seq, odp->notifiers_seq);
+ mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+ current_seq, odp->notifiers_seq, odp->notifiers_count);
}
} else {
/* The MR is being killed, kill the QP as well. */
@@ -1026,16 +1072,31 @@ invalid_transport_or_opcode:
return 0;
}
-static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
- u32 wq_num)
+static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
+ u32 wq_num, int pf_type)
{
- struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
+ enum mlx5_res_type res_type;
- if (!mqp) {
- mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
+ switch (pf_type) {
+ case MLX5_WQE_PF_TYPE_RMP:
+ res_type = MLX5_RES_SRQ;
+ break;
+ case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
+ case MLX5_WQE_PF_TYPE_RESP:
+ case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
+ res_type = MLX5_RES_QP;
+ break;
+ default:
return NULL;
}
+ return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
+}
+
+static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
+{
+ struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res;
+
return to_mibqp(mqp);
}
@@ -1049,18 +1110,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
int resume_with_error = 1;
u16 wqe_index = pfault->wqe.wqe_index;
int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
+ struct mlx5_core_rsc_common *res;
struct mlx5_ib_qp *qp;
+ res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
+ if (!res) {
+ mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num);
+ return;
+ }
+
+ switch (res->res) {
+ case MLX5_RES_QP:
+ qp = res_to_qp(res);
+ break;
+ default:
+ mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
+ goto resolve_page_fault;
+ }
+
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
- qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
- if (!qp)
- goto resolve_page_fault;
-
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
@@ -1100,6 +1173,7 @@ resolve_page_fault:
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
pfault->wqe.wq_num, resume_with_error,
pfault->type);
+ mlx5_core_res_put(res);
free_page((unsigned long)buffer);
}
@@ -1178,10 +1252,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
}
}
-void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
- struct mlx5_pagefault *pfault)
+static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
{
- struct mlx5_ib_dev *dev = context;
u8 event_subtype = pfault->event_subtype;
switch (event_subtype) {
@@ -1198,6 +1270,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
}
}
+static void mlx5_ib_eqe_pf_action(struct work_struct *work)
+{
+ struct mlx5_pagefault *pfault = container_of(work,
+ struct mlx5_pagefault,
+ work);
+ struct mlx5_ib_pf_eq *eq = pfault->eq;
+
+ mlx5_ib_pfault(eq->dev, pfault);
+ mempool_free(pfault, eq->pool);
+}
+
+static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
+{
+ struct mlx5_eqe_page_fault *pf_eqe;
+ struct mlx5_pagefault *pfault;
+ struct mlx5_eqe *eqe;
+ int cc = 0;
+
+ while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) {
+ pfault = mempool_alloc(eq->pool, GFP_ATOMIC);
+ if (!pfault) {
+ schedule_work(&eq->work);
+ break;
+ }
+
+ pf_eqe = &eqe->data.page_fault;
+ pfault->event_subtype = eqe->sub_type;
+ pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
+
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
+ eqe->sub_type, pfault->bytes_committed);
+
+ switch (eqe->sub_type) {
+ case MLX5_PFAULT_SUBTYPE_RDMA:
+ /* RDMA based event */
+ pfault->type =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
+ pfault->token =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) &
+ MLX5_24BIT_MASK;
+ pfault->rdma.r_key =
+ be32_to_cpu(pf_eqe->rdma.r_key);
+ pfault->rdma.packet_size =
+ be16_to_cpu(pf_eqe->rdma.packet_length);
+ pfault->rdma.rdma_op_len =
+ be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+ pfault->rdma.rdma_va =
+ be64_to_cpu(pf_eqe->rdma.rdma_va);
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
+ pfault->type, pfault->token,
+ pfault->rdma.r_key);
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
+ pfault->rdma.rdma_op_len,
+ pfault->rdma.rdma_va);
+ break;
+
+ case MLX5_PFAULT_SUBTYPE_WQE:
+ /* WQE based event */
+ pfault->type =
+ (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
+ pfault->token =
+ be32_to_cpu(pf_eqe->wqe.token);
+ pfault->wqe.wq_num =
+ be32_to_cpu(pf_eqe->wqe.pftype_wq) &
+ MLX5_24BIT_MASK;
+ pfault->wqe.wqe_index =
+ be16_to_cpu(pf_eqe->wqe.wqe_index);
+ pfault->wqe.packet_size =
+ be16_to_cpu(pf_eqe->wqe.packet_length);
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+ pfault->type, pfault->token,
+ pfault->wqe.wq_num,
+ pfault->wqe.wqe_index);
+ break;
+
+ default:
+ mlx5_ib_warn(eq->dev,
+ "Unsupported page fault event sub-type: 0x%02hhx\n",
+ eqe->sub_type);
+ /* Unsupported page faults should still be
+ * resolved by the page fault handler
+ */
+ }
+
+ pfault->eq = eq;
+ INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action);
+ queue_work(eq->wq, &pfault->work);
+
+ cc = mlx5_eq_update_cc(eq->core, ++cc);
+ }
+
+ mlx5_eq_update_ci(eq->core, cc, 1);
+}
+
+static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+{
+ struct mlx5_ib_pf_eq *eq = eq_ptr;
+ unsigned long flags;
+
+ if (spin_trylock_irqsave(&eq->lock, flags)) {
+ mlx5_ib_eq_pf_process(eq);
+ spin_unlock_irqrestore(&eq->lock, flags);
+ } else {
+ schedule_work(&eq->work);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/* mempool_refill() was proposed but unfortunately wasn't accepted
+ * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
+ * Cheap workaround.
+ */
+static void mempool_refill(mempool_t *pool)
+{
+ while (pool->curr_nr < pool->min_nr)
+ mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
+}
+
+static void mlx5_ib_eq_pf_action(struct work_struct *work)
+{
+ struct mlx5_ib_pf_eq *eq =
+ container_of(work, struct mlx5_ib_pf_eq, work);
+
+ mempool_refill(eq->pool);
+
+ spin_lock_irq(&eq->lock);
+ mlx5_ib_eq_pf_process(eq);
+ spin_unlock_irq(&eq->lock);
+}
+
+enum {
+ MLX5_IB_NUM_PF_EQE = 0x1000,
+ MLX5_IB_NUM_PF_DRAIN = 64,
+};
+
+static int
+mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+ struct mlx5_eq_param param = {};
+ int err;
+
+ INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
+ spin_lock_init(&eq->lock);
+ eq->dev = dev;
+
+ eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
+ sizeof(struct mlx5_pagefault));
+ if (!eq->pool)
+ return -ENOMEM;
+
+ eq->wq = alloc_workqueue("mlx5_ib_page_fault",
+ WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
+ MLX5_NUM_CMD_EQE);
+ if (!eq->wq) {
+ err = -ENOMEM;
+ goto err_mempool;
+ }
+
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_PFAULT_IDX,
+ .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+ .nent = MLX5_IB_NUM_PF_EQE,
+ .context = eq,
+ .handler = mlx5_ib_eq_pf_int
+ };
+ eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+ if (IS_ERR(eq->core)) {
+ err = PTR_ERR(eq->core);
+ goto err_wq;
+ }
+
+ return 0;
+err_wq:
+ destroy_workqueue(eq->wq);
+err_mempool:
+ mempool_destroy(eq->pool);
+ return err;
+}
+
+static int
+mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+ int err;
+
+ err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
+ cancel_work_sync(&eq->work);
+ destroy_workqueue(eq->wq);
+ mempool_destroy(eq->pool);
+
+ return err;
+}
+
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
{
if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
@@ -1226,7 +1495,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
- int ret;
+ int ret = 0;
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
@@ -1236,7 +1505,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
}
}
- return 0;
+ if (!MLX5_CAP_GEN(dev->mdev, pg))
+ return ret;
+
+ ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
+
+ return ret;
+}
+
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, pg))
+ return;
+
+ mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
}
int mlx5_ib_odp_init(void)
@@ -1246,4 +1528,3 @@ int mlx5_ib_odp_init(void)
return 0;
}
-
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 3747cc681b18..4a85748aeeea 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -108,21 +108,6 @@ static int is_sqp(enum ib_qp_type qp_type)
return is_qp0(qp_type) || is_qp1(qp_type);
}
-static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
-{
- return mlx5_buf_offset(&qp->buf, offset);
-}
-
-static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
-{
- return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
-}
-
-void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
-{
- return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
-}
-
/**
* mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
*
@@ -790,6 +775,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
__be64 *pas;
void *qpc;
int err;
+ u16 uid;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err) {
@@ -851,7 +837,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_umem;
}
- MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
+ uid = (attr->qp_type != IB_QPT_XRC_TGT) ? to_mpd(pd)->uid : 0;
+ MLX5_SET(create_qp_in, *in, uid, uid);
pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -917,6 +904,30 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
}
+/* get_sq_edge - Get the next nearby edge.
+ *
+ * An 'edge' is defined as the first following address after the end
+ * of the fragment or the SQ. Accordingly, during the WQE construction
+ * which repetitively increases the pointer to write the next data, it
+ * simply should check if it gets to an edge.
+ *
+ * @sq - SQ buffer.
+ * @idx - Stride index in the SQ buffer.
+ *
+ * Return:
+ * The new edge.
+ */
+static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
+{
+ void *fragment_end;
+
+ fragment_end = mlx5_frag_buf_get_wqe
+ (&sq->fbc,
+ mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
+
+ return fragment_end + MLX5_SEND_WQE_BB;
+}
+
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
@@ -955,13 +966,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
+ err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
+ &qp->buf, dev->mdev->priv.numa_node);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
- qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
+ if (qp->rq.wqe_cnt)
+ mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
+ ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);
+
+ if (qp->sq.wqe_cnt) {
+ int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) /
+ MLX5_SEND_WQE_BB;
+ mlx5_init_fbc_offset(qp->buf.frags +
+ (qp->sq.offset / PAGE_SIZE),
+ ilog2(MLX5_SEND_WQE_BB),
+ ilog2(qp->sq.wqe_cnt),
+ sq_strides_offset, &qp->sq.fbc);
+
+ qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
+ }
+
*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
*in = kvzalloc(*inlen, GFP_KERNEL);
@@ -983,8 +1010,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->flags |= MLX5_IB_QP_SQPN_QP1;
}
- mlx5_fill_page_array(&qp->buf,
- (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
+ mlx5_fill_page_frag_array(&qp->buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in,
+ *in, pas));
err = mlx5_db_alloc(dev->mdev, &qp->db);
if (err) {
@@ -1024,7 +1052,7 @@ err_free:
kvfree(*in);
err_buf:
- mlx5_buf_free(dev->mdev, &qp->buf);
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
return err;
}
@@ -1036,7 +1064,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
kvfree(qp->sq.wr_data);
kvfree(qp->rq.wrid);
mlx5_db_free(dev->mdev, &qp->db);
- mlx5_buf_free(dev->mdev, &qp->buf);
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
}
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
@@ -1889,7 +1917,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_QP_FLAG_BFREG_INDEX |
MLX5_QP_FLAG_TYPE_DCT |
MLX5_QP_FLAG_TYPE_DCI |
- MLX5_QP_FLAG_ALLOW_SCATTER_CQE))
+ MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
+ MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE))
return -EINVAL;
err = get_qp_user_index(to_mucontext(pd->uobject->context),
@@ -1925,6 +1954,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
}
+ if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) {
+ if (init_attr->qp_type != IB_QPT_RC ||
+ !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) {
+ mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT;
+ }
+
if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
if (init_attr->qp_type != IB_QPT_UD ||
(MLX5_CAP_GEN(dev->mdev, port_type) !=
@@ -2021,7 +2059,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, cd_slave_send, 1);
if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
MLX5_SET(qpc, qpc, cd_slave_receive, 1);
-
+ if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT)
+ MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
configure_responder_scat_cqe(init_attr, qpc);
configure_requester_scat_cqe(dev, init_attr,
@@ -2663,7 +2702,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
if (rate == IB_RATE_PORT_CURRENT)
return 0;
- if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS)
+ if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
return -EINVAL;
while (rate != IB_RATE_PORT_CURRENT &&
@@ -3475,6 +3514,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.head = 0;
qp->sq.tail = 0;
qp->sq.cur_post = 0;
+ if (qp->sq.wqe_cnt)
+ qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
qp->sq.last_poll = 0;
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
@@ -3515,7 +3556,7 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
return is_valid_mask(attr_mask, req, opt);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
req |= IB_QP_PATH_MTU;
- opt = IB_QP_PKEY_INDEX;
+ opt = IB_QP_PKEY_INDEX | IB_QP_AV;
return is_valid_mask(attr_mask, req, opt);
} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
@@ -3749,6 +3790,62 @@ out:
return err;
}
+static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ u32 idx;
+
+ idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
+ *cur_edge = get_sq_edge(sq, idx);
+
+ *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
+}
+
+/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
+ * next nearby edge and get new address translation for current WQE position.
+ * @sq - SQ buffer.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @cur_edge: Updated current edge.
+ */
+static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ if (likely(*seg != *cur_edge))
+ return;
+
+ _handle_post_send_edge(sq, seg, wqe_sz, cur_edge);
+}
+
+/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
+ * pointers. At the end @seg is aligned to 16B regardless the copied size.
+ * @sq - SQ buffer.
+ * @cur_edge: Updated current edge.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @src: Pointer to copy from.
+ * @n: Number of bytes to copy.
+ */
+static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
+ void **seg, u32 *wqe_sz, const void *src,
+ size_t n)
+{
+ while (likely(n)) {
+ size_t leftlen = *cur_edge - *seg;
+ size_t copysz = min_t(size_t, leftlen, n);
+ size_t stride;
+
+ memcpy(*seg, src, copysz);
+
+ n -= copysz;
+ src += copysz;
+ stride = !n ? ALIGN(copysz, 16) : copysz;
+ *seg += stride;
+ *wqe_sz += stride >> 4;
+ handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
+ }
+}
+
static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
struct mlx5_ib_cq *cq;
@@ -3774,11 +3871,10 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
rseg->reserved = 0;
}
-static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
- const struct ib_send_wr *wr, void *qend,
- struct mlx5_ib_qp *qp, int *size)
+static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size, void **cur_edge)
{
- void *seg = eseg;
+ struct mlx5_wqe_eth_seg *eseg = *seg;
memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
@@ -3786,45 +3882,41 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
MLX5_ETH_WQE_L4_CSUM;
- seg += sizeof(struct mlx5_wqe_eth_seg);
- *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
-
if (wr->opcode == IB_WR_LSO) {
struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
- int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
- u64 left, leftlen, copysz;
+ size_t left, copysz;
void *pdata = ud_wr->header;
+ size_t stride;
left = ud_wr->hlen;
eseg->mss = cpu_to_be16(ud_wr->mss);
eseg->inline_hdr.sz = cpu_to_be16(left);
- /*
- * check if there is space till the end of queue, if yes,
- * copy all in one shot, otherwise copy till the end of queue,
- * rollback and than the copy the left
+ /* memcpy_send_wqe should get a 16B align address. Hence, we
+ * first copy up to the current edge and then, if needed,
+ * fall-through to memcpy_send_wqe.
*/
- leftlen = qend - (void *)eseg->inline_hdr.start;
- copysz = min_t(u64, leftlen, left);
-
- memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
-
- if (likely(copysz > size_of_inl_hdr_start)) {
- seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
- *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
- }
-
- if (unlikely(copysz < left)) { /* the last wqe in the queue */
- seg = mlx5_get_send_wqe(qp, 0);
+ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
+ left);
+ memcpy(eseg->inline_hdr.start, pdata, copysz);
+ stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
+ sizeof(eseg->inline_hdr.start) + copysz, 16);
+ *size += stride / 16;
+ *seg += stride;
+
+ if (copysz < left) {
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
left -= copysz;
pdata += copysz;
- memcpy(seg, pdata, left);
- seg += ALIGN(left, 16);
- *size += ALIGN(left, 16) / 16;
+ memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
+ left);
}
+
+ return;
}
- return seg;
+ *seg += sizeof(struct mlx5_wqe_eth_seg);
+ *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
}
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
@@ -4083,24 +4175,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
}
-static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
- struct mlx5_ib_mr *mr, int mr_list_size)
-{
- void *qend = qp->sq.qend;
- void *addr = mr->descs;
- int copy;
-
- if (unlikely(seg + mr_list_size > qend)) {
- copy = qend - seg;
- memcpy(seg, addr, copy);
- addr += copy;
- mr_list_size -= copy;
- seg = mlx5_get_send_wqe(qp, 0);
- }
- memcpy(seg, addr, mr_list_size);
- seg += mr_list_size;
-}
-
static __be32 send_ieth(const struct ib_send_wr *wr)
{
switch (wr->opcode) {
@@ -4134,40 +4208,48 @@ static u8 wq_sig(void *wqe)
}
static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
- void *wqe, int *sz)
+ void **wqe, int *wqe_sz, void **cur_edge)
{
struct mlx5_wqe_inline_seg *seg;
- void *qend = qp->sq.qend;
- void *addr;
+ size_t offset;
int inl = 0;
- int copy;
- int len;
int i;
- seg = wqe;
- wqe += sizeof(*seg);
+ seg = *wqe;
+ *wqe += sizeof(*seg);
+ offset = sizeof(*seg);
+
for (i = 0; i < wr->num_sge; i++) {
- addr = (void *)(unsigned long)(wr->sg_list[i].addr);
- len = wr->sg_list[i].length;
+ size_t len = wr->sg_list[i].length;
+ void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+
inl += len;
if (unlikely(inl > qp->max_inline_data))
return -ENOMEM;
- if (unlikely(wqe + len > qend)) {
- copy = qend - wqe;
- memcpy(wqe, addr, copy);
- addr += copy;
- len -= copy;
- wqe = mlx5_get_send_wqe(qp, 0);
+ while (likely(len)) {
+ size_t leftlen;
+ size_t copysz;
+
+ handle_post_send_edge(&qp->sq, wqe,
+ *wqe_sz + (offset >> 4),
+ cur_edge);
+
+ leftlen = *cur_edge - *wqe;
+ copysz = min_t(size_t, leftlen, len);
+
+ memcpy(*wqe, addr, copysz);
+ len -= copysz;
+ addr += copysz;
+ *wqe += copysz;
+ offset += copysz;
}
- memcpy(wqe, addr, len);
- wqe += len;
}
seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
- *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
+ *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
return 0;
}
@@ -4280,7 +4362,8 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
}
static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
- struct mlx5_ib_qp *qp, void **seg, int *size)
+ struct mlx5_ib_qp *qp, void **seg,
+ int *size, void **cur_edge)
{
struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
struct ib_mr *sig_mr = wr->sig_mr;
@@ -4364,8 +4447,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
*seg += wqe_size;
*size += wqe_size / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
bsf = *seg;
ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
@@ -4374,8 +4456,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
*seg += sizeof(*bsf);
*size += sizeof(*bsf) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
return 0;
}
@@ -4413,7 +4494,8 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
- struct mlx5_ib_qp *qp, void **seg, int *size)
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
@@ -4445,16 +4527,14 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- ret = set_sig_data_segment(wr, qp, seg, size);
+ ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
if (ret)
return ret;
@@ -4491,11 +4571,11 @@ static int set_psv_wr(struct ib_sig_domain *domain,
static int set_reg_wr(struct mlx5_ib_qp *qp,
const struct ib_reg_wr *wr,
- void **seg, int *size)
+ void **seg, int *size, void **cur_edge)
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- int mr_list_size = mr->ndescs * mr->desc_size;
+ size_t mr_list_size = mr->ndescs * mr->desc_size;
bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
@@ -4507,18 +4587,17 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
set_reg_umr_seg(*seg, mr, umr_inline);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
if (umr_inline) {
- set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size);
- *size += get_xlt_octo(mr_list_size);
+ memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
+ mr_list_size);
+ *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
} else {
set_reg_data_seg(*seg, mr, pd);
*seg += sizeof(struct mlx5_wqe_data_seg);
@@ -4527,32 +4606,31 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
return 0;
}
-static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
set_linv_umr_seg(*seg);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_linv_mkey_seg(*seg);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
}
-static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
+static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
{
__be32 *p = NULL;
- int tidx = idx;
+ u32 tidx = idx;
int i, j;
- pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
+ pr_debug("dump WQE index %u:\n", idx);
for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
if ((i & 0xf) == 0) {
- void *buf = mlx5_get_send_wqe(qp, tidx);
tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
- p = buf;
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx);
+ pr_debug("WQBB at %p:\n", (void *)p);
j = 0;
}
pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
@@ -4562,15 +4640,16 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
}
static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq, bool send_signaled, bool solicited)
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ const struct ib_send_wr *wr, unsigned int *idx,
+ int *size, void **cur_edge, int nreq,
+ bool send_signaled, bool solicited)
{
if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- *seg = mlx5_get_send_wqe(qp, *idx);
+ *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
*ctrl = *seg;
*(uint32_t *)(*seg + 8) = 0;
(*ctrl)->imm = send_ieth(wr);
@@ -4580,6 +4659,7 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
+ *cur_edge = qp->sq.cur_edge;
return 0;
}
@@ -4587,17 +4667,18 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
const struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq)
+ int *size, void **cur_edge, int nreq)
{
- return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
+ return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
wr->send_flags & IB_SEND_SIGNALED,
wr->send_flags & IB_SEND_SOLICITED);
}
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
- u8 size, unsigned idx, u64 wr_id,
- int nreq, u8 fence, u32 mlx5_opcode)
+ void *seg, u8 size, void *cur_edge,
+ unsigned int idx, u64 wr_id, int nreq, u8 fence,
+ u32 mlx5_opcode)
{
u8 opmod = 0;
@@ -4613,6 +4694,15 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
qp->sq.wqe_head[idx] = qp->sq.head + nreq;
qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
qp->sq.w_list[idx].next = qp->sq.cur_post;
+
+ /* We save the edge which was possibly updated during the WQE
+ * construction, into SQ's cache.
+ */
+ seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
+ qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
+ get_sq_edge(&qp->sq, qp->sq.cur_post &
+ (qp->sq.wqe_cnt - 1)) :
+ cur_edge;
}
static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
@@ -4623,11 +4713,10 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
- struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
+ void *cur_edge;
int uninitialized_var(size);
- void *qend;
unsigned long flags;
unsigned idx;
int err = 0;
@@ -4649,7 +4738,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
qp = to_mqp(ibqp);
bf = &qp->bf;
- qend = qp->sq.qend;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -4669,7 +4757,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
+ nreq);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4719,14 +4808,15 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
case IB_WR_LOCAL_INV:
qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- set_linv_wr(qp, &seg, &size);
+ set_linv_wr(qp, &seg, &size, &cur_edge);
num_sge = 0;
break;
case IB_WR_REG_MR:
qp->sq.wr_data[idx] = IB_WR_REG_MR;
ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
- err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
+ err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
+ &cur_edge);
if (err) {
*bad_wr = wr;
goto out;
@@ -4739,21 +4829,24 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
mr = to_mmr(sig_handover_wr(wr)->sig_mr);
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- err = set_sig_umr_wr(wr, qp, &seg, &size);
+ err = set_sig_umr_wr(wr, qp, &seg, &size,
+ &cur_edge);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_UMR);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_UMR);
/*
* SET_PSV WQEs are not signaled and solicited
* on error
*/
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, nreq, false, true);
+ &size, &cur_edge, nreq, false,
+ true);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4770,10 +4863,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_SET_PSV);
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, nreq, false, true);
+ &size, &cur_edge, nreq, false,
+ true);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4790,8 +4885,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_SET_PSV);
qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
num_sge = 0;
goto skip_psv;
@@ -4828,16 +4924,14 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
+
break;
case IB_QPT_UD:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
-
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
/* handle qp that supports ud offload */
if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
@@ -4847,11 +4941,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
seg += sizeof(struct mlx5_wqe_eth_pad);
size += sizeof(struct mlx5_wqe_eth_pad) / 16;
-
- seg = set_eth_seg(seg, wr, qend, qp, &size);
-
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ set_eth_seg(wr, qp, &seg, &size, &cur_edge);
+ handle_post_send_edge(&qp->sq, &seg, size,
+ &cur_edge);
}
break;
case MLX5_IB_QPT_REG_UMR:
@@ -4867,13 +4959,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
set_reg_mkey_segment(seg, wr);
seg += sizeof(struct mlx5_mkey_seg);
size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
break;
default:
@@ -4881,33 +4971,29 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
if (wr->send_flags & IB_SEND_INLINE && num_sge) {
- int uninitialized_var(sz);
-
- err = set_data_inl_seg(qp, wr, seg, &sz);
+ err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
if (unlikely(err)) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
- size += sz;
} else {
- dpseg = seg;
for (i = 0; i < num_sge; i++) {
- if (unlikely(dpseg == qend)) {
- seg = mlx5_get_send_wqe(qp, 0);
- dpseg = seg;
- }
+ handle_post_send_edge(&qp->sq, &seg, size,
+ &cur_edge);
if (likely(wr->sg_list[i].length)) {
- set_data_ptr_seg(dpseg, wr->sg_list + i);
+ set_data_ptr_seg
+ ((struct mlx5_wqe_data_seg *)seg,
+ wr->sg_list + i);
size += sizeof(struct mlx5_wqe_data_seg) / 16;
- dpseg++;
+ seg += sizeof(struct mlx5_wqe_data_seg);
}
}
}
qp->next_fence = next_fence;
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
- mlx5_ib_opcode[wr->opcode]);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
+ fence, mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
dump_wqe(qp, idx, size);
@@ -4993,7 +5079,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
goto out;
}
- scat = get_recv_wqe(qp, ind);
+ scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
if (qp->wq_sig)
scat++;
@@ -5441,7 +5527,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_xrcd *xrcd;
int err;
- u16 uid;
if (!MLX5_CAP_GEN(dev->mdev, xrc))
return ERR_PTR(-ENOSYS);
@@ -5450,14 +5535,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
if (!xrcd)
return ERR_PTR(-ENOMEM);
- uid = context ? to_mucontext(context)->devx_uid : 0;
- err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
+ err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
if (err) {
kfree(xrcd);
return ERR_PTR(-ENOMEM);
}
- xrcd->uid = uid;
return &xrcd->ibxrcd;
}
@@ -5465,10 +5548,9 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- u16 uid = to_mxrcd(xrcd)->uid;
int err;
- err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
+ err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
if (err)
mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index d012e7dbcc38..0413b10dea71 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -1,50 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
-
#include "mlx5_ib.h"
-
-/* not supported currently */
-static int srq_signature;
+#include "srq.h"
static void *get_wqe(struct mlx5_ib_srq *srq, int n)
{
- return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
+ return mlx5_frag_buf_get_wqe(&srq->fbc, n);
}
static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
@@ -144,7 +113,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->page_offset = offset;
- in->uid = to_mpd(pd)->uid;
+ in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type != IB_SRQT_BASIC)
in->user_index = uidx;
@@ -173,12 +142,16 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
return err;
}
- if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) {
+ if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf,
+ dev->mdev->priv.numa_node)) {
mlx5_ib_dbg(dev, "buf alloc failed\n");
err = -ENOMEM;
goto err_db;
}
+ mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max),
+ &srq->fbc);
+
srq->head = 0;
srq->tail = srq->msrq.max - 1;
srq->wqe_ctr = 0;
@@ -195,14 +168,14 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
err = -ENOMEM;
goto err_buf;
}
- mlx5_fill_page_array(&srq->buf, in->pas);
+ mlx5_fill_page_frag_array(&srq->buf, in->pas);
srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
err = -ENOMEM;
goto err_in;
}
- srq->wq_sig = !!srq_signature;
+ srq->wq_sig = 0;
in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
@@ -215,7 +188,7 @@ err_in:
kvfree(in->pas);
err_buf:
- mlx5_buf_free(dev->mdev, &srq->buf);
+ mlx5_frag_buf_free(dev->mdev, &srq->buf);
err_db:
mlx5_db_free(dev->mdev, &srq->db);
@@ -232,7 +205,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
{
kvfree(srq->wrid);
- mlx5_buf_free(dev->mdev, &srq->buf);
+ mlx5_frag_buf_free(dev->mdev, &srq->buf);
mlx5_db_free(dev->mdev, &srq->db);
}
@@ -327,7 +300,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
in.pd = to_mpd(pd)->pdn;
in.db_record = srq->db.dma;
- err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
+ err = mlx5_cmd_create_srq(dev, &srq->msrq, &in);
kvfree(in.pas);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
@@ -351,7 +324,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
return &srq->ibsrq;
err_core:
- mlx5_core_destroy_srq(dev->mdev, &srq->msrq);
+ mlx5_cmd_destroy_srq(dev, &srq->msrq);
err_usr_kern_srq:
if (pd->uobject)
@@ -381,7 +354,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return -EINVAL;
mutex_lock(&srq->mutex);
- ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1);
+ ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1);
mutex_unlock(&srq->mutex);
if (ret)
@@ -402,7 +375,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
if (!out)
return -ENOMEM;
- ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out);
+ ret = mlx5_cmd_query_srq(dev, &srq->msrq, out);
if (ret)
goto out_box;
@@ -420,7 +393,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
struct mlx5_ib_dev *dev = to_mdev(srq->device);
struct mlx5_ib_srq *msrq = to_msrq(srq);
- mlx5_core_destroy_srq(dev->mdev, &msrq->msrq);
+ mlx5_cmd_destroy_srq(dev, &msrq->msrq);
if (srq->uobject) {
mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
new file mode 100644
index 000000000000..75eb5839ae95
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef MLX5_IB_SRQ_H
+#define MLX5_IB_SRQ_H
+
+enum {
+ MLX5_SRQ_FLAG_ERR = (1 << 0),
+ MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
+ MLX5_SRQ_FLAG_RNDV = (1 << 2),
+};
+
+struct mlx5_srq_attr {
+ u32 type;
+ u32 flags;
+ u32 log_size;
+ u32 wqe_shift;
+ u32 log_page_size;
+ u32 wqe_cnt;
+ u32 srqn;
+ u32 xrcd;
+ u32 page_offset;
+ u32 cqn;
+ u32 pd;
+ u32 lwm;
+ u32 user_index;
+ u64 db_record;
+ __be64 *pas;
+ u32 tm_log_list_size;
+ u32 tm_next_tag;
+ u32 tm_hw_phase_cnt;
+ u32 tm_sw_phase_cnt;
+ u16 uid;
+};
+
+struct mlx5_ib_dev;
+
+struct mlx5_core_srq {
+ struct mlx5_core_rsc_common common; /* must be first */
+ u32 srqn;
+ int max;
+ size_t max_gs;
+ size_t max_avail_gather;
+ int wqe_shift;
+ void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e);
+
+ atomic_t refcount;
+ struct completion free;
+ u16 uid;
+};
+
+struct mlx5_srq_table {
+ struct notifier_block nb;
+ /* protect radix tree
+ */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
+
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out);
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq);
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn);
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev);
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev);
+#endif /* MLX5_IB_SRQ_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index 6a6fc9be01e6..7aaaffbd4afa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -1,67 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
-#include <linux/mlx5/srq.h>
-#include <rdma/ib_verbs.h>
-#include "mlx5_core.h"
-#include <linux/mlx5/transobj.h>
-
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *srq;
-
- spin_lock(&table->lock);
-
- srq = radix_tree_lookup(&table->tree, srqn);
- if (srq)
- atomic_inc(&srq->refcount);
-
- spin_unlock(&table->lock);
-
- if (!srq) {
- mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
- return;
- }
-
- srq->event(srq, event_type);
-
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
-}
+#include "mlx5_ib.h"
+#include "srq.h"
static int get_pas_size(struct mlx5_srq_attr *in)
{
@@ -132,9 +78,9 @@ static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
}
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_srq_table *table = &dev->srq_table;
struct mlx5_core_srq *srq;
spin_lock(&table->lock);
@@ -147,9 +93,8 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
return srq;
}
-EXPORT_SYMBOL(mlx5_core_get_srq);
-static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
@@ -176,7 +121,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(create_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_SRQ);
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
if (!err) {
@@ -187,8 +132,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return err;
}
-static int destroy_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
+static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
@@ -198,11 +142,11 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
- return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, sizeof(srq_out));
+ return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+ sizeof(srq_out));
}
-static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq)
{
u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
@@ -214,11 +158,11 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
- return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, sizeof(srq_out));
+ return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+ sizeof(srq_out));
}
-static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
@@ -233,8 +177,8 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(query_srq_in, srq_in, opcode,
MLX5_CMD_OP_QUERY_SRQ);
MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
- err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
+ err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+ MLX5_ST_SZ_BYTES(query_srq_out));
if (err)
goto out;
@@ -247,7 +191,7 @@ out:
return err;
}
-static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
+static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
@@ -277,7 +221,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_CMD_OP_CREATE_XRC_SRQ);
memset(create_out, 0, sizeof(create_out));
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
sizeof(create_out));
if (err)
goto out;
@@ -289,7 +233,7 @@ out:
return err;
}
-static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
+static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq)
{
u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
@@ -300,12 +244,12 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
- return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+ return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, sizeof(xrcsrq_out));
}
-static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq, u16 lwm)
+static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm)
{
u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
@@ -316,11 +260,11 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
- return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+ return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, sizeof(xrcsrq_out));
}
-static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
+static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
@@ -338,8 +282,8 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_CMD_OP_QUERY_XRC_SRQ);
MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
- MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+ err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
+ xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out));
if (err)
goto out;
@@ -354,21 +298,27 @@ out:
return err;
}
-static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
- void *create_in;
+ void *create_out = NULL;
+ void *create_in = NULL;
void *rmpc;
void *wq;
int pas_size;
+ int outlen;
int inlen;
int err;
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
+ outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
+ create_out = kvzalloc(outlen, GFP_KERNEL);
+ if (!create_in || !create_out) {
+ err = -ENOMEM;
+ goto out;
+ }
rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
@@ -378,16 +328,20 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
set_wq(wq, in);
memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
- err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
- if (!err)
+ MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
+ if (!err) {
+ srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn);
srq->uid = in->uid;
+ }
+out:
kvfree(create_in);
+ kvfree(create_out);
return err;
}
-static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
+static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
@@ -395,22 +349,30 @@ static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
-static int arm_rmp_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
+static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm)
{
- void *in;
+ void *out = NULL;
+ void *in = NULL;
void *rmpc;
void *wq;
void *bitmask;
+ int outlen;
+ int inlen;
int err;
- in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ inlen = MLX5_ST_SZ_BYTES(modify_rmp_in);
+ outlen = MLX5_ST_SZ_BYTES(modify_rmp_out);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
@@ -422,25 +384,39 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
MLX5_SET(wq, wq, lwm, lwm);
MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
- err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
+ err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen);
+out:
kvfree(in);
+ kvfree(out);
return err;
}
-static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
- u32 *rmp_out;
+ u32 *rmp_out = NULL;
+ u32 *rmp_in = NULL;
void *rmpc;
+ int outlen;
+ int inlen;
int err;
- rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
- if (!rmp_out)
- return -ENOMEM;
+ outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
+ inlen = MLX5_ST_SZ_BYTES(query_rmp_in);
- err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
+ rmp_out = kvzalloc(outlen, GFP_KERNEL);
+ rmp_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!rmp_out || !rmp_in) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP);
+ MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn);
+ err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen);
if (err)
goto out;
@@ -451,10 +427,11 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
out:
kvfree(rmp_out);
+ kvfree(rmp_in);
return err;
}
-static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
@@ -489,7 +466,7 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(xrqc, xrqc, cqn, in->cqn);
MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
MLX5_SET(create_xrq_in, create_in, uid, in->uid);
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
if (!err) {
@@ -500,7 +477,7 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return err;
}
-static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
@@ -509,10 +486,10 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
-static int arm_xrq_cmd(struct mlx5_core_dev *dev,
+static int arm_xrq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq,
u16 lwm)
{
@@ -525,10 +502,10 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(arm_rq_in, in, lwm, lwm);
MLX5_SET(arm_rq_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
-static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
@@ -544,7 +521,7 @@ static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
- err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
+ err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen);
if (err)
goto out;
@@ -567,11 +544,10 @@ out:
return err;
}
-static int create_srq_split(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
+static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
- if (!dev->issi)
+ if (!dev->mdev->issi)
return create_srq_cmd(dev, srq, in);
switch (srq->common.res) {
case MLX5_RES_XSRQ:
@@ -583,10 +559,9 @@ static int create_srq_split(struct mlx5_core_dev *dev,
}
}
-static int destroy_srq_split(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
+static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
- if (!dev->issi)
+ if (!dev->mdev->issi)
return destroy_srq_cmd(dev, srq);
switch (srq->common.res) {
case MLX5_RES_XSRQ:
@@ -598,11 +573,11 @@ static int destroy_srq_split(struct mlx5_core_dev *dev,
}
}
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
{
+ struct mlx5_srq_table *table = &dev->srq_table;
int err;
- struct mlx5_srq_table *table = &dev->priv.srq_table;
switch (in->type) {
case IB_SRQT_XRC:
@@ -625,10 +600,8 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
spin_lock_irq(&table->lock);
err = radix_tree_insert(&table->tree, srq->srqn, srq);
spin_unlock_irq(&table->lock);
- if (err) {
- mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
+ if (err)
goto err_destroy_srq_split;
- }
return 0;
@@ -637,25 +610,18 @@ err_destroy_srq_split:
return err;
}
-EXPORT_SYMBOL(mlx5_core_create_srq);
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_srq_table *table = &dev->srq_table;
struct mlx5_core_srq *tmp;
int err;
spin_lock_irq(&table->lock);
tmp = radix_tree_delete(&table->tree, srq->srqn);
spin_unlock_irq(&table->lock);
- if (!tmp) {
- mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
- return -EINVAL;
- }
- if (tmp != srq) {
- mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
+ if (!tmp || tmp != srq)
return -EINVAL;
- }
err = destroy_srq_split(dev, srq);
if (err)
@@ -667,12 +633,11 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
return 0;
}
-EXPORT_SYMBOL(mlx5_core_destroy_srq);
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
{
- if (!dev->issi)
+ if (!dev->mdev->issi)
return query_srq_cmd(dev, srq, out);
switch (srq->common.res) {
case MLX5_RES_XSRQ:
@@ -683,12 +648,11 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return query_rmp_cmd(dev, srq, out);
}
}
-EXPORT_SYMBOL(mlx5_core_query_srq);
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- u16 lwm, int is_srq)
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
{
- if (!dev->issi)
+ if (!dev->mdev->issi)
return arm_srq_cmd(dev, srq, lwm, is_srq);
switch (srq->common.res) {
case MLX5_RES_XSRQ:
@@ -699,18 +663,60 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return arm_rmp_cmd(dev, srq, lwm);
}
}
-EXPORT_SYMBOL(mlx5_core_arm_srq);
-void mlx5_init_srq_table(struct mlx5_core_dev *dev)
+static int srq_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_srq_table *table;
+ struct mlx5_core_srq *srq;
+ struct mlx5_eqe *eqe;
+ u32 srqn;
+
+ if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR &&
+ type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)
+ return NOTIFY_DONE;
+
+ table = container_of(nb, struct mlx5_srq_table, nb);
+
+ eqe = data;
+ srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+
+ spin_lock(&table->lock);
+
+ srq = radix_tree_lookup(&table->tree, srqn);
+ if (srq)
+ atomic_inc(&srq->refcount);
+
+ spin_unlock(&table->lock);
+
+ if (!srq)
+ return NOTIFY_OK;
+
+ srq->event(srq, eqe->type);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+
+ return NOTIFY_OK;
+}
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev)
{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_srq_table *table = &dev->srq_table;
memset(table, 0, sizeof(*table));
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+
+ table->nb.notifier_call = srq_event_notifier;
+ mlx5_notifier_register(dev->mdev, &table->nb);
+
+ return 0;
}
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev)
{
- /* nothing */
+ struct mlx5_srq_table *table = &dev->srq_table;
+
+ mlx5_notifier_unregister(dev->mdev, &table->nb);
}
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index fb1ff59f40bd..cdbf707fa267 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3237,7 +3237,6 @@ static int init_6120_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
ret = ib_mtu_enum_to_int(qib_ibmtu);
dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_6120_tidtemplate(dd);
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 163a57a88742..9fde45538f6e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4043,7 +4043,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
ret = ib_mtu_enum_to_int(qib_ibmtu);
dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_7220_tidtemplate(dd);
@@ -4252,7 +4251,6 @@ static int init_sdma_7220_regs(struct qib_pportdata *ppd)
unsigned word = i / 64;
unsigned bit = i & 63;
- BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index bf5e222eed8e..17d6b24b3473 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1382,7 +1382,6 @@ static void err_decode(char *msg, size_t len, u64 errs,
*msg++ = ',';
len--;
}
- BUG_ON(!msp->sz);
/* msp->sz counts the nul */
took = min_t(size_t, msp->sz - (size_t)1, len);
memcpy(msg, msp->msg, took);
@@ -6599,7 +6598,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
dd->rcvegrbufsize = max(mtu, 2048);
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_7322_tidtemplate(dd);
@@ -6904,7 +6902,6 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd)
unsigned word = erstbuf / BITS_PER_LONG;
unsigned bit = erstbuf & (BITS_PER_LONG - 1);
- BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]);
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index d7cdc77d6306..9fd69903ca57 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -209,7 +209,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
rcd->rcvegrbufs_perchunk - 1) /
rcd->rcvegrbufs_perchunk;
- BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
rcd->rcvegrbufs_perchunk_shift =
ilog2(rcd->rcvegrbufs_perchunk);
}
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 30595b358d8f..864f2af171f7 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -387,7 +387,7 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
static int qib_pcie_coalesce;
module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
-MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
+MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets");
/*
* Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 757d4c9d713d..0b6ca424c11d 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -572,7 +572,6 @@ retry:
len = sge->length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
dw = (len + 3) >> 2;
addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr,
dw << 2, DMA_TO_DEVICE);
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 4d4c31ea4e2d..868da0ece7ba 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -178,7 +178,6 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 926f3c8eba69..31c523b2a9f5 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -237,7 +237,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
sdma_rb_node);
- BUG_ON(ret == 0);
}
pq->sdma_rb_node = sdma_rb_node;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 4b0f5761a646..8914abdd7584 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -150,7 +150,6 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
len = length;
if (len > sge.sge_length)
len = sge.sge_length;
- BUG_ON(len == 0);
if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
(len != length && (len & (sizeof(u32) - 1)))) {
ndesc = 0;
@@ -193,7 +192,6 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
memcpy(data, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
@@ -449,7 +447,6 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
len = length;
if (len > ss->sge.sge_length)
len = ss->sge.sge_length;
- BUG_ON(len == 0);
/* If the source address is not aligned, try to align it. */
off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
if (off) {
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 73bd00f8d2c8..413fa5732e2b 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -649,7 +649,7 @@ static int __init usnic_ib_init(void)
err = usnic_uiom_init(DRV_NAME);
if (err) {
- usnic_err("Unable to initalize umem with err %d\n", err);
+ usnic_err("Unable to initialize umem with err %d\n", err);
return err;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index bf5136533d49..0cdb156e165e 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -681,7 +681,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
res_spec);
if (err) {
- usnic_err("Spec does not meet miniumum req for transport %d\n",
+ usnic_err("Spec does not meet minimum req for transport %d\n",
transport);
log_spec(res_spec);
return ERR_PTR(err);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 1735deb1a9d4..a1bd8cfc2c25 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016, 2017 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1094,6 +1094,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->ibqp.qp_num = err;
qp->port_num = init_attr->port_num;
rvt_init_qp(rdi, qp, init_attr->qp_type);
+ if (rdi->driver_f.qp_priv_init) {
+ err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_rq_wq;
+ }
+ }
break;
default:
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index d9ec2de68738..8f79bd86d033 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -65,8 +65,9 @@
*/
#define RXE_UVERBS_ABI_VERSION 2
-#define IB_PHYS_STATE_LINK_UP (5)
-#define IB_PHYS_STATE_LINK_DOWN (3)
+#define RDMA_LINK_PHYS_STATE_LINK_UP (5)
+#define RDMA_LINK_PHYS_STATE_DISABLED (3)
+#define RDMA_LINK_PHYS_STATE_POLLING (2)
#define RXE_ROCE_V2_SPORT (0xc000)
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index ea089cb091ad..e996da67a851 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -439,6 +439,7 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
*/
static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_cqe cqe;
if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
@@ -451,6 +452,11 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
advance_consumer(qp->sq.queue);
}
+ if (wqe->wr.opcode == IB_WR_SEND ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_INV)
+ rxe_counter_inc(rxe, RXE_CNT_RDMA_SEND);
+
/*
* we completed something so let req run again
* if it is trying to fence
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index 6aeb7a165e46..4a24895846d3 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -37,15 +37,18 @@ static const char * const rxe_counter_name[] = {
[RXE_CNT_SENT_PKTS] = "sent_pkts",
[RXE_CNT_RCVD_PKTS] = "rcvd_pkts",
[RXE_CNT_DUP_REQ] = "duplicate_request",
- [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence",
+ [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request",
[RXE_CNT_RCV_RNR] = "rcvd_rnr_err",
[RXE_CNT_SND_RNR] = "send_rnr_err",
[RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err",
- [RXE_CNT_COMPLETER_SCHED] = "ack_deffered",
+ [RXE_CNT_COMPLETER_SCHED] = "ack_deferred",
[RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err",
[RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err",
[RXE_CNT_COMP_RETRY] = "completer_retry_err",
[RXE_CNT_SEND_ERR] = "send_err",
+ [RXE_CNT_LINK_DOWNED] = "link_downed",
+ [RXE_CNT_RDMA_SEND] = "rdma_sends",
+ [RXE_CNT_RDMA_RECV] = "rdma_recvs",
};
int rxe_ib_get_hw_stats(struct ib_device *ibdev,
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
index f44df1b76742..72c0d63c79e0 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -50,6 +50,9 @@ enum rxe_counters {
RXE_CNT_RNR_RETRY_EXCEEDED,
RXE_CNT_COMP_RETRY,
RXE_CNT_SEND_ERR,
+ RXE_CNT_LINK_DOWNED,
+ RXE_CNT_RDMA_SEND,
+ RXE_CNT_RDMA_RECV,
RXE_NUM_OF_COUNTERS
};
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index afd53f57a62b..a675c9f2b427 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -250,11 +250,12 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}
-static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static inline int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
int err;
int is_request = pkt->mask & RXE_REQ_MASK;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
if ((is_request && (qp->req.state != QP_STATE_READY)) ||
(!is_request && (qp->resp.state != QP_STATE_READY))) {
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 40e82e0f6c2d..b26a8141f3ed 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -607,7 +607,6 @@ void rxe_port_up(struct rxe_dev *rxe)
port = &rxe->port;
port->attr.state = IB_PORT_ACTIVE;
- port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
dev_info(&rxe->ib_dev.dev, "set active\n");
@@ -620,9 +619,9 @@ void rxe_port_down(struct rxe_dev *rxe)
port = &rxe->port;
port->attr.state = IB_PORT_DOWN;
- port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
rxe_port_event(rxe, IB_EVENT_PORT_ERR);
+ rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
dev_info(&rxe->ib_dev.dev, "set down\n");
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 36b53fb94a49..b5c91df22047 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -112,6 +112,18 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
return rxe_type_info[pool->type].cache;
}
+static void rxe_cache_clean(size_t cnt)
+{
+ int i;
+ struct rxe_type_info *type;
+
+ for (i = 0; i < cnt; i++) {
+ type = &rxe_type_info[i];
+ kmem_cache_destroy(type->cache);
+ type->cache = NULL;
+ }
+}
+
int rxe_cache_init(void)
{
int err;
@@ -136,24 +148,14 @@ int rxe_cache_init(void)
return 0;
err1:
- while (--i >= 0) {
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
+ rxe_cache_clean(i);
return err;
}
void rxe_cache_exit(void)
{
- int i;
- struct rxe_type_info *type;
-
- for (i = 0; i < RXE_NUM_TYPES; i++) {
- type = &rxe_type_info[i];
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
+ rxe_cache_clean(RXE_NUM_TYPES);
}
static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
@@ -241,7 +243,7 @@ static void rxe_pool_put(struct rxe_pool *pool)
kref_put(&pool->ref_cnt, rxe_pool_release);
}
-int rxe_pool_cleanup(struct rxe_pool *pool)
+void rxe_pool_cleanup(struct rxe_pool *pool)
{
unsigned long flags;
@@ -253,8 +255,6 @@ int rxe_pool_cleanup(struct rxe_pool *pool)
write_unlock_irqrestore(&pool->pool_lock, flags);
rxe_pool_put(pool);
-
- return 0;
}
static u32 alloc_index(struct rxe_pool *pool)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index aa4ba307097b..72968c29e01f 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -126,7 +126,7 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
enum rxe_elem_type type, u32 max_elem);
/* free resources from object pool */
-int rxe_pool_cleanup(struct rxe_pool *pool);
+void rxe_pool_cleanup(struct rxe_pool *pool);
/* allocate an object from pool */
void *rxe_alloc(struct rxe_pool *pool);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b9710907dac2..2ca4ffe5015f 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -97,7 +97,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
goto err1;
if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
- if (port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
pr_warn("invalid port = %d\n", port_num);
goto err1;
}
@@ -433,7 +433,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
}
if (mask & IB_QP_PORT) {
- if (attr->port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
pr_warn("invalid port %d\n", attr->port_num);
goto err1;
}
@@ -448,7 +448,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_ALT_PATH) {
if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
goto err1;
- if (attr->alt_port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) {
pr_warn("invalid alt port %d\n", attr->alt_port_num);
goto err1;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 6c361d70d7cd..c5d9b558fa90 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -643,6 +643,7 @@ next_wqe:
rmr->access = wqe->wr.wr.reg.access;
rmr->lkey = wqe->wr.wr.reg.key;
rmr->rkey = wqe->wr.wr.reg.key;
+ rmr->iova = wqe->wr.wr.reg.mr->iova;
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
} else {
@@ -728,7 +729,7 @@ next_wqe:
save_state(wqe, qp, &rollback_wqe, &rollback_psn);
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
- ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
+ ret = rxe_xmit_packet(qp, &pkt, skb);
if (ret) {
qp->need_req_skb = 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c962160292f4..231528188250 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -124,12 +124,9 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
struct sk_buff *skb;
if (qp->resp.state == QP_STATE_ERROR) {
- skb = skb_dequeue(&qp->req_pkts);
- if (skb) {
- /* drain request packet queue */
+ while ((skb = skb_dequeue(&qp->req_pkts))) {
rxe_drop_ref(qp);
kfree_skb(skb);
- return RESPST_GET_REQ;
}
/* go drain recv wr queue */
@@ -660,7 +657,6 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
static enum resp_states read_reply(struct rxe_qp *qp,
struct rxe_pkt_info *req_pkt)
{
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
int mtu = qp->mtu;
@@ -739,7 +735,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
*p = ~icrc;
- err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err) {
pr_err("Failed sending RDMA reply.\n");
return RESPST_ERR_RNR;
@@ -838,18 +834,25 @@ static enum resp_states do_complete(struct rxe_qp *qp,
struct ib_wc *wc = &cqe.ibwc;
struct ib_uverbs_wc *uwc = &cqe.uibwc;
struct rxe_recv_wqe *wqe = qp->resp.wqe;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
if (unlikely(!wqe))
return RESPST_CLEANUP;
memset(&cqe, 0, sizeof(cqe));
- wc->wr_id = wqe->wr_id;
- wc->status = qp->resp.status;
- wc->qp = &qp->ibqp;
+ if (qp->rcq->is_user) {
+ uwc->status = qp->resp.status;
+ uwc->qp_num = qp->ibqp.qp_num;
+ uwc->wr_id = wqe->wr_id;
+ } else {
+ wc->status = qp->resp.status;
+ wc->qp = &qp->ibqp;
+ wc->wr_id = wqe->wr_id;
+ }
- /* fields after status are not required for errors */
if (wc->status == IB_WC_SUCCESS) {
+ rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
pkt->mask & RXE_WRITE_MASK) ?
IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
@@ -898,7 +901,6 @@ static enum resp_states do_complete(struct rxe_qp *qp,
}
if (pkt->mask & RXE_IETH_MASK) {
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_mem *rmr;
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
@@ -950,7 +952,6 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int err = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
0, psn, syndrome, NULL);
@@ -959,7 +960,7 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
goto err1;
}
- err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
pr_err_ratelimited("Failed sending ack\n");
@@ -973,7 +974,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int rc = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct resp_res *res;
skb = prepare_ack_packet(qp, pkt, &ack_pkt,
@@ -1001,7 +1001,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
res->last_psn = ack_pkt.psn;
res->cur_psn = ack_pkt.psn;
- rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ rc = rxe_xmit_packet(qp, &ack_pkt, skb);
if (rc) {
pr_err_ratelimited("Failed sending ack\n");
rxe_drop_ref(qp);
@@ -1131,8 +1131,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
if (res) {
skb_get(res->atomic.skb);
/* Resend the result. */
- rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
- pkt, res->atomic.skb);
+ rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
if (rc) {
pr_err("Failed resending result. This flow is not handled - skb ignored\n");
rc = RESPST_CLEANUP;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9c19f2027511..30817c79ba96 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -71,6 +71,14 @@ static int rxe_query_port(struct ib_device *dev,
mutex_lock(&rxe->usdev_lock);
rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
&attr->active_width);
+
+ if (attr->state == IB_PORT_ACTIVE)
+ attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP;
+ else if (dev_get_flags(rxe->ndev) & IFF_UP)
+ attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING;
+ else
+ attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED;
+
mutex_unlock(&rxe->usdev_lock);
out:
@@ -1279,11 +1287,9 @@ err1:
return err;
}
-int rxe_unregister_device(struct rxe_dev *rxe)
+void rxe_unregister_device(struct rxe_dev *rxe)
{
struct ib_device *dev = &rxe->ib_dev;
ib_unregister_device(dev);
-
- return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 82e670d6eeea..831381b7788d 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -467,7 +467,7 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
}
int rxe_register_device(struct rxe_dev *rxe);
-int rxe_unregister_device(struct rxe_dev *rxe);
+void rxe_unregister_device(struct rxe_dev *rxe);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 009be8889d71..dbe97c02848c 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -277,16 +277,13 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
- int ret;
if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
- ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
- if (ret)
- iser_err("ib_fmr_pool_unmap failed %d\n", ret);
+ ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
reg->mem_h = NULL;
}
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2357aa727dcf..adc0e91d2bb5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -3617,7 +3617,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
const char *name)
{
struct srpt_port *sport = wwn->priv;
- static struct se_portal_group *tpg;
+ struct se_portal_group *tpg;
int res;
WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d324a3884462..d499b3d00348 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -12,9 +12,9 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
# mlx5 core basic
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
- health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
+ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
- fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
+ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
diag/fs_tracepoint.o diag/fw_tracer.o
#
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a5a0823e5ada..8ab636d59edb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -40,9 +40,11 @@
#include <linux/random.h>
#include <linux/io-mapping.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
#include <linux/debugfs.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
CMD_IF_REV = 5,
@@ -313,6 +315,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_FPGA_DESTROY_QP:
case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
case MLX5_CMD_OP_DEALLOC_MEMIC:
+ case MLX5_CMD_OP_PAGE_FAULT_RESUME:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -326,7 +329,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_MKEY:
case MLX5_CMD_OP_QUERY_MKEY:
case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
- case MLX5_CMD_OP_PAGE_FAULT_RESUME:
case MLX5_CMD_OP_CREATE_EQ:
case MLX5_CMD_OP_QUERY_EQ:
case MLX5_CMD_OP_GEN_EQE:
@@ -805,6 +807,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
return MLX5_GET(mbox_in, in->first.data, opcode);
}
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+
static void cb_timeout_handler(struct work_struct *work)
{
struct delayed_work *dwork = container_of(work, struct delayed_work,
@@ -1412,14 +1416,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
up(&cmd->sem);
}
+static int cmd_comp_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_core_dev *dev;
+ struct mlx5_cmd *cmd;
+ struct mlx5_eqe *eqe;
+
+ cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
+ dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ eqe = data;
+
+ mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+
+ return NOTIFY_OK;
+}
void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
{
+ MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
+ mlx5_eq_notifier_register(dev, &dev->cmd.nb);
mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
}
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
{
mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+ mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1435,7 +1457,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
}
}
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
{
struct mlx5_cmd *cmd = &dev->cmd;
struct mlx5_cmd_work_ent *ent;
@@ -1533,7 +1555,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
}
}
}
-EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+{
+ unsigned long flags;
+ u64 vector;
+
+ /* wait for pending handlers to complete */
+ mlx5_eq_synchronize_cmd_irq(dev);
+ spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+ vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+ if (!vector)
+ goto no_trig;
+
+ vector |= MLX5_TRIGGERED_CMD_COMP;
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+ mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+ mlx5_cmd_comp_handler(dev, vector, true);
+ return;
+
+no_trig:
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
static int status_to_err(u8 status)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 4b85abb5c9f7..713a17ee3751 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -38,6 +38,7 @@
#include <rdma/ib_verbs.h>
#include <linux/mlx5/cq.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
@@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
- struct mlx5_eq *eq;
+ struct mlx5_eq_comp *eq;
int err;
- eq = mlx5_eqn2eq(dev, eqn);
+ eq = mlx5_eqn2comp_eq(dev, eqn);
if (IS_ERR(eq))
return PTR_ERR(eq);
@@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
/* Add to comp EQ CQ tree to recv comp events */
- err = mlx5_eq_add_cq(eq, cq);
+ err = mlx5_eq_add_cq(&eq->core, cq);
if (err)
goto err_cmd;
/* Add to async EQ CQ tree to recv async events */
- err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
if (err)
goto err_cq_add;
@@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
return 0;
err_cq_add:
- mlx5_eq_del_cq(eq, cq);
+ mlx5_eq_del_cq(&eq->core, cq);
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
@@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
int err;
- err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
if (err)
return err;
- err = mlx5_eq_del_cq(cq->eq, cq);
+ err = mlx5_eq_del_cq(&cq->eq->core, cq);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 90fabd612b6c..a11e22d0b0cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
QP_PID,
@@ -349,6 +350,16 @@ out:
return param;
}
+static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
+
+ MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+ MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
int index)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 37ba7c78859d..d2ed14bc37c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -45,75 +45,11 @@ struct mlx5_device_context {
unsigned long state;
};
-struct mlx5_delayed_event {
- struct list_head list;
- struct mlx5_core_dev *dev;
- enum mlx5_dev_event event;
- unsigned long param;
-};
-
enum {
MLX5_INTERFACE_ADDED,
MLX5_INTERFACE_ATTACHED,
};
-static void add_delayed_event(struct mlx5_priv *priv,
- struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- unsigned long param)
-{
- struct mlx5_delayed_event *delayed_event;
-
- delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
- if (!delayed_event) {
- mlx5_core_err(dev, "event %d is missed\n", event);
- return;
- }
-
- mlx5_core_dbg(dev, "Accumulating event %d\n", event);
- delayed_event->dev = dev;
- delayed_event->event = event;
- delayed_event->param = param;
- list_add_tail(&delayed_event->list, &priv->waiting_events_list);
-}
-
-static void delayed_event_release(struct mlx5_device_context *dev_ctx,
- struct mlx5_priv *priv)
-{
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
- struct mlx5_delayed_event *de;
- struct mlx5_delayed_event *n;
- struct list_head temp;
-
- INIT_LIST_HEAD(&temp);
-
- spin_lock_irq(&priv->ctx_lock);
-
- priv->is_accum_events = false;
- list_splice_init(&priv->waiting_events_list, &temp);
- if (!dev_ctx->context)
- goto out;
- list_for_each_entry_safe(de, n, &temp, list)
- dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
-
-out:
- spin_unlock_irq(&priv->ctx_lock);
-
- list_for_each_entry_safe(de, n, &temp, list) {
- list_del(&de->list);
- kfree(de);
- }
-}
-
-/* accumulating events that can come after mlx5_ib calls to
- * ib_register_device, till adding that interface to the events list.
- */
-static void delayed_event_start(struct mlx5_priv *priv)
-{
- spin_lock_irq(&priv->ctx_lock);
- priv->is_accum_events = true;
- spin_unlock_irq(&priv->ctx_lock);
-}
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
@@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
dev_ctx->intf = intf;
- delayed_event_start(priv);
-
dev_ctx->context = intf->add(dev);
if (dev_ctx->context) {
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (dev_ctx->intf->pfault) {
- if (priv->pfault) {
- mlx5_core_err(dev, "multiple page fault handlers not supported");
- } else {
- priv->pfault_ctx = dev_ctx->context;
- priv->pfault = dev_ctx->intf->pfault;
- }
- }
-#endif
spin_unlock_irq(&priv->ctx_lock);
}
- delayed_event_release(dev_ctx, priv);
-
if (!dev_ctx->context)
kfree(dev_ctx);
}
@@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (!dev_ctx)
return;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- spin_lock_irq(&priv->ctx_lock);
- if (priv->pfault == dev_ctx->intf->pfault)
- priv->pfault = NULL;
- spin_unlock_irq(&priv->ctx_lock);
-
- synchronize_srcu(&priv->pfault_srcu);
-#endif
-
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
@@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
if (!dev_ctx)
return;
- delayed_event_start(priv);
if (intf->attach) {
if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
- goto out;
+ return;
if (intf->attach(dev, dev_ctx->context))
- goto out;
-
+ return;
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- goto out;
+ return;
dev_ctx->context = intf->add(dev);
if (!dev_ctx->context)
- goto out;
-
+ return;
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
-
-out:
- delayed_event_release(dev_ctx, priv);
}
void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -422,44 +328,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
return res;
}
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
- unsigned long param)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_device_context *dev_ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&priv->ctx_lock, flags);
-
- if (priv->is_accum_events)
- add_delayed_event(priv, dev, event, param);
-
- /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
- * still in priv->ctx_list. In this case, only notify the dev_ctx if its
- * ADDED or ATTACHED bit are set.
- */
- list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf->event &&
- (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
- test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
- dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
- struct mlx5_pagefault *pfault)
-{
- struct mlx5_priv *priv = &dev->priv;
- int srcu_idx;
-
- srcu_idx = srcu_read_lock(&priv->pfault_srcu);
- if (priv->pfault)
- priv->pfault(dev, priv->pfault_ctx, pfault);
- srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
-}
-#endif
void mlx5_dev_list_lock(void)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index d4ec93bde4de..6999f4486e9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
#define CREATE_TRACE_POINTS
+#include "lib/eq.h"
#include "fw_tracer.h"
#include "fw_tracer_tracepoint.h"
@@ -846,9 +847,9 @@ free_tracer:
return ERR_PTR(err);
}
-/* Create HW resources + start tracer
- * must be called before Async EQ is created
- */
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
+
+/* Create HW resources + start tracer */
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev;
@@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
goto err_dealloc_pd;
}
+ MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
+ mlx5_eq_notifier_register(dev, &tracer->nb);
+
mlx5_fw_tracer_start(tracer);
return 0;
@@ -883,9 +887,7 @@ err_dealloc_pd:
return err;
}
-/* Stop tracer + Cleanup HW resources
- * must be called after Async EQ is destroyed
- */
+/* Stop tracer + Cleanup HW resources */
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
{
if (IS_ERR_OR_NULL(tracer))
@@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
tracer->owner);
-
+ mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
cancel_work_sync(&tracer->ownership_change_work);
cancel_work_sync(&tracer->handle_traces_work);
@@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
kfree(tracer);
}
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
{
- struct mlx5_fw_tracer *tracer = dev->tracer;
-
- if (!tracer)
- return;
+ struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct mlx5_eqe *eqe = data;
switch (eqe->sub_type) {
case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
@@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
eqe->sub_type);
}
+
+ return NOTIFY_OK;
}
EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 0347f2dd5cee..a8b8747f2b61 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -55,6 +55,7 @@
struct mlx5_fw_tracer {
struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
bool owner;
u8 trc_ver;
struct workqueue_struct *work_queue;
@@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 118324802926..7e0a0cf041d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -178,8 +178,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
return is_kdump_kernel() ?
MLX5E_MIN_NUM_CHANNELS :
- min_t(int, mdev->priv.eq_table.num_comp_vectors,
- MLX5E_MAX_NUM_CHANNELS);
+ min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
/* Use this function to get max num channels after netdev was created */
@@ -633,7 +632,6 @@ struct mlx5e_channel_stats {
} ____cacheline_aligned_in_smp;
enum {
- MLX5E_STATE_ASYNC_EVENTS_ENABLED,
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
};
@@ -692,6 +690,8 @@ struct mlx5e_priv {
struct hwtstamp_config tstamp;
u16 q_counter;
u16 drop_rq_q_counter;
+ struct notifier_block events_nb;
+
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 871313d6b34d..138c7679eebb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -49,6 +49,7 @@
#include "lib/clock.h"
#include "en/port.h"
#include "en/xdp.h"
+#include "lib/eq.h"
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
@@ -293,33 +294,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->update_stats_work);
}
-static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
- enum mlx5_dev_event event, unsigned long param)
+static int async_event(struct notifier_block *nb, unsigned long event, void *data)
{
- struct mlx5e_priv *priv = vpriv;
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+ struct mlx5_eqe *eqe = data;
- if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
- return;
+ if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+ return NOTIFY_DONE;
- switch (event) {
- case MLX5_DEV_EVENT_PORT_UP:
- case MLX5_DEV_EVENT_PORT_DOWN:
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
queue_work(priv->wq, &priv->update_carrier_work);
break;
default:
- break;
+ return NOTIFY_DONE;
}
+
+ return NOTIFY_OK;
}
static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
- set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+ priv->events_nb.notifier_call = async_event;
+ mlx5_notifier_register(priv->mdev, &priv->events_nb);
}
static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
- clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
- synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+ mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
}
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
@@ -1763,11 +1766,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}
-static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
-{
- return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
-}
-
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
@@ -1918,9 +1916,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_channel_param *cparam,
struct mlx5e_channel **cp)
{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
struct net_dim_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
- int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
unsigned int irq;
int err;
@@ -4134,17 +4132,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
struct mlx5e_txqsq *sq)
{
- struct mlx5_eq *eq = sq->cq.mcq.eq;
+ struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
u32 eqe_count;
netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
- eq->eqn, eq->cons_index, eq->irqn);
+ eq->core.eqn, eq->core.cons_index, eq->core.irqn);
eqe_count = mlx5_eq_poll_irq_disabled(eq);
if (!eqe_count)
return false;
- netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
+ netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
sq->channel->stats->eq_rearm++;
return true;
}
@@ -4975,7 +4973,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
netif_carrier_off(netdev);
#ifdef CONFIG_MLX5_EN_ARFS
- netdev->rx_cpu_rmap = mdev->rmap;
+ netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev);
#endif
return 0;
@@ -5199,7 +5197,6 @@ static struct mlx5_interface mlx5e_interface = {
.remove = mlx5e_remove,
.attach = mlx5e_attach,
.detach = mlx5e_detach,
- .event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
.get_dev = mlx5e_get_netdev,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 3e99d0728b2f..0b7f3ebb7b9d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include "lib/mlx5.h"
#include "en.h"
#include "en_accel/ipsec.h"
#include "en_accel/tls.h"
@@ -1122,15 +1123,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
int idx)
{
- struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+ struct mlx5_pme_stats pme_stats;
int i;
+ mlx5_get_pme_stats(priv->mdev, &pme_stats);
+
for (i = 0; i < NUM_PME_STATUS_STATS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
mlx5e_pme_status_desc, i);
for (i = 0; i < NUM_PME_ERR_STATS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
mlx5e_pme_error_desc, i);
return idx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1e1a16a9b07..4aa39a1fe23f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -31,20 +31,22 @@
*/
#include <linux/interrupt.h>
+#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
#include <linux/mlx5/cmd.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"
enum {
- MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
MLX5_EQE_OWNER_INIT_VAL = 0x1,
};
@@ -55,14 +57,32 @@ enum {
};
enum {
- MLX5_NUM_SPARE_EQE = 0x80,
- MLX5_NUM_ASYNC_EQE = 0x1000,
- MLX5_NUM_CMD_EQE = 32,
- MLX5_NUM_PF_DRAIN = 64,
+ MLX5_EQ_DOORBEL_OFFSET = 0x40,
};
-enum {
- MLX5_EQ_DOORBEL_OFFSET = 0x40,
+struct mlx5_irq_info {
+ cpumask_var_t mask;
+ char name[MLX5_MAX_IRQ_NAME];
+ void *context; /* dev_id provided to request_irq */
+};
+
+struct mlx5_eq_table {
+ struct list_head comp_eqs_list;
+ struct mlx5_eq pages_eq;
+ struct mlx5_eq cmd_eq;
+ struct mlx5_eq async_eq;
+
+ struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+ /* Since CQ DB is stored in async_eq */
+ struct mlx5_nb cq_err_nb;
+
+ struct mutex lock; /* sync async eqs creations */
+ int num_comp_vectors;
+ struct mlx5_irq_info *irq_info;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -78,17 +98,6 @@ enum {
(1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
(1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
-struct map_eq_in {
- u64 mask;
- u32 reserved;
- u32 unmap_eqn;
-};
-
-struct cre_des_eq {
- u8 reserved[15];
- u8 eqn;
-};
-
static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
@@ -99,213 +108,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
-{
- return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
-}
-
-static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
-{
- struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
-
- return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
-}
-
-static const char *eqe_type_str(u8 type)
-{
- switch (type) {
- case MLX5_EVENT_TYPE_COMP:
- return "MLX5_EVENT_TYPE_COMP";
- case MLX5_EVENT_TYPE_PATH_MIG:
- return "MLX5_EVENT_TYPE_PATH_MIG";
- case MLX5_EVENT_TYPE_COMM_EST:
- return "MLX5_EVENT_TYPE_COMM_EST";
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- return "MLX5_EVENT_TYPE_SQ_DRAINED";
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
- case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
- return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
- case MLX5_EVENT_TYPE_CQ_ERROR:
- return "MLX5_EVENT_TYPE_CQ_ERROR";
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
- case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
- return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
- case MLX5_EVENT_TYPE_INTERNAL_ERROR:
- return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
- case MLX5_EVENT_TYPE_PORT_CHANGE:
- return "MLX5_EVENT_TYPE_PORT_CHANGE";
- case MLX5_EVENT_TYPE_GPIO_EVENT:
- return "MLX5_EVENT_TYPE_GPIO_EVENT";
- case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
- return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
- case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
- return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
- case MLX5_EVENT_TYPE_REMOTE_CONFIG:
- return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
- case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
- return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
- case MLX5_EVENT_TYPE_STALL_EVENT:
- return "MLX5_EVENT_TYPE_STALL_EVENT";
- case MLX5_EVENT_TYPE_CMD:
- return "MLX5_EVENT_TYPE_CMD";
- case MLX5_EVENT_TYPE_PAGE_REQUEST:
- return "MLX5_EVENT_TYPE_PAGE_REQUEST";
- case MLX5_EVENT_TYPE_PAGE_FAULT:
- return "MLX5_EVENT_TYPE_PAGE_FAULT";
- case MLX5_EVENT_TYPE_PPS_EVENT:
- return "MLX5_EVENT_TYPE_PPS_EVENT";
- case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
- return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
- case MLX5_EVENT_TYPE_FPGA_ERROR:
- return "MLX5_EVENT_TYPE_FPGA_ERROR";
- case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
- return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
- case MLX5_EVENT_TYPE_GENERAL_EVENT:
- return "MLX5_EVENT_TYPE_GENERAL_EVENT";
- case MLX5_EVENT_TYPE_DEVICE_TRACER:
- return "MLX5_EVENT_TYPE_DEVICE_TRACER";
- default:
- return "Unrecognized event";
- }
-}
-
-static enum mlx5_dev_event port_subtype_event(u8 subtype)
-{
- switch (subtype) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- return MLX5_DEV_EVENT_PORT_DOWN;
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- return MLX5_DEV_EVENT_PORT_UP;
- case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
- return MLX5_DEV_EVENT_PORT_INITIALIZED;
- case MLX5_PORT_CHANGE_SUBTYPE_LID:
- return MLX5_DEV_EVENT_LID_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
- return MLX5_DEV_EVENT_PKEY_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_GUID:
- return MLX5_DEV_EVENT_GUID_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
- return MLX5_DEV_EVENT_CLIENT_REREG;
- }
- return -1;
-}
-
-static void eq_update_ci(struct mlx5_eq *eq, int arm)
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
- __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
- u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
-
- __raw_writel((__force u32)cpu_to_be32(val), addr);
- /* We still want ordering, just not swabbing, so add a barrier */
- mb();
-}
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void eqe_pf_action(struct work_struct *work)
-{
- struct mlx5_pagefault *pfault = container_of(work,
- struct mlx5_pagefault,
- work);
- struct mlx5_eq *eq = pfault->eq;
+ spin_lock(&table->lock);
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ spin_unlock(&table->lock);
- mlx5_core_page_fault(eq->dev, pfault);
- mempool_free(pfault, eq->pf_ctx.pool);
+ return cq;
}
-static void eq_pf_process(struct mlx5_eq *eq)
+static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
{
- struct mlx5_core_dev *dev = eq->dev;
- struct mlx5_eqe_page_fault *pf_eqe;
- struct mlx5_pagefault *pfault;
+ struct mlx5_eq_comp *eq_comp = eq_ptr;
+ struct mlx5_eq *eq = eq_ptr;
struct mlx5_eqe *eqe;
int set_ci = 0;
+ u32 cqn = -1;
while ((eqe = next_eqe_sw(eq))) {
- pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
- if (!pfault) {
- schedule_work(&eq->pf_ctx.work);
- break;
- }
-
+ struct mlx5_core_cq *cq;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
dma_rmb();
- pf_eqe = &eqe->data.page_fault;
- pfault->event_subtype = eqe->sub_type;
- pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
- mlx5_core_dbg(dev,
- "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
- eqe->sub_type, pfault->bytes_committed);
-
- switch (eqe->sub_type) {
- case MLX5_PFAULT_SUBTYPE_RDMA:
- /* RDMA based event */
- pfault->type =
- be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
- pfault->token =
- be32_to_cpu(pf_eqe->rdma.pftype_token) &
- MLX5_24BIT_MASK;
- pfault->rdma.r_key =
- be32_to_cpu(pf_eqe->rdma.r_key);
- pfault->rdma.packet_size =
- be16_to_cpu(pf_eqe->rdma.packet_length);
- pfault->rdma.rdma_op_len =
- be32_to_cpu(pf_eqe->rdma.rdma_op_len);
- pfault->rdma.rdma_va =
- be64_to_cpu(pf_eqe->rdma.rdma_va);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
- pfault->type, pfault->token,
- pfault->rdma.r_key);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
- pfault->rdma.rdma_op_len,
- pfault->rdma.rdma_va);
- break;
-
- case MLX5_PFAULT_SUBTYPE_WQE:
- /* WQE based event */
- pfault->type =
- (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
- pfault->token =
- be32_to_cpu(pf_eqe->wqe.token);
- pfault->wqe.wq_num =
- be32_to_cpu(pf_eqe->wqe.pftype_wq) &
- MLX5_24BIT_MASK;
- pfault->wqe.wqe_index =
- be16_to_cpu(pf_eqe->wqe.wqe_index);
- pfault->wqe.packet_size =
- be16_to_cpu(pf_eqe->wqe.packet_length);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
- pfault->type, pfault->token,
- pfault->wqe.wq_num,
- pfault->wqe.wqe_index);
- break;
-
- default:
- mlx5_core_warn(dev,
- "Unsupported page fault event sub-type: 0x%02hhx\n",
- eqe->sub_type);
- /* Unsupported page faults should still be
- * resolved by the page fault handler
- */
+ /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+ cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (likely(cq)) {
+ ++cq->arm_sn;
+ cq->comp(cq);
+ mlx5_cq_put(cq);
+ } else {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
}
- pfault->eq = eq;
- INIT_WORK(&pfault->work, eqe_pf_action);
- queue_work(eq->pf_ctx.wq, &pfault->work);
-
++eq->cons_index;
++set_ci;
+ /* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MLX5_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
eq_update_ci(eq, 0);
set_ci = 0;
@@ -313,165 +165,41 @@ static void eq_pf_process(struct mlx5_eq *eq)
}
eq_update_ci(eq, 1);
-}
-static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
-{
- struct mlx5_eq *eq = eq_ptr;
- unsigned long flags;
-
- if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
- eq_pf_process(eq);
- spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
- } else {
- schedule_work(&eq->pf_ctx.work);
- }
+ if (cqn != -1)
+ tasklet_schedule(&eq_comp->tasklet_ctx.task);
return IRQ_HANDLED;
}
-/* mempool_refill() was proposed but unfortunately wasn't accepted
- * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
- * Chip workaround.
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them. It is not recommended to use it, unless this is the last
+ * resort.
*/
-static void mempool_refill(mempool_t *pool)
-{
- while (pool->curr_nr < pool->min_nr)
- mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
-}
-
-static void eq_pf_action(struct work_struct *work)
-{
- struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
-
- mempool_refill(eq->pf_ctx.pool);
-
- spin_lock_irq(&eq->pf_ctx.lock);
- eq_pf_process(eq);
- spin_unlock_irq(&eq->pf_ctx.lock);
-}
-
-static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
-{
- spin_lock_init(&pf_ctx->lock);
- INIT_WORK(&pf_ctx->work, eq_pf_action);
-
- pf_ctx->wq = alloc_ordered_workqueue(name,
- WQ_MEM_RECLAIM);
- if (!pf_ctx->wq)
- return -ENOMEM;
-
- pf_ctx->pool = mempool_create_kmalloc_pool
- (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
- if (!pf_ctx->pool)
- goto err_wq;
-
- return 0;
-err_wq:
- destroy_workqueue(pf_ctx->wq);
- return -ENOMEM;
-}
-
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
- u32 wq_num, u8 type, int error)
-{
- u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
-
- MLX5_SET(page_fault_resume_in, in, opcode,
- MLX5_CMD_OP_PAGE_FAULT_RESUME);
- MLX5_SET(page_fault_resume_in, in, error, !!error);
- MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
- MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
- MLX5_SET(page_fault_resume_in, in, token, token);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
-#endif
-
-static void general_event_handler(struct mlx5_core_dev *dev,
- struct mlx5_eqe *eqe)
-{
- switch (eqe->sub_type) {
- case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
- if (dev->event)
- dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
- break;
- default:
- mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
- eqe->sub_type);
- }
-}
-
-static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
- struct mlx5_eqe *eqe)
-{
- u64 value_lsb;
- u64 value_msb;
-
- value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
- value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
-
- mlx5_core_warn(dev,
- "High temperature on sensors with bit set %llx %llx",
- value_msb, value_lsb);
-}
-
-/* caller must eventually call mlx5_cq_put on the returned cq */
-static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
-{
- struct mlx5_cq_table *table = &eq->cq_table;
- struct mlx5_core_cq *cq = NULL;
-
- spin_lock(&table->lock);
- cq = radix_tree_lookup(&table->tree, cqn);
- if (likely(cq))
- mlx5_cq_hold(cq);
- spin_unlock(&table->lock);
-
- return cq;
-}
-
-static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
{
- struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
- if (unlikely(!cq)) {
- mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- ++cq->arm_sn;
-
- cq->comp(cq);
-
- mlx5_cq_put(cq);
-}
-
-static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
-{
- struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
- if (unlikely(!cq)) {
- mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
- return;
- }
+ u32 count_eqe;
- cq->event(cq, event_type);
+ disable_irq(eq->core.irqn);
+ count_eqe = eq->core.cons_index;
+ mlx5_eq_comp_int(eq->core.irqn, eq);
+ count_eqe = eq->core.cons_index - count_eqe;
+ enable_irq(eq->core.irqn);
- mlx5_cq_put(cq);
+ return count_eqe;
}
-static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
+static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
{
struct mlx5_eq *eq = eq_ptr;
- struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
int set_ci = 0;
- u32 cqn = -1;
- u32 rsn;
- u8 port;
+
+ dev = eq->dev;
+ eqt = dev->priv.eq_table;
while ((eqe = next_eqe_sw(eq))) {
/*
@@ -480,116 +208,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
*/
dma_rmb();
- mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
- eq->eqn, eqe_type_str(eqe->type));
- switch (eqe->type) {
- case MLX5_EVENT_TYPE_COMP:
- cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
- mlx5_eq_cq_completion(eq, cqn);
- break;
- case MLX5_EVENT_TYPE_DCT_DRAINED:
- rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
- rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
- mlx5_rsc_event(dev, rsn, eqe->type);
- break;
- case MLX5_EVENT_TYPE_PATH_MIG:
- case MLX5_EVENT_TYPE_COMM_EST:
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
- mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
- eqe_type_str(eqe->type), eqe->type, rsn);
- mlx5_rsc_event(dev, rsn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
- case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
- eqe_type_str(eqe->type), eqe->type, rsn);
- mlx5_srq_event(dev, rsn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_CMD:
- mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
- break;
-
- case MLX5_EVENT_TYPE_PORT_CHANGE:
- port = (eqe->data.port.port >> 4) & 0xf;
- switch (eqe->sub_type) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- case MLX5_PORT_CHANGE_SUBTYPE_LID:
- case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
- case MLX5_PORT_CHANGE_SUBTYPE_GUID:
- case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
- case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
- if (dev->event)
- dev->event(dev, port_subtype_event(eqe->sub_type),
- (unsigned long)port);
- break;
- default:
- mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
- port, eqe->sub_type);
- }
- break;
- case MLX5_EVENT_TYPE_CQ_ERROR:
- cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
- mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
- cqn, eqe->data.cq_err.syndrome);
- mlx5_eq_cq_event(eq, cqn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_PAGE_REQUEST:
- {
- u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
- s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+ if (likely(eqe->type < MLX5_EVENT_TYPE_MAX))
+ atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+ else
+ mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type);
- mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
- func_id, npages);
- mlx5_core_req_pages_handler(dev, func_id, npages);
- }
- break;
-
- case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
- mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
- break;
-
- case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
- mlx5_port_module_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_PPS_EVENT:
- mlx5_pps_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_FPGA_ERROR:
- case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
- mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
- break;
-
- case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
- mlx5_temp_warning_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_GENERAL_EVENT:
- general_event_handler(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_DEVICE_TRACER:
- mlx5_fw_tracer_event(dev, eqe);
- break;
-
- default:
- mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
- eqe->type, eq->eqn);
- break;
- }
+ atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
++eq->cons_index;
++set_ci;
@@ -608,30 +232,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
eq_update_ci(eq, 1);
- if (cqn != -1)
- tasklet_schedule(&eq->tasklet_ctx.task);
-
return IRQ_HANDLED;
}
-/* Some architectures don't latch interrupts when they are disabled, so using
- * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
- * avoid losing them. It is not recommended to use it, unless this is the last
- * resort.
- */
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
-{
- u32 count_eqe;
-
- disable_irq(eq->irqn);
- count_eqe = eq->cons_index;
- mlx5_eq_int(eq->irqn, eq);
- count_eqe = eq->cons_index - count_eqe;
- enable_irq(eq->irqn);
-
- return count_eqe;
-}
-
static void init_eq_buf(struct mlx5_eq *eq)
{
struct mlx5_eqe *eqe;
@@ -643,39 +246,35 @@ static void init_eq_buf(struct mlx5_eq *eq)
}
}
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type)
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+ struct mlx5_eq_param *param)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
- irq_handler_t handler;
+ u8 vecidx = param->index;
__be64 *pas;
void *eqc;
int inlen;
u32 *in;
int err;
+ if (eq_table->irq_info[vecidx].context)
+ return -EEXIST;
+
/* Init CQ table */
memset(cq_table, 0, sizeof(*cq_table));
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
- eq->type = type;
- eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+ eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
if (err)
return err;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (type == MLX5_EQ_TYPE_PF)
- handler = mlx5_eq_pf_int;
- else
-#endif
- handler = mlx5_eq_int;
-
init_eq_buf(eq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
@@ -691,7 +290,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
mlx5_fill_page_array(&eq->buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
- MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+ MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -704,15 +303,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_in;
- snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
+ snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
name, pci_name(dev->pdev));
+ eq_table->irq_info[vecidx].context = param->context;
+ eq->vecidx = vecidx;
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = pci_irq_vector(dev->pdev, vecidx);
eq->dev = dev;
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
- err = request_irq(eq->irqn, handler, 0,
- priv->irq_info[vecidx].name, eq);
+ err = request_irq(eq->irqn, param->handler, 0,
+ eq_table->irq_info[vecidx].name, param->context);
if (err)
goto err_eq;
@@ -720,21 +321,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_irq;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (type == MLX5_EQ_TYPE_PF) {
- err = init_pf_ctx(&eq->pf_ctx, name);
- if (err)
- goto err_irq;
- } else
-#endif
- {
- INIT_LIST_HEAD(&eq->tasklet_ctx.list);
- INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
- spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
- }
-
/* EQs are created in ARMED state
*/
eq_update_ci(eq, 1);
@@ -756,27 +342,25 @@ err_buf:
return err;
}
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ struct mlx5_irq_info *irq_info;
int err;
+ irq_info = &eq_table->irq_info[eq->vecidx];
+
mlx5_debug_eq_remove(dev, eq);
- free_irq(eq->irqn, eq);
+
+ free_irq(eq->irqn, irq_info->context);
+ irq_info->context = NULL;
+
err = mlx5_cmd_destroy_eq(dev, eq->eqn);
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
synchronize_irq(eq->irqn);
- if (eq->type == MLX5_EQ_TYPE_COMP) {
- tasklet_disable(&eq->tasklet_ctx.task);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- } else if (eq->type == MLX5_EQ_TYPE_PF) {
- cancel_work_sync(&eq->pf_ctx.work);
- destroy_workqueue(eq->pf_ctx.wq);
- mempool_destroy(eq->pf_ctx.pool);
-#endif
- }
mlx5_buf_free(dev, &eq->buf);
return err;
@@ -816,28 +400,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
return 0;
}
-int mlx5_eq_init(struct mlx5_core_dev *dev)
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
{
- int err;
+ struct mlx5_eq_table *eq_table;
+ int i, err;
- spin_lock_init(&dev->priv.eq_table.lock);
+ eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+ if (!eq_table)
+ return -ENOMEM;
+
+ dev->priv.eq_table = eq_table;
err = mlx5_eq_debugfs_init(dev);
+ if (err)
+ goto kvfree_eq_table;
+ mutex_init(&eq_table->lock);
+ for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+ ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+
+ return 0;
+
+kvfree_eq_table:
+ kvfree(eq_table);
+ dev->priv.eq_table = NULL;
return err;
}
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
{
mlx5_eq_debugfs_cleanup(dev);
+ kvfree(dev->priv.eq_table);
}
-int mlx5_start_eqs(struct mlx5_core_dev *dev)
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int err;
+
+ mutex_lock(&eq_table->lock);
+ if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
+ err = -ENOSPC;
+ goto unlock;
+ }
+
+ err = create_map_eq(dev, eq, name, param);
+unlock:
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
+ mutex_lock(&eq_table->lock);
+ err = destroy_unmap_eq(dev, eq);
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int cq_err_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_cq *cq;
+ struct mlx5_eqe *eqe;
+ struct mlx5_eq *eq;
+ u32 cqn;
+
+ /* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+ eq = &eqt->async_eq;
+ eqe = data;
+
+ cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+ cqn, eqe->data.cq_err.syndrome);
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return NOTIFY_OK;
+ }
+
+ cq->event(cq, type);
+
+ mlx5_cq_put(cq);
+
+ return NOTIFY_OK;
+}
+
+static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
+{
+ u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+
if (MLX5_VPORT_MANAGER(dev))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
@@ -865,127 +527,518 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
- err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
- MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
- "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+ return async_event_mask;
+}
+
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_param param = {};
+ int err;
+
+ MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+ mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_CMD_IDX,
+ .mask = 1ull << MLX5_EVENT_TYPE_CMD,
+ .nent = MLX5_NUM_CMD_EQE,
+ .context = &table->cmd_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
- return err;
+ goto err0;
}
mlx5_cmd_use_events(dev);
- err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
- MLX5_NUM_ASYNC_EQE, async_event_mask,
- "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_ASYNC_IDX,
+ .mask = gather_async_events_mask(dev),
+ .nent = MLX5_NUM_ASYNC_EQE,
+ .context = &table->async_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
goto err1;
}
- err = mlx5_create_map_eq(dev, &table->pages_eq,
- MLX5_EQ_VEC_PAGES,
- /* TODO: sriov max_vf + */ 1,
- 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
- MLX5_EQ_TYPE_ASYNC);
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_PAGEREQ_IDX,
+ .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
+ .nent = /* TODO: sriov max_vf + */ 1,
+ .context = &table->pages_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
goto err2;
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_create_map_eq(dev, &table->pfault_eq,
- MLX5_EQ_VEC_PFAULT,
- MLX5_NUM_ASYNC_EQE,
- 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
- "mlx5_page_fault_eq",
- MLX5_EQ_TYPE_PF);
- if (err) {
- mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
- err);
- goto err3;
- }
- }
-
- return err;
-err3:
- mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-#else
return err;
-#endif
err2:
- mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ destroy_async_eq(dev, &table->async_eq);
err1:
mlx5_cmd_use_polling(dev);
- mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ destroy_async_eq(dev, &table->cmd_eq);
+err0:
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
}
-void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
int err;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
- if (err)
- mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
- err);
- }
-#endif
-
- err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+ err = destroy_async_eq(dev, &table->pages_eq);
if (err)
mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
err);
- err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ err = destroy_async_eq(dev, &table->async_eq);
if (err)
mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
err);
+
mlx5_cmd_use_polling(dev);
- err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ err = destroy_async_eq(dev, &table->cmd_eq);
if (err)
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
err);
+
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
}
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen)
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
- u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+ return &dev->priv.eq_table->async_eq;
+}
- MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
- MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed For RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq_param *param)
+{
+ struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+ int err;
+
+ if (!eq)
+ return ERR_PTR(-ENOMEM);
+
+ err = create_async_eq(dev, name, eq, param);
+ if (err) {
+ kvfree(eq);
+ eq = ERR_PTR(err);
+ }
+
+ return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ if (IS_ERR(eq))
+ return -EINVAL;
+
+ err = destroy_async_eq(dev, eq);
+ if (err)
+ goto out;
+
+ kvfree(eq);
+out:
+ return err;
+}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
+{
+ u32 ci = eq->cons_index + cc;
+ struct mlx5_eqe *eqe;
+
+ eqe = get_eqe(eq, ci & (eq->nent - 1));
+ eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ if (eqe)
+ dma_rmb();
+
+ return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val;
+
+ eq->cons_index += cc;
+ val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
+
+/* Completion EQs */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+ struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+ if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+ irq_info->mask);
+
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irq, irq_info->mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+ return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ struct mlx5_priv *priv = &mdev->priv;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+ struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+ irq_set_affinity_hint(irq, NULL);
+ free_cpumask_var(irq_info->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
+ err = set_comp_irq_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ clear_comp_irq_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
+ clear_comp_irq_affinity_hint(mdev, i);
+}
+
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+
+ clear_comp_irqs_affinity_hints(dev);
+
+#ifdef CONFIG_RFS_ACCEL
+ if (table->rmap) {
+ free_irq_cpu_rmap(table->rmap);
+ table->rmap = NULL;
+ }
+#endif
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ list_del(&eq->list);
+ if (destroy_unmap_eq(dev, &eq->core))
+ mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+ eq->core.eqn);
+ tasklet_disable(&eq->tasklet_ctx.task);
+ kfree(eq);
+ }
+}
+
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_eq_comp *eq;
+ int ncomp_vec;
+ int nent;
+ int err;
+ int i;
+
+ INIT_LIST_HEAD(&table->comp_eqs_list);
+ ncomp_vec = table->num_comp_vectors;
+ nent = MLX5_COMP_EQ_SIZE;
+#ifdef CONFIG_RFS_ACCEL
+ table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
+ if (!table->rmap)
+ return -ENOMEM;
+#endif
+ for (i = 0; i < ncomp_vec; i++) {
+ int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+ struct mlx5_eq_param param = {};
+
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq) {
+ err = -ENOMEM;
+ goto clean;
+ }
+
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+
+#ifdef CONFIG_RFS_ACCEL
+ irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
+#endif
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ param = (struct mlx5_eq_param) {
+ .index = vecidx,
+ .mask = 0,
+ .nent = nent,
+ .context = &eq->core,
+ .handler = mlx5_eq_comp_int
+ };
+ err = create_map_eq(dev, &eq->core, name, &param);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+ /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
+ list_add_tail(&eq->list, &table->comp_eqs_list);
+ }
+
+ err = set_comp_irq_affinity_hints(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+ goto clean;
+ }
+
+ return 0;
+
+clean:
+ destroy_comp_eqs(dev);
+ return err;
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+ int err = -ENOENT;
+ int i = 0;
+
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (i++ == vector) {
+ *eqn = eq->core.eqn;
+ *irqn = eq->core.irqn;
+ err = 0;
+ break;
+ }
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+ return dev->priv.eq_table->num_comp_vectors;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+ /* TODO: consider irq_get_affinity_mask(irq) */
+ return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ return dev->priv.eq_table->rmap;
+#else
+ return NULL;
+#endif
+}
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq;
+
+ list_for_each_entry(eq, &table->comp_eqs_list, list) {
+ if (eq->core.eqn == eqn)
+ return eq;
+ }
+
+ return ERR_PTR(-ENOENT);
}
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int i, max_eqs;
+
+ clear_comp_irqs_affinity_hints(dev);
#ifdef CONFIG_RFS_ACCEL
- if (dev->rmap) {
- free_irq_cpu_rmap(dev->rmap);
- dev->rmap = NULL;
+ if (table->rmap) {
+ free_irq_cpu_rmap(table->rmap);
+ table->rmap = NULL;
}
#endif
- list_for_each_entry(eq, &table->comp_eqs_list, list)
- free_irq(eq->irqn, eq);
-
- free_irq(table->pages_eq.irqn, &table->pages_eq);
- free_irq(table->async_eq.irqn, &table->async_eq);
- free_irq(table->cmd_eq.irqn, &table->cmd_eq);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg))
- free_irq(table->pfault_eq.irqn, &table->pfault_eq);
-#endif
+
+ mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+ max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
+ for (i = max_eqs - 1; i >= 0; i--) {
+ if (!table->irq_info[i].context)
+ continue;
+ free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
+ table->irq_info[i].context = NULL;
+ }
+ mutex_unlock(&table->lock);
+ pci_free_irq_vectors(dev->pdev);
+}
+
+static int alloc_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_eq_table *table = priv->eq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_EQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
+ if (!table->irq_info)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
+ nvec, PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq_info;
+ }
+
+ table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+
+ return 0;
+
+err_free_irq_info:
+ kfree(table->irq_info);
+ return err;
+}
+
+static void free_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
pci_free_irq_vectors(dev->pdev);
+ kfree(priv->eq_table->irq_info);
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = alloc_irq_vectors(dev);
+ if (err) {
+ mlx5_core_err(dev, "alloc irq vectors failed\n");
+ return err;
+ }
+
+ err = create_async_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create async EQs\n");
+ goto err_async_eqs;
+ }
+
+ err = create_comp_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create completion EQs\n");
+ goto err_comp_eqs;
+ }
+
+ return 0;
+err_comp_eqs:
+ destroy_async_eqs(dev);
+err_async_eqs:
+ free_irq_vectors(dev);
+ return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+ destroy_comp_eqs(dev);
+ destroy_async_eqs(dev);
+ free_irq_vectors(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index d004957328f9..e6a9b19d8626 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,8 +36,10 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "eswitch.h"
#include "fs_core.h"
+#include "lib/eq.h"
#define UPLINK_VPORT 0xFFFF
@@ -1567,7 +1569,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
/* Mark this vport as disabled to discard new events */
vport->enabled = false;
- synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
/* Wait for current already scheduled events to complete */
flush_workqueue(esw->work_queue);
/* Disable events from this vport */
@@ -1593,10 +1594,25 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
mutex_unlock(&esw->state_lock);
}
+static int eswitch_vport_event(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
+ struct mlx5_eqe *eqe = data;
+ struct mlx5_vport *vport;
+ u16 vport_num;
+
+ vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+ vport = &esw->vports[vport_num];
+ if (vport->enabled)
+ queue_work(esw->work_queue, &vport->vport_change_handler);
+
+ return NOTIFY_OK;
+}
+
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
-
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
{
int err;
@@ -1640,6 +1656,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
for (i = 0; i <= nvfs; i++)
esw_enable_vport(esw, i, enabled_events);
+ if (mode == SRIOV_LEGACY) {
+ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+ mlx5_eq_notifier_register(esw->dev, &esw->nb);
+ }
+
esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
esw->enabled_vports);
return 0;
@@ -1669,6 +1690,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
mc_promisc = &esw->mc_promisc;
nvports = esw->enabled_vports;
+ if (esw->mode == SRIOV_LEGACY)
+ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+
for (i = 0; i < esw->total_vports; i++)
esw_disable_vport(esw, i);
@@ -1777,23 +1801,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
kfree(esw);
}
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
-{
- struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
- u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
- struct mlx5_vport *vport;
-
- if (!esw) {
- pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
- vport_num);
- return;
- }
-
- vport = &esw->vports[vport_num];
- if (vport->enabled)
- queue_work(esw->work_queue, &vport->vport_change_handler);
-}
-
/* Vport Administration */
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index aaafc9f17115..480ffa294867 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -181,6 +181,7 @@ struct esw_mc_addr { /* SRIOV only */
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
struct mlx5_eswitch_fdb fdb_table;
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
@@ -211,7 +212,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
@@ -352,7 +352,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
new file mode 100644
index 000000000000..e92df7020a26
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+struct mlx5_event_nb {
+ struct mlx5_nb nb;
+ void *ctx;
+};
+
+/* General events handlers for the low level mlx5_core driver
+ *
+ * Other Major feature specific events such as
+ * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
+ * separate notifiers callbacks, specifically by those mlx5 components.
+ */
+static int any_notifier(struct notifier_block *, unsigned long, void *);
+static int temp_warn(struct notifier_block *, unsigned long, void *);
+static int port_module(struct notifier_block *, unsigned long, void *);
+
+/* handler which forwards the event to events->nh, driver notifiers */
+static int forward_event(struct notifier_block *, unsigned long, void *);
+
+static struct mlx5_nb events_nbs_ref[] = {
+ /* Events to be proccessed by mlx5_core */
+ {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
+ {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
+ {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+
+ /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+ /* QP/WQ resource events to forward */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
+ /* SRQ events */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
+};
+
+struct mlx5_events {
+ struct mlx5_core_dev *dev;
+ struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
+ /* driver notifier chain */
+ struct atomic_notifier_head nh;
+ /* port module events stats */
+ struct mlx5_pme_stats pme_stats;
+};
+
+static const char *eqe_type_str(u8 type)
+{
+ switch (type) {
+ case MLX5_EVENT_TYPE_COMP:
+ return "MLX5_EVENT_TYPE_COMP";
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ return "MLX5_EVENT_TYPE_PATH_MIG";
+ case MLX5_EVENT_TYPE_COMM_EST:
+ return "MLX5_EVENT_TYPE_COMM_EST";
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ return "MLX5_EVENT_TYPE_SQ_DRAINED";
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return "MLX5_EVENT_TYPE_CQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+ return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return "MLX5_EVENT_TYPE_PORT_CHANGE";
+ case MLX5_EVENT_TYPE_GPIO_EVENT:
+ return "MLX5_EVENT_TYPE_GPIO_EVENT";
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+ case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+ return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+ case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+ return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+ case MLX5_EVENT_TYPE_STALL_EVENT:
+ return "MLX5_EVENT_TYPE_STALL_EVENT";
+ case MLX5_EVENT_TYPE_CMD:
+ return "MLX5_EVENT_TYPE_CMD";
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+ case MLX5_EVENT_TYPE_PAGE_FAULT:
+ return "MLX5_EVENT_TYPE_PAGE_FAULT";
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ return "MLX5_EVENT_TYPE_PPS_EVENT";
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_ERROR";
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ default:
+ return "Unrecognized event";
+ }
+}
+
+/* handles all FW events, type == eqe->type */
+static int any_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ return NOTIFY_OK;
+}
+
+/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+ u64 value_lsb;
+ u64 value_msb;
+
+ value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+ value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+ mlx5_core_warn(events->dev,
+ "High temperature on sensors with bit set %llx %llx",
+ value_msb, value_lsb);
+
+ return NOTIFY_OK;
+}
+
+/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
+ "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */
+ "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */
+ "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */
+};
+
+static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
+ "Power budget exceeded",
+ "Long Range for non MLNX cable",
+ "Bus stuck(I2C or data shorted)",
+ "No EEPROM/retry timeout",
+ "Enforce part number list",
+ "Unknown identifier",
+ "High Temperature",
+ "Bad or shorted cable/module",
+ "Unknown status",
+};
+
+/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static int port_module(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ enum port_module_event_status_type module_status;
+ enum port_module_event_error_type error_type;
+ struct mlx5_eqe_port_module *module_event_eqe;
+ u8 module_num;
+
+ module_event_eqe = &eqe->data.port_module;
+ module_num = module_event_eqe->module;
+ module_status = module_event_eqe->module_status &
+ PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+ error_type = module_event_eqe->error_type &
+ PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+ if (module_status < MLX5_MODULE_STATUS_ERROR) {
+ events->pme_stats.status_counters[module_status - 1]++;
+ } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
+ if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
+ /* Unknown error type */
+ error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
+ events->pme_stats.error_counters[error_type]++;
+ }
+
+ if (!printk_ratelimit())
+ return NOTIFY_OK;
+
+ if (module_status < MLX5_MODULE_STATUS_ERROR)
+ mlx5_core_info(events->dev,
+ "Port module event: module %u, %s\n",
+ module_num, mlx5_pme_status[module_status - 1]);
+
+ else if (module_status == MLX5_MODULE_STATUS_ERROR)
+ mlx5_core_info(events->dev,
+ "Port module event[error]: module %u, %s, %s\n",
+ module_num, mlx5_pme_status[module_status - 1],
+ mlx5_pme_error[error_type]);
+
+ return NOTIFY_OK;
+}
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
+{
+ *stats = dev->priv.events->pme_stats;
+}
+
+/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ atomic_notifier_call_chain(&events->nh, event, data);
+ return NOTIFY_OK;
+}
+
+int mlx5_events_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
+
+ if (!events)
+ return -ENOMEM;
+
+ ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
+ events->dev = dev;
+ dev->priv.events = events;
+ return 0;
+}
+
+void mlx5_events_cleanup(struct mlx5_core_dev *dev)
+{
+ kvfree(dev->priv.events);
+}
+
+void mlx5_events_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
+ events->notifiers[i].nb = events_nbs_ref[i];
+ events->notifiers[i].ctx = events;
+ mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
+ }
+}
+
+void mlx5_events_stop(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
+ mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+}
+
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_register(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_register);
+
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_unregister(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_unregister);
+
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
+{
+ return atomic_notifier_call_chain(&events->nh, event, data);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 436a8136f26f..27c5f6c7d36a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -36,6 +36,7 @@
#include "mlx5_core.h"
#include "lib/mlx5.h"
+#include "lib/eq.h"
#include "fpga/core.h"
#include "fpga/conn.h"
@@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
return 0;
}
+static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
+
+static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
+static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
if (err)
goto out;
+ MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
+ MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
+
err = mlx5_fpga_conn_device_init(fdev);
if (err)
goto err_rsvd_gid;
@@ -201,6 +223,8 @@ err_conn_init:
mlx5_fpga_conn_device_cleanup(fdev);
err_rsvd_gid:
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
mlx5_core_unreserve_gids(mdev, max_num_qps);
out:
spin_lock_irqsave(&fdev->state_lock, flags);
@@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
}
mlx5_fpga_conn_device_cleanup(fdev);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+
max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
mlx5_core_unreserve_gids(mdev, max_num_qps);
}
@@ -283,9 +310,10 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
return "Unknown";
}
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
+ unsigned long event, void *eqe)
{
- struct mlx5_fpga_device *fdev = mdev->fpga;
+ void *data = ((struct mlx5_eqe *)eqe)->data.raw;
const char *event_name;
bool teardown = false;
unsigned long flags;
@@ -303,9 +331,7 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
break;
default:
- mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
- event);
- return;
+ return NOTIFY_DONE;
}
spin_lock_irqsave(&fdev->state_lock, flags);
@@ -326,4 +352,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
*/
if (teardown)
mlx5_trigger_health_work(fdev->mdev);
+
+ return NOTIFY_OK;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 3e2355c8df3f..7e2e871dbf83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -35,11 +35,16 @@
#ifdef CONFIG_MLX5_FPGA
+#include <linux/mlx5/eq.h>
+
+#include "lib/eq.h"
#include "fpga/cmd.h"
/* Represents an Innova device */
struct mlx5_fpga_device {
struct mlx5_core_dev *mdev;
+ struct mlx5_nb fpga_err_nb;
+ struct mlx5_nb fpga_qp_err_nb;
spinlock_t state_lock; /* Protects state transitions */
enum mlx5_fpga_status state;
enum mlx5_fpga_image last_admin_image;
@@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
#else
@@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
{
}
-static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
- void *data)
-{
-}
-
#endif
#endif /* __MLX5_FPGA_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 43118de8ee99..196c07383082 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -38,6 +38,8 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
enum {
MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
@@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
&dev->iseg->cmdq_addr_l_sz);
}
-static void trigger_cmd_completions(struct mlx5_core_dev *dev)
-{
- unsigned long flags;
- u64 vector;
-
- /* wait for pending handlers to complete */
- synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
- spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
- vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
- if (!vector)
- goto no_trig;
-
- vector |= MLX5_TRIGGERED_CMD_COMP;
- spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-
- mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
- mlx5_cmd_comp_handler(dev, vector, true);
- return;
-
-no_trig:
- spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-}
-
static int in_fatal(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
@@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
mlx5_core_err(dev, "start\n");
if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
- trigger_cmd_completions(dev);
+ mlx5_cmd_trigger_completions(dev);
}
- mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+ mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
mlx5_core_err(dev, "end\n");
unlock:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 0d90b1b4a3d3..d27c239e7d6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -33,6 +33,7 @@
#include <linux/clocksource.h>
#include <linux/highmem.h>
#include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
#include "en.h"
#include "clock.h"
@@ -439,16 +440,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
}
-void mlx5_pps_event(struct mlx5_core_dev *mdev,
- struct mlx5_eqe *eqe)
+static int mlx5_pps_event(struct notifier_block *nb,
+ unsigned long type, void *data)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+ struct mlx5_core_dev *mdev = clock->mdev;
struct ptp_clock_event ptp_event;
- struct timespec64 ts;
- u64 nsec_now, nsec_delta;
u64 cycles_now, cycles_delta;
+ u64 nsec_now, nsec_delta, ns;
+ struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
- s64 ns;
+ struct timespec64 ts;
unsigned long flags;
switch (clock->ptp_info.pin_config[pin].func) {
@@ -463,6 +465,7 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
} else {
ptp_event.type = PTP_CLOCK_EXTTS;
}
+ /* TODOL clock->ptp can be NULL if ptp_clock_register failes */
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
@@ -481,8 +484,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
write_sequnlock_irqrestore(&clock->lock, flags);
break;
default:
- mlx5_core_err(mdev, " Unhandled event\n");
+ mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+ clock->ptp_info.pin_config[pin].func);
}
+
+ return NOTIFY_OK;
}
void mlx5_init_clock(struct mlx5_core_dev *mdev)
@@ -567,6 +573,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
PTR_ERR(clock->ptp));
clock->ptp = NULL;
}
+
+ MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+ mlx5_eq_notifier_register(mdev, &clock->pps_nb);
}
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
@@ -576,6 +585,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
return;
+ mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
if (clock->ptp) {
ptp_clock_unregister(clock->ptp);
clock->ptp = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 263cb6e2aeee..31600924bdc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -36,7 +36,6 @@
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
void mlx5_init_clock(struct mlx5_core_dev *mdev);
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
-void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
@@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
#else
static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
-
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644
index 000000000000..c0fb6d72b695
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+ struct list_head list;
+ struct list_head process_list;
+ struct tasklet_struct task;
+ spinlock_t lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+ spinlock_t lock; /* protect radix tree */
+ struct radix_tree_root tree;
+};
+
+struct mlx5_eq {
+ struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
+ __be32 __iomem *doorbell;
+ u32 cons_index;
+ struct mlx5_frag_buf buf;
+ int size;
+ unsigned int vecidx;
+ unsigned int irqn;
+ u8 eqn;
+ int nent;
+ struct mlx5_rsc_debug *dbg;
+};
+
+struct mlx5_eq_comp {
+ struct mlx5_eq core; /* Must be first */
+ struct mlx5_eq_tasklet tasklet_ctx;
+ struct list_head list;
+};
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+ return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+ return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(unsigned long data);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 7550b1cc8c6a..4d78a459676e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -33,6 +33,8 @@
#ifndef __LIB_MLX5_H__
#define __LIB_MLX5_H__
+#include "mlx5_core.h"
+
void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
@@ -40,4 +42,37 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+/* TODO move to lib/events.h */
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF
+
+enum port_module_event_status_type {
+ MLX5_MODULE_STATUS_PLUGGED = 0x1,
+ MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+ MLX5_MODULE_STATUS_ERROR = 0x3,
+ MLX5_MODULE_STATUS_NUM = 0x3,
+};
+
+enum port_module_event_error_type {
+ MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED,
+ MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE,
+ MLX5_MODULE_EVENT_ERROR_BUS_STUCK,
+ MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT,
+ MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST,
+ MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER,
+ MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE,
+ MLX5_MODULE_EVENT_ERROR_BAD_CABLE,
+ MLX5_MODULE_EVENT_ERROR_UNKNOWN,
+ MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+ u64 status_counters[MLX5_MODULE_STATUS_NUM];
+ u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 28132c7dc05f..778995573812 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -43,7 +43,6 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/debugfs.h>
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
@@ -53,6 +52,7 @@
#endif
#include <net/devlink.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "fs_core.h"
#include "lib/mpfs.h"
#include "eswitch.h"
@@ -319,51 +319,6 @@ static void release_bar(struct pci_dev *pdev)
pci_release_regions(pdev);
}
-static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_eq_table *table = &priv->eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
- int nvec;
- int err;
-
- nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
- MLX5_EQ_VEC_COMP_BASE;
- nvec = min_t(int, nvec, num_eqs);
- if (nvec <= MLX5_EQ_VEC_COMP_BASE)
- return -ENOMEM;
-
- priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
- if (!priv->irq_info)
- return -ENOMEM;
-
- nvec = pci_alloc_irq_vectors(dev->pdev,
- MLX5_EQ_VEC_COMP_BASE + 1, nvec,
- PCI_IRQ_MSIX);
- if (nvec < 0) {
- err = nvec;
- goto err_free_irq_info;
- }
-
- table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
- return 0;
-
-err_free_irq_info:
- kfree(priv->irq_info);
- return err;
-}
-
-static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
-
- pci_free_irq_vectors(dev->pdev);
- kfree(priv->irq_info);
-}
-
struct mlx5_reg_host_endianness {
u8 he;
u8 rsvd[15];
@@ -637,177 +592,6 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
return (u64)timer_l | (u64)timer_h1 << 32;
}
-static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
- if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
- mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
- return -ENOMEM;
- }
-
- cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
- priv->irq_info[i].mask);
-
- if (IS_ENABLED(CONFIG_SMP) &&
- irq_set_affinity_hint(irq, priv->irq_info[i].mask))
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
- return 0;
-}
-
-static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
- irq_set_affinity_hint(irq, NULL);
- free_cpumask_var(priv->irq_info[i].mask);
-}
-
-static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int err;
- int i;
-
- for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
- err = mlx5_irq_set_affinity_hint(mdev, i);
- if (err)
- goto err_out;
- }
-
- return 0;
-
-err_out:
- for (i--; i >= 0; i--)
- mlx5_irq_clear_affinity_hint(mdev, i);
-
- return err;
-}
-
-static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int i;
-
- for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
- mlx5_irq_clear_affinity_hint(mdev, i);
-}
-
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
- unsigned int *irqn)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq, *n;
- int err = -ENOENT;
-
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
- if (eq->index == vector) {
- *eqn = eq->eqn;
- *irqn = eq->irqn;
- err = 0;
- break;
- }
- }
- spin_unlock(&table->lock);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_vector2eqn);
-
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq;
-
- spin_lock(&table->lock);
- list_for_each_entry(eq, &table->comp_eqs_list, list)
- if (eq->eqn == eqn) {
- spin_unlock(&table->lock);
- return eq;
- }
-
- spin_unlock(&table->lock);
-
- return ERR_PTR(-ENOENT);
-}
-
-static void free_comp_eqs(struct mlx5_core_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq, *n;
-
-#ifdef CONFIG_RFS_ACCEL
- if (dev->rmap) {
- free_irq_cpu_rmap(dev->rmap);
- dev->rmap = NULL;
- }
-#endif
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
- list_del(&eq->list);
- spin_unlock(&table->lock);
- if (mlx5_destroy_unmap_eq(dev, eq))
- mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
- eq->eqn);
- kfree(eq);
- spin_lock(&table->lock);
- }
- spin_unlock(&table->lock);
-}
-
-static int alloc_comp_eqs(struct mlx5_core_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- char name[MLX5_MAX_IRQ_NAME];
- struct mlx5_eq *eq;
- int ncomp_vec;
- int nent;
- int err;
- int i;
-
- INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_vec = table->num_comp_vectors;
- nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
- dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
- if (!dev->rmap)
- return -ENOMEM;
-#endif
- for (i = 0; i < ncomp_vec; i++) {
- eq = kzalloc(sizeof(*eq), GFP_KERNEL);
- if (!eq) {
- err = -ENOMEM;
- goto clean;
- }
-
-#ifdef CONFIG_RFS_ACCEL
- irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
- MLX5_EQ_VEC_COMP_BASE + i));
-#endif
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
- err = mlx5_create_map_eq(dev, eq,
- i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
- name, MLX5_EQ_TYPE_COMP);
- if (err) {
- kfree(eq);
- goto clean;
- }
- mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
- eq->index = i;
- spin_lock(&table->lock);
- list_add_tail(&eq->list, &table->comp_eqs_list);
- spin_unlock(&table->lock);
- }
-
- return 0;
-
-clean:
- free_comp_eqs(dev);
- return err;
-}
-
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
@@ -944,22 +728,26 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto out;
}
- err = mlx5_eq_init(dev);
+ err = mlx5_eq_table_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize eq\n");
goto out;
}
+ err = mlx5_events_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize events\n");
+ goto err_eq_cleanup;
+ }
+
err = mlx5_cq_debugfs_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
- goto err_eq_cleanup;
+ goto err_events_cleanup;
}
mlx5_init_qp_table(dev);
- mlx5_init_srq_table(dev);
-
mlx5_init_mkey_table(dev);
mlx5_init_reserved_gids(dev);
@@ -1013,12 +801,12 @@ err_rl_cleanup:
err_tables_cleanup:
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_mkey_table(dev);
- mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
-
+err_events_cleanup:
+ mlx5_events_cleanup(dev);
err_eq_cleanup:
- mlx5_eq_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
out:
return err;
@@ -1036,10 +824,10 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cleanup_clock(dev);
mlx5_cleanup_reserved_gids(dev);
mlx5_cleanup_mkey_table(dev);
- mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
- mlx5_eq_cleanup(dev);
+ mlx5_events_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
}
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
@@ -1131,16 +919,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto reclaim_boot_pages;
}
- err = mlx5_pagealloc_start(dev);
- if (err) {
- dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
- goto reclaim_boot_pages;
- }
-
err = mlx5_cmd_init_hca(dev, sw_owner_id);
if (err) {
dev_err(&pdev->dev, "init hca failed\n");
- goto err_pagealloc_stop;
+ goto reclaim_boot_pages;
}
mlx5_set_driver_version(dev);
@@ -1161,23 +943,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
}
}
- err = mlx5_alloc_irq_vectors(dev);
- if (err) {
- dev_err(&pdev->dev, "alloc irq vectors failed\n");
- goto err_cleanup_once;
- }
-
dev->priv.uar = mlx5_get_uars_page(dev);
if (IS_ERR(dev->priv.uar)) {
dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
err = PTR_ERR(dev->priv.uar);
- goto err_disable_msix;
+ goto err_get_uars;
}
- err = mlx5_start_eqs(dev);
+ mlx5_events_start(dev);
+ mlx5_pagealloc_start(dev);
+
+ err = mlx5_eq_table_create(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
- goto err_put_uars;
+ dev_err(&pdev->dev, "Failed to create EQs\n");
+ goto err_eq_table;
}
err = mlx5_fw_tracer_init(dev->tracer);
@@ -1186,18 +965,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_fw_tracer;
}
- err = alloc_comp_eqs(dev);
- if (err) {
- dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
- goto err_comp_eqs;
- }
-
- err = mlx5_irq_set_affinity_hints(dev);
- if (err) {
- dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
- goto err_affinity_hints;
- }
-
err = mlx5_fpga_device_start(dev);
if (err) {
dev_err(&pdev->dev, "fpga device start failed %d\n", err);
@@ -1266,24 +1033,17 @@ err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
- mlx5_irq_clear_affinity_hints(dev);
-
-err_affinity_hints:
- free_comp_eqs(dev);
-
-err_comp_eqs:
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
- mlx5_stop_eqs(dev);
+ mlx5_eq_table_destroy(dev);
-err_put_uars:
+err_eq_table:
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
mlx5_put_uars_page(dev, priv->uar);
-err_disable_msix:
- mlx5_free_irq_vectors(dev);
-
-err_cleanup_once:
+err_get_uars:
if (boot)
mlx5_cleanup_once(dev);
@@ -1294,9 +1054,6 @@ err_stop_poll:
goto out_err;
}
-err_pagealloc_stop:
- mlx5_pagealloc_stop(dev);
-
reclaim_boot_pages:
mlx5_reclaim_startup_pages(dev);
@@ -1340,21 +1097,20 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
- mlx5_irq_clear_affinity_hints(dev);
- free_comp_eqs(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
- mlx5_stop_eqs(dev);
+ mlx5_eq_table_destroy(dev);
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
mlx5_put_uars_page(dev, priv->uar);
- mlx5_free_irq_vectors(dev);
if (cleanup)
mlx5_cleanup_once(dev);
mlx5_stop_health_poll(dev, cleanup);
+
err = mlx5_cmd_teardown_hca(dev);
if (err) {
dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
goto out;
}
- mlx5_pagealloc_stop(dev);
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
mlx5_cmd_cleanup(dev);
@@ -1364,12 +1120,6 @@ out:
return err;
}
-struct mlx5_core_event_handler {
- void (*event)(struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- void *data);
-};
-
static const struct devlink_ops mlx5_devlink_ops = {
#ifdef CONFIG_MLX5_ESWITCH
.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
@@ -1403,7 +1153,6 @@ static int init_one(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
dev->pdev = pdev;
- dev->event = mlx5_core_event;
dev->profile = &profile[prof_sel];
INIT_LIST_HEAD(&priv->ctx_list);
@@ -1411,17 +1160,6 @@ static int init_one(struct pci_dev *pdev,
mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
- INIT_LIST_HEAD(&priv->waiting_events_list);
- priv->is_accum_events = false;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- err = init_srcu_struct(&priv->pfault_srcu);
- if (err) {
- dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
- err);
- goto clean_dev;
- }
-#endif
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
@@ -1430,7 +1168,7 @@ static int init_one(struct pci_dev *pdev,
err = mlx5_pci_init(dev, priv);
if (err) {
dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
- goto clean_srcu;
+ goto clean_dev;
}
err = mlx5_health_init(dev);
@@ -1439,12 +1177,14 @@ static int init_one(struct pci_dev *pdev,
goto close_pci;
}
- mlx5_pagealloc_init(dev);
+ err = mlx5_pagealloc_init(dev);
+ if (err)
+ goto err_pagealloc_init;
err = mlx5_load_one(dev, priv, true);
if (err) {
dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
- goto clean_health;
+ goto err_load_one;
}
request_module_nowait(MLX5_IB_MOD);
@@ -1458,16 +1198,13 @@ static int init_one(struct pci_dev *pdev,
clean_load:
mlx5_unload_one(dev, priv, true);
-clean_health:
+err_load_one:
mlx5_pagealloc_cleanup(dev);
+err_pagealloc_init:
mlx5_health_cleanup(dev);
close_pci:
mlx5_pci_close(dev, priv);
-clean_srcu:
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&priv->pfault_srcu);
clean_dev:
-#endif
devlink_free(devlink);
return err;
@@ -1491,9 +1228,6 @@ static void remove_one(struct pci_dev *pdev)
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&priv->pfault_srcu);
-#endif
devlink_free(devlink);
}
@@ -1637,7 +1371,6 @@ succeed:
* kexec. There is no need to cleanup the mlx5_core software
* contexts.
*/
- mlx5_irq_clear_affinity_hints(dev);
mlx5_core_eq_free_irqs(dev);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 0594d0961cb3..fd3141a4b3f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -78,6 +78,11 @@ do { \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
+#define mlx5_core_warn_once(__dev, format, ...) \
+ dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
#define mlx5_core_info(__dev, format, ...) \
dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
@@ -97,12 +102,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
-
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
- unsigned long param);
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
- struct mlx5_pagefault *pfault);
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
void mlx5_disable_device(struct mlx5_core_dev *dev);
void mlx5_recover_device(struct mlx5_core_dev *dev);
@@ -124,28 +123,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
-/* This function should only be called after mlx5_cmd_force_teardown_hca */
-void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
-void mlx5_cq_tasklet_cb(unsigned long data);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
@@ -159,6 +137,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
+int mlx5_events_init(struct mlx5_core_dev *dev);
+void mlx5_events_cleanup(struct mlx5_core_dev *dev);
+void mlx5_events_start(struct mlx5_core_dev *dev);
+void mlx5_events_stop(struct mlx5_core_dev *dev);
+
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_attach_device(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index e36d3e3675f9..a83b517b0714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -37,6 +37,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
MLX5_PAGES_CANT_GIVE = 0,
@@ -433,15 +434,28 @@ static void pages_work_handler(struct work_struct *work)
kfree(req);
}
-void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
- s32 npages)
+static int req_pages_handler(struct notifier_block *nb,
+ unsigned long type, void *data)
{
struct mlx5_pages_req *req;
-
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ u16 func_id;
+ s32 npages;
+
+ priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ eqe = data;
+
+ func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+ npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+ mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+ func_id, npages);
req = kzalloc(sizeof(*req), GFP_ATOMIC);
if (!req) {
mlx5_core_warn(dev, "failed to allocate pages request\n");
- return;
+ return NOTIFY_DONE;
}
req->dev = dev;
@@ -449,6 +463,7 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
req->npages = npages;
INIT_WORK(&req->work, pages_work_handler);
queue_work(dev->priv.pg_wq, &req->work);
+ return NOTIFY_OK;
}
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
@@ -524,29 +539,32 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
return 0;
}
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{
dev->priv.page_root = RB_ROOT;
INIT_LIST_HEAD(&dev->priv.free_list);
+ dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+ if (!dev->priv.pg_wq)
+ return -ENOMEM;
+
+ return 0;
}
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
- /* nothing */
+ destroy_workqueue(dev->priv.pg_wq);
}
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
- dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
- if (!dev->priv.pg_wq)
- return -ENOMEM;
-
- return 0;
+ MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
+ mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
}
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
- destroy_workqueue(dev->priv.pg_wq);
+ mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
+ flush_workqueue(dev->priv.pg_wq);
}
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 31a9cbd85689..2b82f35f4c35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -915,63 +915,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
*enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
}
-static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
- "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */
- "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */
- "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */
-};
-
-static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
- "Power budget exceeded",
- "Long Range for non MLNX cable",
- "Bus stuck(I2C or data shorted)",
- "No EEPROM/retry timeout",
- "Enforce part number list",
- "Unknown identifier",
- "High Temperature",
- "Bad or shorted cable/module",
- "Unknown status",
-};
-
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
-{
- enum port_module_event_status_type module_status;
- enum port_module_event_error_type error_type;
- struct mlx5_eqe_port_module *module_event_eqe;
- struct mlx5_priv *priv = &dev->priv;
- u8 module_num;
-
- module_event_eqe = &eqe->data.port_module;
- module_num = module_event_eqe->module;
- module_status = module_event_eqe->module_status &
- PORT_MODULE_EVENT_MODULE_STATUS_MASK;
- error_type = module_event_eqe->error_type &
- PORT_MODULE_EVENT_ERROR_TYPE_MASK;
-
- if (module_status < MLX5_MODULE_STATUS_ERROR) {
- priv->pme_stats.status_counters[module_status - 1]++;
- } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
- if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
- /* Unknown error type */
- error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
- priv->pme_stats.error_counters[error_type]++;
- }
-
- if (!printk_ratelimit())
- return;
-
- if (module_status < MLX5_MODULE_STATUS_ERROR)
- mlx5_core_info(dev,
- "Port module event: module %u, %s\n",
- module_num, mlx5_pme_status[module_status - 1]);
-
- else if (module_status == MLX5_MODULE_STATUS_ERROR)
- mlx5_core_info(dev,
- "Port module event[error]: module %u, %s, %s\n",
- module_num, mlx5_pme_status[module_status - 1],
- mlx5_pme_error[error_type]);
-}
-
int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
{
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 91b8139a388d..388f205a497f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -38,11 +38,11 @@
#include <linux/mlx5/transobj.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
-static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
- u32 rsn)
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
struct mlx5_core_rsc_common *common;
spin_lock(&table->lock);
@@ -53,11 +53,6 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
spin_unlock(&table->lock);
- if (!common) {
- mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
- rsn);
- return NULL;
- }
return common;
}
@@ -120,19 +115,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
}
}
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+static int rsc_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
{
- struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+ struct mlx5_core_rsc_common *common;
+ struct mlx5_qp_table *table;
+ struct mlx5_core_dev *dev;
struct mlx5_core_dct *dct;
+ u8 event_type = (u8)type;
struct mlx5_core_qp *qp;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ u32 rsn;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ eqe = data;
+ rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ eqe = data;
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ table = container_of(nb, struct mlx5_qp_table, nb);
+ priv = container_of(table, struct mlx5_priv, qp_table);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
- if (!common)
- return;
+ common = mlx5_get_rsc(table, rsn);
+ if (!common) {
+ mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn);
+ return NOTIFY_OK;
+ }
if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
event_type, rsn);
- return;
+ goto out;
}
switch (common->res) {
@@ -150,8 +183,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
default:
mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
}
-
+out:
mlx5_core_put_rsc(common);
+
+ return NOTIFY_OK;
}
static int create_resource_common(struct mlx5_core_dev *dev,
@@ -487,10 +522,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev)
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
mlx5_qp_debugfs_init(dev);
+
+ table->nb.notifier_call = rsc_event_notifier;
+ mlx5_notifier_register(dev, &table->nb);
}
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ mlx5_notifier_unregister(dev, &table->nb);
mlx5_qp_debugfs_cleanup(dev);
}
@@ -670,3 +711,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type)
+{
+ u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ return mlx5_get_rsc(table, rsn);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+ mlx5_core_put_rsc(res);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_put);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index a1ee9a8a769e..c4d4b76096dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
}
EXPORT_SYMBOL(mlx5_core_destroy_tis);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn)
-{
- u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
- int err;
-
- MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (!err)
- *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
-
- return err;
-}
-
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
-{
- u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
-
- MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
-
- MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
- MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out,
- sizeof(out));
-}
-
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
-{
- u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
- int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
-
- MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
- MLX5_SET(query_rmp_in, in, rmpn, rmpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
-{
- void *in;
- void *rmpc;
- void *wq;
- void *bitmask;
- int err;
-
- in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
- bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(modify_rmp_in, in, rmpn, rmpn);
- MLX5_SET(wq, wq, lwm, lwm);
- MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
- err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
- kvfree(in);
-
- return err;
-}
-
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *xsrqn)
-{
- u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
- int err;
-
- MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (!err)
- *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
-
- return err;
-}
-
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
- MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
- MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
-{
- u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
- MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
- MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
- MLX5_SET(arm_xrc_srq_in, in, op_mod,
- MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rqtn)
{
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 31a750570c38..28b757a64029 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -60,7 +60,7 @@ struct mlx5_core_cq {
} tasklet_ctx;
int reset_notify_added;
struct list_head reset_notify;
- struct mlx5_eq *eq;
+ struct mlx5_eq_comp *eq;
u16 uid;
};
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index b4c0457fbebd..f7c8bebfe472 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -212,6 +212,13 @@ enum {
MLX5_PFAULT_SUBTYPE_RDMA = 1,
};
+enum wqe_page_fault_type {
+ MLX5_WQE_PF_TYPE_RMP = 0,
+ MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE = 1,
+ MLX5_WQE_PF_TYPE_RESP = 2,
+ MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC = 3,
+};
+
enum {
MLX5_PERM_LOCAL_READ = 1 << 2,
MLX5_PERM_LOCAL_WRITE = 1 << 3,
@@ -294,9 +301,15 @@ enum {
MLX5_EVENT_QUEUE_TYPE_DCT = 6,
};
+/* mlx5 components can subscribe to any one of these events via
+ * mlx5_eq_notifier_register API.
+ */
enum mlx5_event {
+ /* Special value to subscribe to any event */
+ MLX5_EVENT_TYPE_NOTIFY_ANY = 0x0,
+ /* HW events enum start: comp events are not subscribable */
MLX5_EVENT_TYPE_COMP = 0x0,
-
+ /* HW Async events enum start: subscribable events */
MLX5_EVENT_TYPE_PATH_MIG = 0x01,
MLX5_EVENT_TYPE_COMM_EST = 0x02,
MLX5_EVENT_TYPE_SQ_DRAINED = 0x03,
@@ -334,6 +347,8 @@ enum mlx5_event {
MLX5_EVENT_TYPE_FPGA_QP_ERROR = 0x21,
MLX5_EVENT_TYPE_DEVICE_TRACER = 0x26,
+
+ MLX5_EVENT_TYPE_MAX = MLX5_EVENT_TYPE_DEVICE_TRACER + 1,
};
enum {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index aa5963b5d38e..b090a96f87df 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -46,10 +46,11 @@
#include <linux/mempool.h>
#include <linux/interrupt.h>
#include <linux/idr.h>
+#include <linux/notifier.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/doorbell.h>
-#include <linux/mlx5/srq.h>
+#include <linux/mlx5/eq.h>
#include <linux/timecounter.h>
#include <linux/ptp_clock_kernel.h>
@@ -85,18 +86,6 @@ enum {
};
enum {
- MLX5_EQ_VEC_PAGES = 0,
- MLX5_EQ_VEC_CMD = 1,
- MLX5_EQ_VEC_ASYNC = 2,
- MLX5_EQ_VEC_PFAULT = 3,
- MLX5_EQ_VEC_COMP_BASE,
-};
-
-enum {
- MLX5_MAX_IRQ_NAME = 32
-};
-
-enum {
MLX5_ATOMIC_MODE_OFFSET = 16,
MLX5_ATOMIC_MODE_IB_COMP = 1,
MLX5_ATOMIC_MODE_CX = 2,
@@ -205,16 +194,7 @@ struct mlx5_rsc_debug {
};
enum mlx5_dev_event {
- MLX5_DEV_EVENT_SYS_ERROR,
- MLX5_DEV_EVENT_PORT_UP,
- MLX5_DEV_EVENT_PORT_DOWN,
- MLX5_DEV_EVENT_PORT_INITIALIZED,
- MLX5_DEV_EVENT_LID_CHANGE,
- MLX5_DEV_EVENT_PKEY_CHANGE,
- MLX5_DEV_EVENT_GUID_CHANGE,
- MLX5_DEV_EVENT_CLIENT_REREG,
- MLX5_DEV_EVENT_PPS,
- MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT,
+ MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */
};
enum mlx5_port_status {
@@ -222,14 +202,6 @@ enum mlx5_port_status {
MLX5_PORT_DOWN = 2,
};
-enum mlx5_eq_type {
- MLX5_EQ_TYPE_COMP,
- MLX5_EQ_TYPE_ASYNC,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- MLX5_EQ_TYPE_PF,
-#endif
-};
-
struct mlx5_bfreg_info {
u32 *sys_pages;
int num_low_latency_bfregs;
@@ -297,6 +269,8 @@ struct mlx5_cmd_stats {
};
struct mlx5_cmd {
+ struct mlx5_nb nb;
+
void *cmd_alloc_buf;
dma_addr_t alloc_dma;
int alloc_size;
@@ -366,51 +340,6 @@ struct mlx5_frag_buf_ctrl {
u8 log_frag_strides;
};
-struct mlx5_eq_tasklet {
- struct list_head list;
- struct list_head process_list;
- struct tasklet_struct task;
- /* lock on completion tasklet list */
- spinlock_t lock;
-};
-
-struct mlx5_eq_pagefault {
- struct work_struct work;
- /* Pagefaults lock */
- spinlock_t lock;
- struct workqueue_struct *wq;
- mempool_t *pool;
-};
-
-struct mlx5_cq_table {
- /* protect radix tree */
- spinlock_t lock;
- struct radix_tree_root tree;
-};
-
-struct mlx5_eq {
- struct mlx5_core_dev *dev;
- struct mlx5_cq_table cq_table;
- __be32 __iomem *doorbell;
- u32 cons_index;
- struct mlx5_frag_buf buf;
- int size;
- unsigned int irqn;
- u8 eqn;
- int nent;
- u64 mask;
- struct list_head list;
- int index;
- struct mlx5_rsc_debug *dbg;
- enum mlx5_eq_type type;
- union {
- struct mlx5_eq_tasklet tasklet_ctx;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct mlx5_eq_pagefault pf_ctx;
-#endif
- };
-};
-
struct mlx5_core_psv {
u32 psv_idx;
struct psv_layout {
@@ -463,36 +392,6 @@ struct mlx5_core_rsc_common {
struct completion free;
};
-struct mlx5_core_srq {
- struct mlx5_core_rsc_common common; /* must be first */
- u32 srqn;
- int max;
- size_t max_gs;
- size_t max_avail_gather;
- int wqe_shift;
- void (*event) (struct mlx5_core_srq *, enum mlx5_event);
-
- atomic_t refcount;
- struct completion free;
- u16 uid;
-};
-
-struct mlx5_eq_table {
- void __iomem *update_ci;
- void __iomem *update_arm_ci;
- struct list_head comp_eqs_list;
- struct mlx5_eq pages_eq;
- struct mlx5_eq async_eq;
- struct mlx5_eq cmd_eq;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct mlx5_eq pfault_eq;
-#endif
- int num_comp_vectors;
- /* protect EQs list
- */
- spinlock_t lock;
-};
-
struct mlx5_uars_page {
void __iomem *map;
bool wc;
@@ -542,13 +441,8 @@ struct mlx5_core_health {
};
struct mlx5_qp_table {
- /* protect radix tree
- */
- spinlock_t lock;
- struct radix_tree_root tree;
-};
+ struct notifier_block nb;
-struct mlx5_srq_table {
/* protect radix tree
*/
spinlock_t lock;
@@ -575,11 +469,6 @@ struct mlx5_core_sriov {
int enabled_vfs;
};
-struct mlx5_irq_info {
- cpumask_var_t mask;
- char name[MLX5_MAX_IRQ_NAME];
-};
-
struct mlx5_fc_stats {
spinlock_t counters_idr_lock; /* protects counters_idr */
struct idr counters_idr;
@@ -593,10 +482,11 @@ struct mlx5_fc_stats {
unsigned long sampling_interval; /* jiffies */
};
+struct mlx5_events;
struct mlx5_mpfs;
struct mlx5_eswitch;
struct mlx5_lag;
-struct mlx5_pagefault;
+struct mlx5_eq_table;
struct mlx5_rate_limit {
u32 rate;
@@ -619,37 +509,12 @@ struct mlx5_rl_table {
struct mlx5_rl_entry *rl_entry;
};
-enum port_module_event_status_type {
- MLX5_MODULE_STATUS_PLUGGED = 0x1,
- MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
- MLX5_MODULE_STATUS_ERROR = 0x3,
- MLX5_MODULE_STATUS_NUM = 0x3,
-};
-
-enum port_module_event_error_type {
- MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED,
- MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE,
- MLX5_MODULE_EVENT_ERROR_BUS_STUCK,
- MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT,
- MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST,
- MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER,
- MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE,
- MLX5_MODULE_EVENT_ERROR_BAD_CABLE,
- MLX5_MODULE_EVENT_ERROR_UNKNOWN,
- MLX5_MODULE_EVENT_ERROR_NUM,
-};
-
-struct mlx5_port_module_event_stats {
- u64 status_counters[MLX5_MODULE_STATUS_NUM];
- u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
-};
-
struct mlx5_priv {
char name[MLX5_MAX_NAME_LEN];
- struct mlx5_eq_table eq_table;
- struct mlx5_irq_info *irq_info;
+ struct mlx5_eq_table *eq_table;
/* pages stuff */
+ struct mlx5_nb pg_nb;
struct workqueue_struct *pg_wq;
struct rb_root page_root;
int fw_pages;
@@ -659,8 +524,6 @@ struct mlx5_priv {
struct mlx5_core_health health;
- struct mlx5_srq_table srq_table;
-
/* start: qp staff */
struct mlx5_qp_table qp_table;
struct dentry *qp_debugfs;
@@ -690,9 +553,7 @@ struct mlx5_priv {
struct list_head dev_list;
struct list_head ctx_list;
spinlock_t ctx_lock;
-
- struct list_head waiting_events_list;
- bool is_accum_events;
+ struct mlx5_events *events;
struct mlx5_flow_steering *steering;
struct mlx5_mpfs *mpfs;
@@ -703,15 +564,6 @@ struct mlx5_priv {
struct mlx5_fc_stats fc_stats;
struct mlx5_rl_table rl_table;
- struct mlx5_port_module_event_stats pme_stats;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- void (*pfault)(struct mlx5_core_dev *dev,
- void *context,
- struct mlx5_pagefault *pfault);
- void *pfault_ctx;
- struct srcu_struct pfault_srcu;
-#endif
struct mlx5_bfreg_data bfregs;
struct mlx5_uars_page *uar;
};
@@ -736,44 +588,6 @@ enum mlx5_pagefault_type_flags {
MLX5_PFAULT_RDMA = 1 << 2,
};
-/* Contains the details of a pagefault. */
-struct mlx5_pagefault {
- u32 bytes_committed;
- u32 token;
- u8 event_subtype;
- u8 type;
- union {
- /* Initiator or send message responder pagefault details. */
- struct {
- /* Received packet size, only valid for responders. */
- u32 packet_size;
- /*
- * Number of resource holding WQE, depends on type.
- */
- u32 wq_num;
- /*
- * WQE index. Refers to either the send queue or
- * receive queue, according to event_subtype.
- */
- u16 wqe_index;
- } wqe;
- /* RDMA responder pagefault details */
- struct {
- u32 r_key;
- /*
- * Received packet size, minimal size page fault
- * resolution required for forward progress.
- */
- u32 packet_size;
- u32 rdma_op_len;
- u64 rdma_va;
- } rdma;
- };
-
- struct mlx5_eq *eq;
- struct work_struct work;
-};
-
struct mlx5_td {
struct list_head tirs_list;
u32 tdn;
@@ -803,6 +617,8 @@ struct mlx5_pps {
};
struct mlx5_clock {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_nb pps_nb;
seqlock_t lock;
struct cyclecounter cycles;
struct timecounter tc;
@@ -810,7 +626,6 @@ struct mlx5_clock {
u32 nominal_c_mult;
unsigned long overflow_period;
struct delayed_work overflow_work;
- struct mlx5_core_dev *mdev;
struct ptp_clock *ptp;
struct ptp_clock_info ptp_info;
struct mlx5_pps pps_info;
@@ -843,9 +658,6 @@ struct mlx5_core_dev {
/* sync interface state */
struct mutex intf_state_mutex;
unsigned long intf_state;
- void (*event) (struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- unsigned long param);
struct mlx5_priv priv;
struct mlx5_profile *profile;
atomic_t num_qps;
@@ -859,9 +671,6 @@ struct mlx5_core_dev {
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
#endif
-#ifdef CONFIG_RFS_ACCEL
- struct cpu_rmap *rmap;
-#endif
struct mlx5_clock clock;
struct mlx5_ib_clock_info *clock_info;
struct page *clock_info_page;
@@ -940,8 +749,8 @@ struct mlx5_hca_vport_context {
u64 node_guid;
u32 cap_mask1;
u32 cap_mask1_perm;
- u32 cap_mask2;
- u32 cap_mask2_perm;
+ u16 cap_mask2;
+ u16 cap_mask2_perm;
u16 lid;
u8 init_type_reply; /* bitmask: see ib spec 14.2.5.6 InitTypeReply */
u8 lmc;
@@ -1070,13 +879,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
gfp_t flags, int npages);
void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
struct mlx5_cmd_mailbox *head);
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in);
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out);
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- u16 lwm, int is_srq);
void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
@@ -1095,9 +897,9 @@ int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev);
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev);
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
s32 npages);
@@ -1108,9 +910,6 @@ void mlx5_unregister_debugfs(void);
void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn);
int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1155,6 +954,9 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
bool map_wc, bool fast_path);
void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev);
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector);
unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
u8 roce_version, u8 roce_l3_type, const u8 *gid,
@@ -1202,11 +1004,6 @@ struct mlx5_interface {
void (*remove)(struct mlx5_core_dev *dev, void *context);
int (*attach)(struct mlx5_core_dev *dev, void *context);
void (*detach)(struct mlx5_core_dev *dev, void *context);
- void (*event)(struct mlx5_core_dev *dev, void *context,
- enum mlx5_dev_event event, unsigned long param);
- void (*pfault)(struct mlx5_core_dev *dev,
- void *context,
- struct mlx5_pagefault *pfault);
void * (*get_dev)(void *context);
int protocol;
struct list_head list;
@@ -1215,6 +1012,9 @@ struct mlx5_interface {
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol);
int mlx5_register_interface(struct mlx5_interface *intf);
void mlx5_unregister_interface(struct mlx5_interface *intf);
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb);
+
int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
@@ -1306,10 +1106,4 @@ enum {
MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
};
-static inline const struct cpumask *
-mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
-{
- return dev->priv.irq_info[vector].mask;
-}
-
#endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
new file mode 100644
index 000000000000..00045cc4ea11
--- /dev/null
+++ b/include/linux/mlx5/eq.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef MLX5_CORE_EQ_H
+#define MLX5_CORE_EQ_H
+
+enum {
+ MLX5_EQ_PAGEREQ_IDX = 0,
+ MLX5_EQ_CMD_IDX = 1,
+ MLX5_EQ_ASYNC_IDX = 2,
+ /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */
+ MLX5_EQ_PFAULT_IDX = 3,
+ MLX5_EQ_MAX_ASYNC_EQS,
+ /* completion eqs vector indices start here */
+ MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS,
+};
+
+#define MLX5_NUM_CMD_EQE (32)
+#define MLX5_NUM_ASYNC_EQE (0x1000)
+#define MLX5_NUM_SPARE_EQE (0x80)
+
+struct mlx5_eq;
+struct mlx5_core_dev;
+
+struct mlx5_eq_param {
+ u8 index;
+ int nent;
+ u64 mask;
+ void *context;
+ irq_handler_t handler;
+};
+
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq_param *param);
+int
+mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc);
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm);
+
+/* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create EQs with MLX5_NUM_SPARE_EQE extra entries,
+ * so we must update our consumer index at
+ * least that often.
+ *
+ * mlx5_eq_update_cc must be called on every EQE @EQ irq handler
+ */
+static inline u32 mlx5_eq_update_cc(struct mlx5_eq *eq, u32 cc)
+{
+ if (unlikely(cc >= MLX5_NUM_SPARE_EQE)) {
+ mlx5_eq_update_ci(eq, cc, 0);
+ cc = 0;
+ }
+ return cc;
+}
+
+struct mlx5_nb {
+ struct notifier_block nb;
+ u8 event_type;
+};
+
+#define mlx5_nb_cof(ptr, type, member) \
+ (container_of(container_of(ptr, struct mlx5_nb, nb), type, member))
+
+#define MLX5_NB_INIT(name, handler, event) do { \
+ (name)->nb.notifier_call = handler; \
+ (name)->event_type = MLX5_EVENT_TYPE_##event; \
+} while (0)
+
+#endif /* MLX5_CORE_EQ_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 34e17e6f8942..52393fbcf3b4 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -144,6 +144,9 @@ enum {
MLX5_CMD_OP_DESTROY_XRQ = 0x718,
MLX5_CMD_OP_QUERY_XRQ = 0x719,
MLX5_CMD_OP_ARM_XRQ = 0x71a,
+ MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725,
+ MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726,
+ MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727,
MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750,
MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751,
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
@@ -245,6 +248,7 @@ enum {
MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c,
MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d,
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e,
+ MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT = 0x93f,
MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942,
@@ -260,6 +264,12 @@ enum {
MLX5_CMD_OP_MAX
};
+/* Valid range for general commands that don't work over an object */
+enum {
+ MLX5_CMD_OP_GENERAL_START = 0xb00,
+ MLX5_CMD_OP_GENERAL_END = 0xd00,
+};
+
struct mlx5_ifc_flow_table_fields_supported_bits {
u8 outer_dmac[0x1];
u8 outer_smac[0x1];
@@ -349,7 +359,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 reformat_l3_tunnel_to_l2[0x1];
u8 reformat_l2_to_l3_tunnel[0x1];
u8 reformat_and_modify_action[0x1];
- u8 reserved_at_14[0xb];
+ u8 reserved_at_15[0xb];
u8 reserved_at_20[0x2];
u8 log_max_ft_size[0x6];
u8 log_max_modify_header_context[0x8];
@@ -586,7 +596,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
u8 fdb_multi_path_to_table[0x1];
u8 reserved_at_1d[0x1];
u8 multi_fdb_encap[0x1];
- u8 reserved_at_1e[0x1e1];
+ u8 reserved_at_1f[0x1e1];
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
@@ -829,7 +839,7 @@ struct mlx5_ifc_vector_calc_cap_bits {
struct mlx5_ifc_calc_op calc2;
struct mlx5_ifc_calc_op calc3;
- u8 reserved_at_e0[0x720];
+ u8 reserved_at_c0[0x720];
};
enum {
@@ -883,6 +893,10 @@ enum {
MLX5_CAP_UMR_FENCE_NONE = 0x2,
};
+enum {
+ MLX5_UCTX_CAP_RAW_TX = 1UL << 0,
+};
+
struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_0[0x30];
u8 vhca_id[0x10];
@@ -1043,7 +1057,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 vector_calc[0x1];
u8 umr_ptr_rlky[0x1];
u8 imaicl[0x1];
- u8 reserved_at_232[0x4];
+ u8 qp_packet_based[0x1];
+ u8 reserved_at_233[0x3];
u8 qkv[0x1];
u8 pkv[0x1];
u8 set_deth_sqpn[0x1];
@@ -1193,7 +1208,13 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 num_vhca_ports[0x8];
u8 reserved_at_618[0x6];
u8 sw_owner_id[0x1];
- u8 reserved_at_61f[0x1e1];
+ u8 reserved_at_61f[0x1];
+
+ u8 reserved_at_620[0x80];
+
+ u8 uctx_cap[0x20];
+
+ u8 reserved_at_6c0[0x140];
};
enum mlx5_flow_destination_type {
@@ -2249,7 +2270,8 @@ struct mlx5_ifc_qpc_bits {
u8 st[0x8];
u8 reserved_at_10[0x3];
u8 pm_state[0x2];
- u8 reserved_at_15[0x3];
+ u8 reserved_at_15[0x1];
+ u8 req_e2e_credit_mode[0x2];
u8 offload_type[0x4];
u8 end_padding_mode[0x2];
u8 reserved_at_1e[0x2];
@@ -5567,7 +5589,7 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits {
struct mlx5_ifc_modify_nic_vport_field_select_bits {
u8 reserved_at_0[0x12];
u8 affiliation[0x1];
- u8 reserved_at_e[0x1];
+ u8 reserved_at_13[0x1];
u8 disable_uc_local_lb[0x1];
u8 disable_mc_local_lb[0x1];
u8 node_guid[0x1];
@@ -9028,7 +9050,7 @@ struct mlx5_ifc_dcbx_param_bits {
u8 dcbx_cee_cap[0x1];
u8 dcbx_ieee_cap[0x1];
u8 dcbx_standby_cap[0x1];
- u8 reserved_at_0[0x5];
+ u8 reserved_at_3[0x5];
u8 port_number[0x8];
u8 reserved_at_10[0xa];
u8 max_application_table_size[6];
@@ -9276,7 +9298,9 @@ struct mlx5_ifc_umem_bits {
struct mlx5_ifc_uctx_bits {
u8 modify_field_select[0x40];
- u8 reserved_at_40[0x1c0];
+ u8 cap[0x20];
+
+ u8 reserved_at_60[0x1a0];
};
struct mlx5_ifc_create_umem_in_bits {
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 34aed6032f86..bf4bc01ffb0c 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -107,9 +107,6 @@ enum mlx5e_connector_type {
#define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
-#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
-#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF
-
int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
int ptys_size, int proto_mask, u8 local_port);
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index fbe322c966bc..b26ea9077384 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -596,6 +596,11 @@ int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id);
int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
int reset, void *out, int out_size);
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type);
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
+
static inline const char *mlx5_qp_type_str(int type)
{
switch (type) {
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
deleted file mode 100644
index 1b1f3c20c6a3..000000000000
--- a/include/linux/mlx5/srq.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef MLX5_SRQ_H
-#define MLX5_SRQ_H
-
-#include <linux/mlx5/driver.h>
-
-enum {
- MLX5_SRQ_FLAG_ERR = (1 << 0),
- MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
- MLX5_SRQ_FLAG_RNDV = (1 << 2),
-};
-
-struct mlx5_srq_attr {
- u32 type;
- u32 flags;
- u32 log_size;
- u32 wqe_shift;
- u32 log_page_size;
- u32 wqe_cnt;
- u32 srqn;
- u32 xrcd;
- u32 page_offset;
- u32 cqn;
- u32 pd;
- u32 lwm;
- u32 user_index;
- u64 db_record;
- __be64 *pas;
- u32 tm_log_list_size;
- u32 tm_next_tag;
- u32 tm_hw_phase_cnt;
- u32 tm_sw_phase_cnt;
- u16 uid;
-};
-
-struct mlx5_core_dev;
-
-void mlx5_init_srq_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);
-
-#endif /* MLX5_SRQ_H */
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
index 7f5ca2cd3a32..a261d5528ff7 100644
--- a/include/linux/mlx5/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -58,17 +58,6 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
int inlen);
void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn);
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn);
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-
int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rqtn);
int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h
index f62b842e6596..f8982e4e9702 100644
--- a/include/rdma/ib_fmr_pool.h
+++ b/include/rdma/ib_fmr_pool.h
@@ -88,6 +88,6 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
int list_len,
u64 io_virtual_address);
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
#endif /* IB_FMR_POOL_H */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index f6ba366051c7..fdef558e3a2d 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -277,6 +277,7 @@ enum ib_port_capability_mask_bits {
IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+ IB_PORT_CAP_MASK2_SUP = 1 << 15,
IB_PORT_CM_SUP = 1 << 16,
IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
IB_PORT_REINIT_SUP = 1 << 18,
@@ -295,6 +296,15 @@ enum ib_port_capability_mask_bits {
IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
};
+enum ib_port_capability_mask2_bits {
+ IB_PORT_SET_NODE_DESC_SUP = 1 << 0,
+ IB_PORT_EX_PORT_INFO_EX_SUP = 1 << 1,
+ IB_PORT_VIRT_SUP = 1 << 2,
+ IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3,
+ IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4,
+ IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5,
+};
+
#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
struct opa_class_port_info {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9c0c2132a2d6..89eead636e68 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -41,14 +41,11 @@
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rwsem.h>
-#include <linux/scatterlist.h>
#include <linux/workqueue.h>
-#include <linux/socket.h>
#include <linux/irq_poll.h>
#include <uapi/linux/if_ether.h>
#include <net/ipv6.h>
@@ -56,7 +53,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
-
+#include <linux/refcount.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
@@ -437,6 +434,7 @@ enum ib_port_state {
enum ib_port_width {
IB_WIDTH_1X = 1,
+ IB_WIDTH_2X = 16,
IB_WIDTH_4X = 2,
IB_WIDTH_8X = 4,
IB_WIDTH_12X = 8
@@ -446,6 +444,7 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
{
switch (width) {
case IB_WIDTH_1X: return 1;
+ case IB_WIDTH_2X: return 2;
case IB_WIDTH_4X: return 4;
case IB_WIDTH_8X: return 8;
case IB_WIDTH_12X: return 12;
@@ -595,6 +594,7 @@ struct ib_port_attr {
u8 active_width;
u8 active_speed;
u8 phys_state;
+ u16 port_cap_flags2;
};
enum ib_device_modify_flags {
@@ -732,7 +732,11 @@ enum ib_rate {
IB_RATE_25_GBPS = 15,
IB_RATE_100_GBPS = 16,
IB_RATE_200_GBPS = 17,
- IB_RATE_300_GBPS = 18
+ IB_RATE_300_GBPS = 18,
+ IB_RATE_28_GBPS = 19,
+ IB_RATE_50_GBPS = 20,
+ IB_RATE_400_GBPS = 21,
+ IB_RATE_600_GBPS = 22,
};
/**
@@ -1508,6 +1512,10 @@ struct ib_ucontext {
#endif
struct ib_rdmacg_object cg_obj;
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_uobject {
@@ -2301,7 +2309,7 @@ struct ib_device {
* index - Updated the single counter pointed to by index
* num_counters - Updated all counters (will reset the timestamp
* and prevent further calls for lifespan milliseconds)
- * Drivers are allowed to update all counters in leiu of just the
+ * Drivers are allowed to update all counters in lieu of just the
* one given in index at their option
*/
int (*get_hw_stats)(struct ib_device *device,
@@ -2603,8 +2611,14 @@ struct ib_device {
const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
int comp_vector);
- const struct uverbs_object_tree_def *const *driver_specs;
+ const struct uapi_definition *driver_def;
enum rdma_driver_id driver_id;
+ /*
+ * Provides synchronization between device unregistration and netlink
+ * commands on a device. To be used only by core.
+ */
+ refcount_t refcount;
+ struct completion unreg_completion;
};
struct ib_client {
@@ -4204,10 +4218,10 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
+
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs);
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 3584d0816fcd..dd0ed8048bb4 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -269,6 +269,13 @@ struct rvt_driver_provided {
void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
/*
+ * Init a struture allocated with qp_priv_alloc(). This should be
+ * called after all qp fields have been initialized in rdmavt.
+ */
+ int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr);
+
+ /*
* Free the driver's private qp structure.
*/
void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 2638fa7cd702..f34aa96e4518 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -39,6 +39,10 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_MR,
/**
+ * @RDMA_RESTRACK_CTX: Verbs contexts (CTX)
+ */
+ RDMA_RESTRACK_CTX,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
RDMA_RESTRACK_MAX
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 84d3d15f1f38..2f56844fb7da 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -79,6 +79,8 @@ struct uverbs_attr_spec {
*/
u8 alloc_and_copy:1;
u8 mandatory:1;
+ /* True if this is from UVERBS_ATTR_UHW */
+ u8 is_udata:1;
union {
struct {
@@ -140,6 +142,13 @@ struct uverbs_attr_spec {
*
* The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
* the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ *
+ * This also encodes the tables for the write() and write() extended commands
+ * using the coding
+ * OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command #
+ * OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex #
+ * ie the WRITE path is treated as a special method type in the ioctl
+ * framework.
*/
enum uapi_radix_data {
UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
@@ -147,12 +156,16 @@ enum uapi_radix_data {
UVERBS_API_ATTR_KEY_BITS = 6,
UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+ UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS,
UVERBS_API_METHOD_KEY_BITS = 5,
UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
- UVERBS_API_METHOD_KEY_NUM_CORE = 24,
- UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) -
- UVERBS_API_METHOD_KEY_NUM_CORE,
+ UVERBS_API_METHOD_KEY_NUM_CORE = 22,
+ UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_METHOD_KEY_NUM_DRIVER =
+ (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) -
+ UVERBS_API_METHOD_KEY_NUM_CORE,
UVERBS_API_METHOD_KEY_MASK = GENMASK(
UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
UVERBS_API_METHOD_KEY_SHIFT),
@@ -205,7 +218,22 @@ static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id)
return id << UVERBS_API_METHOD_KEY_SHIFT;
}
-static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
+static inline __attribute_const__ u32 uapi_key_write_method(u32 id)
+{
+ if (id >= UVERBS_API_WRITE_KEY_NUM)
+ return UVERBS_API_KEY_ERR;
+ return UVERBS_API_METHOD_IS_WRITE | id;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id)
+{
+ if (id >= UVERBS_API_WRITE_KEY_NUM)
+ return UVERBS_API_KEY_ERR;
+ return UVERBS_API_METHOD_IS_WRITE_EX | id;
+}
+
+static inline __attribute_const__ u32
+uapi_key_attr_to_ioctl_method(u32 attr_key)
{
return attr_key &
(UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
@@ -213,10 +241,23 @@ static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
{
- return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+ unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+ return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
(key & UVERBS_API_ATTR_KEY_MASK) == 0;
}
+static inline __attribute_const__ bool uapi_key_is_write_method(u32 key)
+{
+ return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key)
+{
+ return (key & UVERBS_API_METHOD_KEY_MASK) ==
+ UVERBS_API_METHOD_IS_WRITE_EX;
+}
+
static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
{
/* 0 is the method slot itself */
@@ -246,9 +287,12 @@ static inline __attribute_const__ u32 uapi_key_attr(u32 id)
return id;
}
+/* Only true for ioctl methods */
static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
{
- return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+ unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+ return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
(key & UVERBS_API_ATTR_KEY_MASK) != 0;
}
@@ -285,8 +329,7 @@ struct uverbs_method_def {
u32 flags;
size_t num_attrs;
const struct uverbs_attr_def * const (*attrs)[];
- int (*handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int (*handler)(struct uverbs_attr_bundle *attrs);
};
struct uverbs_object_def {
@@ -296,11 +339,131 @@ struct uverbs_object_def {
const struct uverbs_method_def * const (*methods)[];
};
-struct uverbs_object_tree_def {
- size_t num_objects;
- const struct uverbs_object_def * const (*objects)[];
+enum uapi_definition_kind {
+ UAPI_DEF_END = 0,
+ UAPI_DEF_OBJECT_START,
+ UAPI_DEF_WRITE,
+ UAPI_DEF_CHAIN_OBJ_TREE,
+ UAPI_DEF_CHAIN,
+ UAPI_DEF_IS_SUPPORTED_FUNC,
+ UAPI_DEF_IS_SUPPORTED_DEV_FN,
+};
+
+enum uapi_definition_scope {
+ UAPI_SCOPE_OBJECT = 1,
+ UAPI_SCOPE_METHOD = 2,
};
+struct uapi_definition {
+ u8 kind;
+ u8 scope;
+ union {
+ struct {
+ u16 object_id;
+ } object_start;
+ struct {
+ u16 command_num;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+ } write;
+ };
+
+ union {
+ bool (*func_is_supported)(struct ib_device *device);
+ int (*func_write)(struct uverbs_attr_bundle *attrs);
+ const struct uapi_definition *chain;
+ const struct uverbs_object_def *chain_obj_tree;
+ size_t needs_fn_offset;
+ };
+};
+
+/* Define things connected to object_id */
+#define DECLARE_UVERBS_OBJECT(_object_id, ...) \
+ { \
+ .kind = UAPI_DEF_OBJECT_START, \
+ .object_start = { .object_id = _object_id }, \
+ }, \
+ ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...) \
+ { \
+ .kind = UAPI_DEF_WRITE, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .write = { .is_ex = 0, .command_num = _command_num }, \
+ .func_write = _func, \
+ _cmd_desc, \
+ }, \
+ ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...) \
+ { \
+ .kind = UAPI_DEF_WRITE, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .write = { .is_ex = 1, .command_num = _command_num }, \
+ .func_write = _func, \
+ _cmd_desc, \
+ }, \
+ ##__VA_ARGS__
+
+/*
+ * Object is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .needs_fn_offset = \
+ offsetof(struct ib_device, ibdev_fn) + \
+ BUILD_BUG_ON_ZERO( \
+ sizeof(((struct ib_device *)0)->ibdev_fn) != \
+ sizeof(void *)), \
+ }
+
+/*
+ * Method is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \
+ .scope = UAPI_SCOPE_METHOD, \
+ .needs_fn_offset = \
+ offsetof(struct ib_device, ibdev_fn) + \
+ BUILD_BUG_ON_ZERO( \
+ sizeof(((struct ib_device *)0)->ibdev_fn) != \
+ sizeof(void *)), \
+ }
+
+/* Call a function to determine if the entire object is supported or not */
+#define UAPI_DEF_IS_OBJ_SUPPORTED(_func) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_FUNC, \
+ .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func, \
+ }
+
+/* Include another struct uapi_definition in this one */
+#define UAPI_DEF_CHAIN(_def_var) \
+ { \
+ .kind = UAPI_DEF_CHAIN, .chain = _def_var, \
+ }
+
+/* Temporary until the tree base description is replaced */
+#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr) \
+ { \
+ .kind = UAPI_DEF_CHAIN_OBJ_TREE, \
+ .object_start = { .object_id = _object_enum }, \
+ .chain_obj_tree = _object_ptr, \
+ }
+#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \
+ UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum)), \
+ ##__VA_ARGS__
+
/*
* =======================================
* Attribute Specifications
@@ -361,6 +524,12 @@ struct uverbs_object_tree_def {
.u2.objs_arr.max_len = _max_len, \
__VA_ARGS__ } })
+/*
+ * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted,
+ * the user must validate the type of the uobject instead.
+ */
+#define UVERBS_IDR_ANY_OBJECT 0xFFFF
+
#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
(&(const struct uverbs_attr_def){ \
.id = _attr_id, \
@@ -433,25 +602,12 @@ struct uverbs_object_tree_def {
#define UVERBS_ATTR_UHW() \
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \
UVERBS_ATTR_MIN_SIZE(0), \
- UA_OPTIONAL), \
+ UA_OPTIONAL, \
+ .is_udata = 1), \
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \
UVERBS_ATTR_MIN_SIZE(0), \
- UA_OPTIONAL)
-
-/*
- * =======================================
- * Declaration helpers
- * =======================================
- */
-
-#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \
- static const struct uverbs_object_def *const _name##_ptr[] = { \
- __VA_ARGS__, \
- }; \
- static const struct uverbs_object_tree_def _name = { \
- .num_objects = ARRAY_SIZE(_name##_ptr), \
- .objects = &_name##_ptr, \
- }
+ UA_OPTIONAL, \
+ .is_udata = 1)
/* =================================================
* Parsing infrastructure
@@ -492,6 +648,8 @@ struct uverbs_attr {
};
struct uverbs_attr_bundle {
+ struct ib_udata driver_udata;
+ struct ib_udata ucore;
struct ib_uverbs_file *ufile;
DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
struct uverbs_attr attrs[];
@@ -660,6 +818,12 @@ static inline int _uverbs_copy_from_or_zero(void *to,
#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \
_uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
+static inline struct ib_ucontext *
+ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs)
+{
+ return ib_uverbs_get_ucontext_file(attrs->ufile);
+}
+
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, u64 allowed_bits);
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index b3b21733cc55..3447bfe356d6 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -43,7 +43,7 @@
#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y)
#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
-#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
+#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
/* These are static so they do not need to be qualified */
#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
@@ -102,18 +102,11 @@
#define ADD_UVERBS_METHODS(_name, _object_id, ...) \
static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
_object_id)[] = { __VA_ARGS__ }; \
- static const struct uverbs_object_def _name##_struct = { \
+ static const struct uverbs_object_def _name = { \
.id = _object_id, \
.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
.methods = &UVERBS_OBJECT_METHODS(_object_id) \
- }; \
- static const struct uverbs_object_def *const _name##_ptrs[] = { \
- &_name##_struct, \
- }; \
- static const struct uverbs_object_tree_def _name = { \
- .num_objects = 1, \
- .objects = &_name##_ptrs, \
- }
+ };
/* Used by drivers to declare a complete parsing tree for a single method that
* differs only in having additional driver specific attributes.
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3db2802fbc68..883abcf6d36e 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,15 +37,6 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/ib_user_ioctl_verbs.h>
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
-#else
-static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
- return NULL;
-}
-#endif
-
/* Returns _id, or causes a compile error if _id is not a u32.
*
* The uobj APIs should only be used with the write based uAPI to access
@@ -54,15 +45,15 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
*/
#define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
-#define uobj_get_type(_ufile, _object) \
- uapi_get_object((_ufile)->device->uapi, _object)
+#define uobj_get_type(_attrs, _object) \
+ uapi_get_object((_attrs)->ufile->device->uapi, _object)
-#define uobj_get_read(_type, _id, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
+#define uobj_get_read(_type, _id, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
_uobj_check_id(_id), UVERBS_LOOKUP_READ)
-#define ufd_get_read(_type, _fdnum, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
+#define ufd_get_read(_type, _fdnum, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
(_fdnum)*typecheck(s32, _fdnum), \
UVERBS_LOOKUP_READ)
@@ -72,26 +63,27 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
return NULL;
return uobj->object;
}
-#define uobj_get_obj_read(_object, _type, _id, _ufile) \
+#define uobj_get_obj_read(_object, _type, _id, _attrs) \
((struct ib_##_object *)_uobj_get_obj_read( \
- uobj_get_read(_type, _id, _ufile)))
+ uobj_get_read(_type, _id, _attrs)))
-#define uobj_get_write(_type, _id, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
+#define uobj_get_write(_type, _id, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
_uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
- struct ib_uverbs_file *ufile, int success_res);
-#define uobj_perform_destroy(_type, _id, _ufile, _success_res) \
- __uobj_perform_destroy(uobj_get_type(_ufile, _type), \
- _uobj_check_id(_id), _ufile, _success_res)
+ const struct uverbs_attr_bundle *attrs);
+#define uobj_perform_destroy(_type, _id, _attrs) \
+ __uobj_perform_destroy(uobj_get_type(_attrs, _type), \
+ _uobj_check_id(_id), _attrs)
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
- u32 id, struct ib_uverbs_file *ufile);
+ u32 id,
+ const struct uverbs_attr_bundle *attrs);
-#define uobj_get_destroy(_type, _id, _ufile) \
- __uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id), \
- _ufile)
+#define uobj_get_destroy(_type, _id, _attrs) \
+ __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \
+ _attrs)
static inline void uobj_put_destroy(struct ib_uobject *uobj)
{
@@ -111,14 +103,13 @@ static inline void uobj_put_write(struct ib_uobject *uobj)
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
}
-static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
- int success_res)
+static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj)
{
int ret = rdma_alloc_commit_uobject(uobj);
if (ret)
return ret;
- return success_res;
+ return 0;
}
static inline void uobj_alloc_abort(struct ib_uobject *uobj)
@@ -127,18 +118,18 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
}
static inline struct ib_uobject *
-__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
- struct ib_device **ib_dev)
+__uobj_alloc(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
{
- struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile);
+ struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile);
if (!IS_ERR(uobj))
*ib_dev = uobj->context->device;
return uobj;
}
-#define uobj_alloc(_type, _ufile, _ib_dev) \
- __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
+#define uobj_alloc(_type, _attrs, _ib_dev) \
+ __uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev)
static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
struct ib_uobject *uobj,
@@ -191,5 +182,17 @@ static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
uflow->resources = uflow_res;
}
+struct uverbs_api_object {
+ const struct uverbs_obj_type *type_attrs;
+ const struct uverbs_obj_type_class *type_class;
+ u8 disabled:1;
+ u32 id;
+};
+
+static inline u32 uobj_get_object_id(struct ib_uobject *uobj)
+{
+ return uobj->uapi_object->id;
+}
+
#endif
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index c6a984c0c881..01ac5853d9ac 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -6,7 +6,7 @@
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -95,7 +95,7 @@
#define HFI1_CAP_SDMA_AHG (1UL << 2) /* Enable SDMA AHG support */
#define HFI1_CAP_EXTENDED_PSN (1UL << 3) /* Enable Extended PSN support */
#define HFI1_CAP_HDRSUPP (1UL << 4) /* Enable Header Suppression */
-/* 1UL << 5 unused */
+#define HFI1_CAP_TID_RDMA (1UL << 5) /* Enable TID RDMA operations */
#define HFI1_CAP_USE_SDMA_HEAD (1UL << 6) /* DMA Hdr Q tail vs. use CSR */
#define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/
#define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */
@@ -106,7 +106,7 @@
#define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */
#define HFI1_CAP_PKEY_CHECK (1UL << 14) /* Enable ctxt PKey checking */
#define HFI1_CAP_STATIC_RATE_CTRL (1UL << 15) /* Allow PBC.StaticRateControl */
-/* 1UL << 16 unused */
+#define HFI1_CAP_OPFN (1UL << 16) /* Enable the OPFN protocol */
#define HFI1_CAP_SDMA_HEAD_CHECK (1UL << 17) /* SDMA head checking */
#define HFI1_CAP_EARLY_CREDIT_RETURN (1UL << 18) /* early credit return */
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index c1f87735514f..ef3c7ec793a7 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -46,6 +46,12 @@ struct hns_roce_ib_create_cq_resp {
__aligned_u64 cap_flags;
};
+struct hns_roce_ib_create_srq {
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
+ __aligned_u64 que_addr;
+};
+
struct hns_roce_ib_create_qp {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 1254b51a551a..c586fc43739c 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -164,6 +164,7 @@ struct ib_uverbs_get_context {
struct ib_uverbs_get_context_resp {
__u32 async_fd;
__u32 num_comp_vectors;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_device {
@@ -310,6 +311,7 @@ struct ib_uverbs_alloc_pd {
struct ib_uverbs_alloc_pd_resp {
__u32 pd_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_dealloc_pd {
@@ -325,6 +327,7 @@ struct ib_uverbs_open_xrcd {
struct ib_uverbs_open_xrcd_resp {
__u32 xrcd_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_close_xrcd {
@@ -345,6 +348,7 @@ struct ib_uverbs_reg_mr_resp {
__u32 mr_handle;
__u32 lkey;
__u32 rkey;
+ __u32 driver_data[0];
};
struct ib_uverbs_rereg_mr {
@@ -356,11 +360,13 @@ struct ib_uverbs_rereg_mr {
__aligned_u64 hca_va;
__u32 pd_handle;
__u32 access_flags;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_rereg_mr_resp {
__u32 lkey;
__u32 rkey;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_dereg_mr {
@@ -372,11 +378,13 @@ struct ib_uverbs_alloc_mw {
__u32 pd_handle;
__u8 mw_type;
__u8 reserved[3];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_alloc_mw_resp {
__u32 mw_handle;
__u32 rkey;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_dealloc_mw {
@@ -419,6 +427,7 @@ struct ib_uverbs_ex_create_cq {
struct ib_uverbs_create_cq_resp {
__u32 cq_handle;
__u32 cqe;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_ex_create_cq_resp {
@@ -629,6 +638,7 @@ struct ib_uverbs_create_qp_resp {
__u32 max_recv_sge;
__u32 max_inline_data;
__u32 reserved;
+ __u32 driver_data[0];
};
struct ib_uverbs_ex_create_qp_resp {
@@ -733,9 +743,6 @@ struct ib_uverbs_ex_modify_qp {
__u32 reserved;
};
-struct ib_uverbs_modify_qp_resp {
-};
-
struct ib_uverbs_ex_modify_qp_resp {
__u32 comp_mask;
__u32 response_length;
@@ -863,10 +870,12 @@ struct ib_uverbs_create_ah {
__u32 pd_handle;
__u32 reserved;
struct ib_uverbs_ah_attr attr;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_create_ah_resp {
__u32 ah_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_destroy_ah {
@@ -1175,6 +1184,7 @@ struct ib_uverbs_create_srq_resp {
__u32 max_wr;
__u32 max_sge;
__u32 srqn;
+ __u32 driver_data[0];
};
struct ib_uverbs_modify_srq {
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 8fa9f90e2bb1..87b3198f4b5d 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -48,6 +48,7 @@ enum {
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6,
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7,
MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8,
+ MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE = 1 << 9,
};
enum {
@@ -236,6 +237,7 @@ enum mlx5_ib_query_dev_resp_flags {
/* Support 128B CQE compression */
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1,
+ MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2,
};
enum mlx5_ib_tunnel_offloads {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 408e220034de..b8d121d457f1 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -158,6 +158,7 @@ enum mlx5_ib_create_flow_attrs {
MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
};
enum mlx5_ib_destoy_flow_attrs {