diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/Kconfig | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/Makefile | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.c | 9 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 56 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.c | 13 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.h | 12 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mad.c | 60 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 61 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mem.c | 20 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 29 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 554 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 23 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 236 |
13 files changed, 775 insertions, 300 deletions
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 8d651c05de62..ea248def4556 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config MLX5_INFINIBAND tristate "Mellanox 5th generation network adapters (ConnectX series) support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 33f5adb14e4e..9924be8384d8 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index e3ec79b8f7f5..6c8645033102 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -190,12 +190,12 @@ int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id) { struct mlx5_core_dev *dev = dm->dev; - u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; unsigned long *block_map; u64 icm_start_addr; u32 log_icm_size; + u32 num_blocks; u32 max_blocks; u64 block_idx; void *sw_icm; @@ -224,6 +224,8 @@ int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, return -EINVAL; } + num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); spin_lock(&dm->lock); block_idx = bitmap_find_next_zero_area(block_map, @@ -266,13 +268,16 @@ int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, u16 uid, phys_addr_t addr, u32 obj_id) { struct mlx5_core_dev *dev = dm->dev; - u32 num_blocks = 
DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; unsigned long *block_map; + u32 num_blocks; u64 start_idx; int err; + num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + switch (type) { case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: start_idx = diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 4efbbd2fce0c..45f48cde6b9d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -884,15 +884,15 @@ static void notify_soft_wc_handler(struct work_struct *work) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; int vector = attr->comp_vector; struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_cq *cq = to_mcq(ibcq); u32 out[MLX5_ST_SZ_DW(create_cq_out)]; - struct mlx5_ib_cq *cq; int uninitialized_var(index); int uninitialized_var(inlen); u32 *cqb = NULL; @@ -904,18 +904,14 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, if (entries < 0 || (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) - return ERR_PTR(-EINVAL); + return -EINVAL; if (check_cq_create_flags(attr->flags)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; entries = roundup_pow_of_two(entries + 1); if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) - return ERR_PTR(-EINVAL); - - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + return -EINVAL; cq->ibcq.cqe = entries - 1; mutex_init(&cq->resize_mutex); @@ -930,13 +926,13 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, err = create_cq_user(dev, udata, cq, entries, &cqb, 
&cqe_size, &index, &inlen); if (err) - goto err_create; + return err; } else { cqe_size = cache_line_size() == 128 ? 128 : 64; err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, &inlen); if (err) - goto err_create; + return err; INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } @@ -981,7 +977,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, kvfree(cqb); - return &cq->ibcq; + return 0; err_cmd: mlx5_core_destroy_cq(dev->mdev, &cq->mcq); @@ -992,14 +988,10 @@ err_cqb: destroy_cq_user(cq, udata); else destroy_cq_kernel(dev, cq); - -err_create: - kfree(cq); - - return ERR_PTR(err); + return err; } -int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); @@ -1009,10 +1001,6 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); - - kfree(mcq); - - return 0; } static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) @@ -1138,11 +1126,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return 0; } -static void un_resize_user(struct mlx5_ib_cq *cq) -{ - ib_umem_release(cq->resize_umem); -} - static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, int cqe_size) { @@ -1165,12 +1148,6 @@ ex: return err; } -static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) -{ - free_cq_buf(dev, cq->resize_buf); - cq->resize_buf = NULL; -} - static int copy_resize_cqes(struct mlx5_ib_cq *cq) { struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); @@ -1351,10 +1328,11 @@ ex_alloc: kvfree(in); ex_resize: - if (udata) - un_resize_user(cq); - else - un_resize_kernel(dev, cq); + ib_umem_release(cq->resize_umem); + if (!udata) { + free_cq_buf(dev, cq->resize_buf); + cq->resize_buf = NULL; + } ex: mutex_unlock(&cq->resize_mutex); return err; diff --git 
a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 3065c5d0ee96..f30e0d881368 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -114,15 +114,15 @@ u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) } struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { - return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB); + return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_IB); } struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { - return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH); + return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH); } struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) @@ -130,9 +130,10 @@ struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB); } -struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport) +struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, + u16 vport_num) { - return mlx5_eswitch_vport_rep(esw, vport); + return mlx5_eswitch_vport_rep(esw, vport_num); } struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 478503ce20df..de43b423bafc 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -14,17 +14,17 @@ extern const struct mlx5_ib_profile uplink_rep_profile; u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - int vport_index); + u16 vport_num); struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - int vport_index); + u16 vport_num); void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); void 
mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u16 port); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, - int vport_index); + u16 vport_num); #else /* CONFIG_MLX5_ESWITCH */ static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) { @@ -33,7 +33,7 @@ static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) static inline struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { return NULL; } @@ -46,7 +46,7 @@ struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) static inline struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { return NULL; } @@ -63,7 +63,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, static inline struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { return NULL; } diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 6c529e6f3a01..348c1df69cdc 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -200,19 +200,33 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, vl_15_dropped); } -static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num, +static int process_pma_cmd(struct mlx5_ib_dev *dev, u8 port_num, const struct ib_mad *in_mad, struct ib_mad *out_mad) { - int err; + struct mlx5_core_dev *mdev; + bool native_port = true; + u8 mdev_port_num; void *out_cnt; + int err; + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); + if (!mdev) { + /* Fail to get the native port, likely due to 2nd port is still + * unaffiliated. In such case default to 1st port and attached + * PF device. 
+ */ + native_port = false; + mdev = dev->mdev; + mdev_port_num = 1; + } /* Declaring support of extended counters */ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { struct ib_class_port_info cpi = {}; cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; memcpy((out_mad->data + 40), &cpi, sizeof(cpi)); - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + goto done; } if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) { @@ -221,11 +235,13 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num, int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); out_cnt = kvzalloc(sz, GFP_KERNEL); - if (!out_cnt) - return IB_MAD_RESULT_FAILURE; + if (!out_cnt) { + err = IB_MAD_RESULT_FAILURE; + goto done; + } err = mlx5_core_query_vport_counter(mdev, 0, 0, - port_num, out_cnt, sz); + mdev_port_num, out_cnt, sz); if (!err) pma_cnt_ext_assign(pma_cnt_ext, out_cnt); } else { @@ -234,20 +250,23 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num, int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); out_cnt = kvzalloc(sz, GFP_KERNEL); - if (!out_cnt) - return IB_MAD_RESULT_FAILURE; + if (!out_cnt) { + err = IB_MAD_RESULT_FAILURE; + goto done; + } - err = mlx5_core_query_ib_ppcnt(mdev, port_num, + err = mlx5_core_query_ib_ppcnt(mdev, mdev_port_num, out_cnt, sz); if (!err) pma_cnt_assign(pma_cnt, out_cnt); - } - + } kvfree(out_cnt); - if (err) - return IB_MAD_RESULT_FAILURE; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + err = err ? 
IB_MAD_RESULT_FAILURE : + IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +done: + if (native_port) + mlx5_ib_put_native_port_mdev(dev, port_num); + return err; } int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -259,8 +278,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - struct mlx5_core_dev *mdev; - u8 mdev_port_num; int ret; if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || @@ -269,19 +286,14 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, memset(out_mad->data, 0, sizeof(out_mad->data)); - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); - if (!mdev) - return IB_MAD_RESULT_FAILURE; - - if (MLX5_CAP_GEN(mdev, vport_counters) && + if (MLX5_CAP_GEN(dev->mdev, vport_counters) && in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { - ret = process_pma_cmd(mdev, mdev_port_num, in_mad, out_mad); + ret = process_pma_cmd(dev, port_num, in_mad, out_mad); } else { ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, in_mad, out_mad); } - mlx5_ib_put_native_port_mdev(dev, port_num); return ret; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 26b1ce2359ba..9db8c06aa01e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -52,6 +52,7 @@ #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> +#include <linux/mlx5/eswitch.h> #include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> @@ -888,7 +889,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { - props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; + props->device_cap_flags |= 
IB_DEVICE_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | IB_PROT_T10DIF_TYPE_2 | @@ -1008,6 +1009,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); + props->max_pi_fast_reg_page_list_len = + props->max_fast_reg_page_list_len / 2; get_atomic_caps_qp(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); @@ -2344,7 +2347,7 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, /* Allocation size must a multiple of the basic block size * and a power of 2. */ - act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); + act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); act_size = roundup_pow_of_two(act_size); dm->size = act_size; @@ -3257,11 +3260,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int max_table_size; int num_entries; int num_groups; + bool esw_encap; u32 flags = 0; int priority; max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { enum mlx5_flow_namespace_type fn_type; @@ -3274,10 +3280,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (ft_type == MLX5_IB_FT_RX) { fn_type = MLX5_FLOW_NAMESPACE_BYPASS; prio = &dev->flow_db->prios[priority]; - if (!dev->is_rep && + if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->is_rep && + if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; @@ -3287,7 +3293,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev 
*dev, log_max_ft_size)); fn_type = MLX5_FLOW_NAMESPACE_EGRESS; prio = &dev->flow_db->egress_prios[priority]; - if (!dev->is_rep && + if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } @@ -3923,6 +3929,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio = NULL; int max_table_size = 0; + bool esw_encap; u32 flags = 0; int priority; @@ -3931,22 +3938,30 @@ _get_flow_table(struct mlx5_ib_dev *dev, else priority = ib_prio_to_core_prio(fs_matcher->priority, false); + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) + reformat_l3_tunnel_to_l2) && + !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { max_table_size = BIT( MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { max_table_size = BIT( MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && + esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; priority = FDB_BYPASS_PATH; } @@ -4926,18 +4941,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) if (ret) goto error0; - devr->c0 = 
mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL); - if (IS_ERR(devr->c0)) { - ret = PTR_ERR(devr->c0); + devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq); + if (!devr->c0) { + ret = -ENOMEM; goto error1; } - devr->c0->device = &dev->ib_dev; - devr->c0->uobject = NULL; - devr->c0->comp_handler = NULL; - devr->c0->event_handler = NULL; - devr->c0->cq_context = NULL; + + devr->c0->device = &dev->ib_dev; atomic_set(&devr->c0->usecnt, 0); + ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL); + if (ret) + goto err_create_cq; + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x0)) { ret = PTR_ERR(devr->x0); @@ -5029,6 +5045,8 @@ error3: mlx5_ib_dealloc_xrcd(devr->x0, NULL); error2: mlx5_ib_destroy_cq(devr->c0, NULL); +err_create_cq: + kfree(devr->c0); error1: mlx5_ib_dealloc_pd(devr->p0, NULL); error0: @@ -5047,6 +5065,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_xrcd(devr->x0, NULL); mlx5_ib_dealloc_xrcd(devr->x1, NULL); mlx5_ib_destroy_cq(devr->c0, NULL); + kfree(devr->c0); mlx5_ib_dealloc_pd(devr->p0, NULL); kfree(devr->p0); @@ -6079,7 +6098,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->ib_dev.phys_port_cnt = dev->num_ports; @@ -6159,8 +6177,13 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) } static const struct ib_device_ops mlx5_ib_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_MLX5, + .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION, + .add_gid = mlx5_ib_add_gid, .alloc_mr = mlx5_ib_alloc_mr, + .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity, .alloc_pd = mlx5_ib_alloc_pd, .alloc_ucontext = mlx5_ib_alloc_ucontext, .attach_mcast = mlx5_ib_mcg_attach, @@ -6190,6 +6213,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .get_dma_mr = mlx5_ib_get_dma_mr, 
.get_link_layer = mlx5_ib_port_link_layer, .map_mr_sg = mlx5_ib_map_mr_sg, + .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi, .mmap = mlx5_ib_mmap, .modify_cq = mlx5_ib_modify_cq, .modify_device = mlx5_ib_modify_device, @@ -6214,6 +6238,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .resize_cq = mlx5_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), @@ -6256,7 +6281,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; dev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -6325,7 +6349,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); - dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 9f90be296ee0..fe1a76d8531c 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -55,9 +55,10 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int i = 0; struct scatterlist *sg; int entry; - unsigned long page_shift = umem->page_shift; if (umem->is_odp) { + unsigned int page_shift = to_ib_umem_odp(umem)->page_shift; + *ncont = ib_umem_page_count(umem); *count = *ncont << (page_shift - PAGE_SHIFT); *shift = page_shift; @@ -67,15 +68,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, return; } - addr = addr >> page_shift; + addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; m = find_first_bit(&tmp, BITS_PER_LONG); if 
(max_page_shift) - m = min_t(unsigned long, max_page_shift - page_shift, m); + m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> page_shift; - pfn = sg_dma_address(sg) >> page_shift; + len = sg_dma_len(sg) >> PAGE_SHIFT; + pfn = sg_dma_address(sg) >> PAGE_SHIFT; if (base + p != pfn) { /* If either the offset or the new * base are unaligned update m @@ -107,7 +108,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *ncont = 0; } - *shift = page_shift + m; + *shift = PAGE_SHIFT + m; *count = i; } @@ -140,8 +141,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, __be64 *pas, int access_flags) { - unsigned long umem_page_shift = umem->page_shift; - int shift = page_shift - umem_page_shift; + int shift = page_shift - PAGE_SHIFT; int mask = (1 << shift) - 1; int i, k, idx; u64 cur = 0; @@ -165,7 +165,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> umem_page_shift; + len = sg_dma_len(sg) >> PAGE_SHIFT; base = sg_dma_address(sg); /* Skip elements below offset */ @@ -184,7 +184,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, for (; k < len; k++) { if (!(i & mask)) { - cur = base + (k << umem_page_shift); + cur = base + (k << PAGE_SHIFT); cur |= access_flags; idx = (i >> shift) - offset; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ee73dc122d28..305d26cdf7f3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -431,9 +431,6 @@ struct mlx5_ib_qp { int create_type; - /* Store signature errors */ - bool signature_en; - struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; @@ -587,6 +584,9 @@ struct mlx5_ib_mr { void *descs; 
dma_addr_t desc_map; int ndescs; + int data_length; + int meta_ndescs; + int meta_length; int max_descs; int desc_size; int access_mode; @@ -605,6 +605,13 @@ struct mlx5_ib_mr { int access_flags; /* Needed for rereg MR */ struct mlx5_ib_mr *parent; + /* Needed for IB_MR_TYPE_INTEGRITY */ + struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr *klm_mr; + struct mlx5_ib_mr *mtt_mr; + u64 data_iova; + u64 pi_iova; + atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; @@ -1115,10 +1122,9 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, int buflen, size_t *bc); int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, int buflen, size_t *bc); -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); -int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -1148,8 +1154,15 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_sg, + u32 max_num_meta_sg); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int 
*meta_sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, @@ -1201,7 +1214,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 83b452d977d4..20ece6e0b2fc 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1507,10 +1507,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return 0; err: - if (mr->umem) { - ib_umem_release(mr->umem); - mr->umem = NULL; - } + ib_umem_release(mr->umem); + mr->umem = NULL; + clean_mr(dev, mr); return err; } @@ -1606,8 +1605,9 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ if (umem_odp->page_list) - mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem), - ib_umem_end(umem)); + mlx5_ib_invalidate_range(umem_odp, + ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); else mlx5_ib_free_implicit_mr(mr); /* @@ -1629,28 +1629,85 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) * remove the DMA mapping. 
*/ mlx5_mr_cache_free(dev, mr); - if (umem) { - ib_umem_release(umem); + ib_umem_release(umem); + if (umem) atomic_sub(npages, &dev->mdev->priv.reg_pages); - } + if (!mr->allocated_from_cache) kfree(mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { - dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + + if (ibmr->type == IB_MR_TYPE_INTEGRITY) { + dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr); + dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); + } + + dereg_mr(to_mdev(ibmr->device), mmr); + return 0; } -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) +static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, + int access_mode, int page_shift) +{ + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, log_page_size, page_shift); +} + +static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, int desc_size, int page_shift, + int access_mode, u32 *in, int inlen) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + int err; + + mr->access_mode = access_mode; + mr->desc_size = desc_size; + mr->max_descs = ndescs; + + err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); + if (err) + return err; + + mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + if (err) + goto err_free_descs; + + mr->mmkey.type = MLX5_MKEY_MR; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + + return 0; + +err_free_descs: + 
mlx5_free_priv_descs(mr); + return err; +} + +static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, + u32 max_num_sg, u32 max_num_meta_sg, + int desc_size, int access_mode) +{ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - int ndescs = ALIGN(max_num_sg, 4); + int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); + int page_shift = 0; struct mlx5_ib_mr *mr; - void *mkc; u32 *in; int err; @@ -1658,99 +1715,168 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, if (!mr) return ERR_PTR(-ENOMEM); + mr->ibmr.pd = pd; + mr->ibmr.device = pd->device; + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } + if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) + page_shift = PAGE_SHIFT; + + err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, + access_mode, in, inlen); + if (err) + goto err_free_in; + + mr->umem = NULL; + kfree(in); + + return mr; + +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, u32 *in, int inlen) +{ + return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), + PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, + inlen); +} + +static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, u32 *in, int inlen) +{ + return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), + 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); +} + +static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int max_num_sg, int max_num_meta_sg, + u32 *in, int inlen) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 psv_index[2]; + void *mkc; + int err; + + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) + return -ENOMEM; + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); + if (err) + goto err_free_sig; + + mr->sig->psv_memory.psv_idx = psv_index[0]; + 
mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; + mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, + sizeof(struct mlx5_klm), + MLX5_MKC_ACCESS_MODE_KLMS); + if (IS_ERR(mr->klm_mr)) { + err = PTR_ERR(mr->klm_mr); + goto err_destroy_psv; + } + mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, + sizeof(struct mlx5_mtt), + MLX5_MKC_ACCESS_MODE_MTT); + if (IS_ERR(mr->mtt_mr)) { + err = PTR_ERR(mr->mtt_mr); + goto err_free_klm_mr; + } + + /* Set bsf descriptors for mkey */ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, translations_octword_size, ndescs); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, bsf_en, 1); + MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); - if (mr_type == IB_MR_TYPE_MEM_REG) { - mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); - err = mlx5_alloc_priv_descs(pd->device, mr, - ndescs, sizeof(struct mlx5_mtt)); - if (err) - goto err_free_in; + err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, + MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); + if (err) + goto err_free_mtt_mr; - mr->desc_size = sizeof(struct mlx5_mtt); - mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_SG_GAPS) { - mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; + return 0; - err = mlx5_alloc_priv_descs(pd->device, mr, - ndescs, sizeof(struct mlx5_klm)); - if (err) - goto err_free_in; - mr->desc_size = sizeof(struct mlx5_klm); - mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_SIGNATURE) { - u32 psv_index[2]; - - MLX5_SET(mkc, mkc, bsf_en, 1); - MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); - mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); - if (!mr->sig) { - err = -ENOMEM; - goto err_free_in; - } 
+err_free_mtt_mr: + dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); + mr->mtt_mr = NULL; +err_free_klm_mr: + dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); + mr->klm_mr = NULL; +err_destroy_psv: + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); +err_free_sig: + kfree(mr->sig); - /* create mem & wire PSVs */ - err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, - 2, psv_index); - if (err) - goto err_free_sig; + return err; +} + +static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg, + u32 max_num_meta_sg) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + int ndescs = ALIGN(max_num_sg, 4); + struct mlx5_ib_mr *mr; + u32 *in; + int err; - mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; - mr->sig->psv_memory.psv_idx = psv_index[0]; - mr->sig->psv_wire.psv_idx = psv_index[1]; + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); - mr->sig->sig_status_checked = true; - mr->sig->sig_err_exists = false; - /* Next UMR, Arm SIGERR */ - ++mr->sig->sigerr_count; - } else { + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + mr->ibmr.device = pd->device; + mr->umem = NULL; + + switch (mr_type) { + case IB_MR_TYPE_MEM_REG: + err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); + break; + case IB_MR_TYPE_SG_GAPS: + err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); + break; + case IB_MR_TYPE_INTEGRITY: + err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, + max_num_meta_sg, in, inlen); + break; + default: mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); err = -EINVAL; - goto err_free_in; } - MLX5_SET(mkc, mkc, access_mode_1_0, 
mr->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, umr_en, 1); - - mr->ibmr.device = pd->device; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) - goto err_destroy_psv; + goto err_free_in; - mr->mmkey.type = MLX5_MKEY_MR; - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; - mr->umem = NULL; kfree(in); return &mr->ibmr; -err_destroy_psv: - if (mr->sig) { - if (mlx5_core_destroy_psv(dev->mdev, - mr->sig->psv_memory.psv_idx)) - mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", - mr->sig->psv_memory.psv_idx); - if (mlx5_core_destroy_psv(dev->mdev, - mr->sig->psv_wire.psv_idx)) - mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", - mr->sig->psv_wire.psv_idx); - } - mlx5_free_priv_descs(mr); -err_free_sig: - kfree(mr->sig); err_free_in: kfree(in); err_free: @@ -1758,6 +1884,19 @@ err_free: return ERR_PTR(err); } +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) +{ + return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); +} + +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_sg, u32 max_num_meta_sg) +{ + return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, + max_num_meta_sg); +} + struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata) { @@ -1887,16 +2026,53 @@ done: } static int +mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + unsigned int sg_offset = 0; + int n = 0; + + mr->meta_length = 0; + if (data_sg_nents == 1) { + n++; + mr->ndescs = 1; + if (data_sg_offset) + sg_offset = *data_sg_offset; + mr->data_length = sg_dma_len(data_sg) - sg_offset; + mr->data_iova = sg_dma_address(data_sg) + sg_offset; + if (meta_sg_nents == 1) 
{ + n++; + mr->meta_ndescs = 1; + if (meta_sg_offset) + sg_offset = *meta_sg_offset; + else + sg_offset = 0; + mr->meta_length = sg_dma_len(meta_sg) - sg_offset; + mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; + } + ibmr->length = mr->data_length + mr->meta_length; + } + + return n; +} + +static int mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, struct scatterlist *sgl, unsigned short sg_nents, - unsigned int *sg_offset_p) + unsigned int *sg_offset_p, + struct scatterlist *meta_sgl, + unsigned short meta_sg_nents, + unsigned int *meta_sg_offset_p) { struct scatterlist *sg = sgl; struct mlx5_klm *klms = mr->descs; unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; u32 lkey = mr->ibmr.pd->local_dma_lkey; - int i; + int i, j = 0; mr->ibmr.iova = sg_dma_address(sg) + sg_offset; mr->ibmr.length = 0; @@ -1911,12 +2087,36 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, sg_offset = 0; } - mr->ndescs = i; if (sg_offset_p) *sg_offset_p = sg_offset; - return i; + mr->ndescs = i; + mr->data_length = mr->ibmr.length; + + if (meta_sg_nents) { + sg = meta_sgl; + sg_offset = meta_sg_offset_p ? 
*meta_sg_offset_p : 0; + for_each_sg(meta_sgl, sg, meta_sg_nents, j) { + if (unlikely(i + j >= mr->max_descs)) + break; + klms[i + j].va = cpu_to_be64(sg_dma_address(sg) + + sg_offset); + klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) - + sg_offset); + klms[i + j].key = cpu_to_be32(lkey); + mr->ibmr.length += sg_dma_len(sg) - sg_offset; + + sg_offset = 0; + } + if (meta_sg_offset_p) + *meta_sg_offset_p = sg_offset; + + mr->meta_ndescs = j; + mr->meta_length = mr->ibmr.length - mr->data_length; + } + + return i + j; } static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) @@ -1933,6 +2133,181 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) return 0; } +static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + __be64 *descs; + + if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs)) + return -ENOMEM; + + descs = mr->descs; + descs[mr->ndescs + mr->meta_ndescs++] = + cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); + + return 0; +} + +static int +mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = mr->mtt_mr; + int n; + + pi_mr->ndescs = 0; + pi_mr->meta_ndescs = 0; + pi_mr->meta_length = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + pi_mr->ibmr.page_size = ibmr->page_size; + n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset, + mlx5_set_page); + if (n != data_sg_nents) + return n; + + pi_mr->data_iova = pi_mr->ibmr.iova; + pi_mr->data_length = pi_mr->ibmr.length; + pi_mr->ibmr.length = pi_mr->data_length; + ibmr->length = pi_mr->data_length; + + if (meta_sg_nents) { + u64 page_mask = ~((u64)ibmr->page_size - 1); + u64 iova = pi_mr->data_iova; + + n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, 
meta_sg_nents, + meta_sg_offset, mlx5_set_page_pi); + + pi_mr->meta_length = pi_mr->ibmr.length; + /* + * PI address for the HW is the offset of the metadata address + * relative to the first data page address. + * It equals to first data page address + size of data pages + + * metadata offset at the first metadata page + */ + pi_mr->pi_iova = (iova & page_mask) + + pi_mr->ndescs * ibmr->page_size + + (pi_mr->ibmr.iova & ~page_mask); + /* + * In order to use one MTT MR for data and metadata, we register + * also the gaps between the end of the data and the start of + * the metadata (the sig MR will verify that the HW will access + * to right addresses). This mapping is safe because we use + * internal mkey for the registration. + */ + pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova; + pi_mr->ibmr.iova = iova; + ibmr->length += pi_mr->meta_length; + } + + ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + return n; +} + +static int +mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = mr->klm_mr; + int n; + + pi_mr->ndescs = 0; + pi_mr->meta_ndescs = 0; + pi_mr->meta_length = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, + meta_sg, meta_sg_nents, meta_sg_offset); + + ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + /* This is zero-based memory region */ + pi_mr->data_iova = 0; + pi_mr->ibmr.iova = 0; + pi_mr->pi_iova = pi_mr->data_length; + ibmr->length = pi_mr->ibmr.length; + + return n; +} + +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, 
struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = NULL; + int n; + + WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); + + mr->ndescs = 0; + mr->data_length = 0; + mr->data_iova = 0; + mr->meta_ndescs = 0; + mr->pi_iova = 0; + /* + * As a performance optimization, if possible, there is no need to + * perform UMR operation to register the data/metadata buffers. + * First try to map the sg lists to PA descriptors with local_dma_lkey. + * Fallback to UMR only in case of a failure. + */ + n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (n == data_sg_nents + meta_sg_nents) + goto out; + /* + * As a performance optimization, if possible, there is no need to map + * the sg lists to KLM descriptors. First try to map the sg lists to MTT + * descriptors and fallback to KLM only in case of a failure. + * It's more efficient for the HW to work with MTT descriptors + * (especially in high load). + * Use KLM (indirect access) only if it's mandatory. 
+ */ + pi_mr = mr->mtt_mr; + n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (n == data_sg_nents + meta_sg_nents) + goto out; + + pi_mr = mr->klm_mr; + n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (unlikely(n != data_sg_nents + meta_sg_nents)) + return -ENOMEM; + +out: + /* This is zero-based memory region */ + ibmr->iova = 0; + mr->pi_mr = pi_mr; + if (pi_mr) + ibmr->sig_attrs->meta_length = pi_mr->meta_length; + else + ibmr->sig_attrs->meta_length = mr->meta_length; + + return 0; +} + int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { @@ -1946,7 +2321,8 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, DMA_TO_DEVICE); if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) - n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0, + NULL); else n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx5_set_page); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 831c450b271a..5b642d81e617 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -150,7 +150,7 @@ static struct ib_umem_odp *odp_lookup(u64 start, u64 length, if (!rb) goto not_found; odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(&odp->umem) > start + length) + if (ib_umem_start(odp) > start + length) goto not_found; } not_found: @@ -200,7 +200,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, static void mr_leaf_free_action(struct work_struct *work) { struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT; + int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; 
mr->parent = NULL; @@ -224,7 +224,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; - struct ib_umem *umem; int in_block = 0; u64 addr; @@ -232,15 +231,14 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, pr_err("invalidation called on NULL umem or non-ODP umem\n"); return; } - umem = &umem_odp->umem; mr = umem_odp->private; if (!mr || !mr->ibmr.pd) return; - start = max_t(u64, ib_umem_start(umem), start); - end = min_t(u64, ib_umem_end(umem), end); + start = max_t(u64, ib_umem_start(umem_odp), start); + end = min_t(u64, ib_umem_end(umem_odp), end); /* * Iteration one - zap the HW's MTTs. The notifiers_count ensures that @@ -249,8 +247,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, * but they will write 0s as well, so no difference in the end result. */ - for (addr = start; addr < end; addr += BIT(umem->page_shift)) { - idx = (addr - ib_umem_start(umem)) >> umem->page_shift; + for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) { + idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; /* * Strive to write the MTTs in chunks, but avoid overwriting * non-existing MTTs. 
The huristic here can be improved to @@ -544,13 +542,12 @@ static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end, void *cookie) { struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie; - struct ib_umem *umem = &umem_odp->umem; if (mr->parent != imr) return 0; - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), - ib_umem_end(umem)); + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); if (umem_odp->dying) return 0; @@ -602,9 +599,9 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, } next_mr: - size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt); + size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt); - page_shift = mr->umem->page_shift; + page_shift = odp->page_shift; page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 768c7e81f688..8b7a60ada92c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -442,9 +442,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN && ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) - return MLX5_SIG_WQE_SIZE; + return MLX5_SIG_WQE_SIZE; else return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } @@ -496,9 +496,6 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) - qp->signature_en = true; - wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, 
log_max_qp_sz))) { @@ -790,8 +787,7 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, atomic_dec(&dev->delay_drop.rqs_cnt); mlx5_ib_db_unmap_user(context, &rwq->db); - if (rwq->umem) - ib_umem_release(rwq->umem); + ib_umem_release(rwq->umem); } static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, @@ -977,8 +973,7 @@ err_free: kvfree(*in); err_umem: - if (ubuffer->umem) - ib_umem_release(ubuffer->umem); + ib_umem_release(ubuffer->umem); err_bfreg: if (bfregn != MLX5_IB_INVALID_BFREG) @@ -997,8 +992,7 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, ibucontext); mlx5_ib_db_unmap_user(context, &qp->db); - if (base->ubuffer.umem) - ib_umem_release(base->ubuffer.umem); + ib_umem_release(base->ubuffer.umem); /* * Free only the BFREGs which are handled by the kernel. @@ -1042,7 +1036,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, void *qpc; int err; - if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | + if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | @@ -4170,15 +4164,13 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr, bool umr_inline) + struct mlx5_ib_mr *mr, u8 flags) { - int size = mr->ndescs * mr->desc_size; + int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; memset(umr, 0, sizeof(*umr)); - umr->flags = MLX5_UMR_CHECK_NOT_FREE; - if (umr_inline) - umr->flags |= MLX5_UMR_INLINE; + umr->flags = flags; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); umr->mkey_mask = frwr_mkey_mask(); } @@ -4305,7 +4297,7 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, struct mlx5_ib_mr *mr, u32 key, int access) { - int ndescs = ALIGN(mr->ndescs, 8) >> 1; + int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); @@ -4356,7 +4348,7 @@ static void set_reg_data_seg(struct 
mlx5_wqe_data_seg *dseg, struct mlx5_ib_mr *mr, struct mlx5_ib_pd *pd) { - int bcount = mr->desc_size * mr->ndescs; + int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); dseg->addr = cpu_to_be64(mr->desc_map); dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); @@ -4549,23 +4541,37 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, - struct mlx5_ib_qp *qp, void **seg, - int *size, void **cur_edge) +static int set_sig_data_segment(const struct ib_send_wr *send_wr, + struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { - struct ib_sig_attrs *sig_attrs = wr->sig_attrs; - struct ib_mr *sig_mr = wr->sig_mr; struct mlx5_bsf *bsf; - u32 data_len = wr->wr.sg_list->length; - u32 data_key = wr->wr.sg_list->lkey; - u64 data_va = wr->wr.sg_list->addr; + u32 data_len; + u32 data_key; + u64 data_va; + u32 prot_len = 0; + u32 prot_key = 0; + u64 prot_va = 0; + bool prot = false; int ret; int wqe_size; + struct mlx5_ib_mr *mr = to_mmr(sig_mr); + struct mlx5_ib_mr *pi_mr = mr->pi_mr; + + data_len = pi_mr->data_length; + data_key = pi_mr->ibmr.lkey; + data_va = pi_mr->data_iova; + if (pi_mr->meta_ndescs) { + prot_len = pi_mr->meta_length; + prot_key = pi_mr->ibmr.lkey; + prot_va = pi_mr->pi_iova; + prot = true; + } - if (!wr->prot || - (data_key == wr->prot->lkey && - data_va == wr->prot->addr && - data_len == wr->prot->length)) { + if (!prot || (data_key == prot_key && data_va == prot_va && + data_len == prot_len)) { /** * Source domain doesn't contain signature information * or data and protection are interleaved in memory. 
@@ -4599,8 +4605,6 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, struct mlx5_stride_block_ctrl_seg *sblock_ctrl; struct mlx5_stride_block_entry *data_sentry; struct mlx5_stride_block_entry *prot_sentry; - u32 prot_key = wr->prot->lkey; - u64 prot_va = wr->prot->addr; u16 block_size = sig_attrs->mem.sig.dif.pi_interval; int prot_size; @@ -4650,17 +4654,15 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - const struct ib_sig_handover_wr *wr, u32 size, - u32 length, u32 pdn) + struct ib_mr *sig_mr, int access_flags, + u32 size, u32 length, u32 pdn) { - struct ib_mr *sig_mr = wr->sig_mr; u32 sig_key = sig_mr->rkey; u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(wr->access_flags) | - MLX5_MKC_ACCESS_MODE_KLMS; + seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); @@ -4680,49 +4682,50 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = sig_mkey_mask(); } - -static int set_sig_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) +static int set_pi_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { - const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); - struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); + const struct ib_reg_wr *wr = reg_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); + struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; + struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; u32 pdn = get_pd(qp)->pdn; u32 xlt_size; int region_len, ret; - if (unlikely(wr->wr.num_sge != 1) || - unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || - 
unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + if (unlikely(send_wr->num_sge != 0) || + unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ - region_len = wr->wr.sg_list->length; - if (wr->prot && - (wr->prot->lkey != wr->wr.sg_list->lkey || - wr->prot->addr != wr->wr.sg_list->addr || - wr->prot->length != wr->wr.sg_list->length)) - region_len += wr->prot->length; + region_len = pi_mr->ibmr.length; /** * KLM octoword size - if protection was provided * then we use strided block format (3 octowords), * else we use single KLM (1 octoword) **/ - xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm); + if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) + xlt_size = 0x30; + else + xlt_size = sizeof(struct mlx5_klm); set_sig_umr_segment(*seg, xlt_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn); + set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, + pdn); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - ret = set_sig_data_segment(wr, qp, seg, size, cur_edge); + ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, + cur_edge); if (ret) return ret; @@ -4759,12 +4762,14 @@ static int set_psv_wr(struct ib_sig_domain *domain, static int set_reg_wr(struct mlx5_ib_qp *qp, const struct ib_reg_wr *wr, - void **seg, int *size, void **cur_edge) + void **seg, int *size, void **cur_edge, + bool check_not_free) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - size_t mr_list_size = mr->ndescs * mr->desc_size; + int mr_list_size = (mr->ndescs + mr->meta_ndescs) * 
mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; + u8 flags = 0; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), @@ -4772,7 +4777,12 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return -EINVAL; } - set_reg_umr_seg(*seg, mr, umr_inline); + if (check_not_free) + flags |= MLX5_UMR_CHECK_NOT_FREE; + if (umr_inline) + flags |= MLX5_UMR_INLINE; + + set_reg_umr_seg(*seg, mr, flags); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); @@ -4898,8 +4908,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; + struct ib_reg_wr reg_pi_wr; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr pa_pi_mr; + struct ib_sig_attrs *sig_attrs; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; void *cur_edge; @@ -4953,7 +4967,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - if (wr->opcode == IB_WR_REG_MR) { + if (wr->opcode == IB_WR_REG_MR || + wr->opcode == IB_WR_REG_MR_INTEGRITY) { fence = dev->umr_fence; next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; } else { @@ -5003,7 +5018,7 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size, - &cur_edge); + &cur_edge, true); if (err) { *bad_wr = wr; goto out; @@ -5011,26 +5026,82 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, num_sge = 0; break; - case IB_WR_REG_SIG_MR: - qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; - mr = to_mmr(sig_handover_wr(wr)->sig_mr); - + case IB_WR_REG_MR_INTEGRITY: + qp->sq.wr_data[idx] = 
IB_WR_REG_MR_INTEGRITY; + + mr = to_mmr(reg_wr(wr)->mr); + pi_mr = mr->pi_mr; + + if (pi_mr) { + memset(&reg_pi_wr, 0, + sizeof(struct ib_reg_wr)); + + reg_pi_wr.mr = &pi_mr->ibmr; + reg_pi_wr.access = reg_wr(wr)->access; + reg_pi_wr.key = pi_mr->ibmr.rkey; + + ctrl->imm = cpu_to_be32(reg_pi_wr.key); + /* UMR for data + prot registration */ + err = set_reg_wr(qp, &reg_pi_wr, &seg, + &size, &cur_edge, + false); + if (err) { + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, + cur_edge, idx, wr->wr_id, + nreq, fence, + MLX5_OPCODE_UMR); + + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, &cur_edge, + nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + } else { + memset(&pa_pi_mr, 0, + sizeof(struct mlx5_ib_mr)); + /* No UMR, use local_dma_lkey */ + pa_pi_mr.ibmr.lkey = + mr->ibmr.pd->local_dma_lkey; + + pa_pi_mr.ndescs = mr->ndescs; + pa_pi_mr.data_length = mr->data_length; + pa_pi_mr.data_iova = mr->data_iova; + if (mr->meta_ndescs) { + pa_pi_mr.meta_ndescs = + mr->meta_ndescs; + pa_pi_mr.meta_length = + mr->meta_length; + pa_pi_mr.pi_iova = mr->pi_iova; + } + + pa_pi_mr.ibmr.length = mr->ibmr.length; + mr->pi_mr = &pa_pi_mr; + } ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - err = set_sig_umr_wr(wr, qp, &seg, &size, - &cur_edge); + /* UMR for sig MR */ + err = set_pi_umr_wr(wr, qp, &seg, &size, + &cur_edge); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, fence, MLX5_OPCODE_UMR); + /* * SET_PSV WQEs are not signaled and solicited * on error */ + sig_attrs = mr->ibmr.sig_attrs; err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, nreq, false, true); @@ -5040,19 +5111,18 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, *bad_wr = wr; goto out; } - - err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem, - mr->sig->psv_memory.psv_idx, &seg, - &size); + err = set_psv_wr(&sig_attrs->mem, + 
mr->sig->psv_memory.psv_idx, + &seg, &size); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, + wr->wr_id, nreq, next_fence, MLX5_OPCODE_SET_PSV); + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, nreq, false, true); @@ -5062,20 +5132,20 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, *bad_wr = wr; goto out; } - - err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, - mr->sig->psv_wire.psv_idx, &seg, - &size); + err = set_psv_wr(&sig_attrs->wire, + mr->sig->psv_wire.psv_idx, + &seg, &size); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, + wr->wr_id, nreq, next_fence, MLX5_OPCODE_SET_PSV); - qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + + qp->next_fence = + MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -6047,7 +6117,7 @@ err: return ERR_PTR(err); } -int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) +void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); @@ -6055,8 +6125,6 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); destroy_user_rq(dev, wq->pd, rwq, udata); kfree(rwq); - - return 0; } struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, |