-rw-r--r--  drivers/infiniband/core/cma_configfs.c | 12
-rw-r--r--  drivers/infiniband/core/counters.c | 62
-rw-r--r--  drivers/infiniband/core/nldev.c | 4
-rw-r--r--  drivers/infiniband/core/rw.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 49
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 29
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h | 2
-rw-r--r--  drivers/infiniband/hw/cxgb4/restrack.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 115
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 10
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mcast.c | 64
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c | 226
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.h | 94
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 11
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 16
-rw-r--r--  drivers/infiniband/sw/siw/siw.h | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw_main.c | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp.c | 271
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_rx.c | 26
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_tx.c | 4
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.c | 20
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 10
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 2
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 11
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt.c | 120
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt.h | 3
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-pri.h | 5
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 5
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv.c | 20
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs.c | 32
-rw-r--r--  include/rdma/rdma_counter.h | 3
36 files changed, 723 insertions, 529 deletions
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 97a77ea8d3c9..e0d5e3bae458 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -204,7 +204,6 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
unsigned int i;
unsigned int ports_num;
struct cma_dev_port_group *ports;
- int err;
ibdev = cma_get_ib_dev(cma_dev);
@@ -215,10 +214,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
GFP_KERNEL);
- if (!ports) {
- err = -ENOMEM;
- goto free;
- }
+ if (!ports)
+ return -ENOMEM;
for (i = 0; i < ports_num; i++) {
char port_str[10];
@@ -234,12 +231,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
}
cma_dev_group->ports = ports;
-
return 0;
-free:
- kfree(ports);
- cma_dev_group->ports = NULL;
- return err;
}
static void release_cma_dev(struct config_item *item)
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 92745522250e..2b9a1ee7a160 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -10,30 +10,35 @@
#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
-static int __counter_set_mode(struct rdma_counter_mode *curr,
+static int __counter_set_mode(struct rdma_port_counter *port_counter,
enum rdma_nl_counter_mode new_mode,
enum rdma_nl_counter_mask new_mask)
{
- if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
- ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
- (curr->mode != RDMA_COUNTER_MODE_NONE)))
- return -EINVAL;
+ if (new_mode == RDMA_COUNTER_MODE_AUTO && port_counter->num_counters)
+ if (new_mask & ~ALL_AUTO_MODE_MASKS ||
+ port_counter->mode.mode != RDMA_COUNTER_MODE_NONE)
+ return -EINVAL;
- curr->mode = new_mode;
- curr->mask = new_mask;
+ port_counter->mode.mode = new_mode;
+ port_counter->mode.mask = new_mask;
return 0;
}
/**
* rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
*
- * When @on is true, the @mask must be set; When @on is false, it goes
- * into manual mode if there's any counter, so that the user is able to
- * manually access them.
+ * @dev: Device to operate
+ * @port: Port to use
+ * @mask: Mask to configure
+ * @extack: Message to the user
+ *
+ * Return 0 on success.
*/
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
- bool on, enum rdma_nl_counter_mask mask)
+ enum rdma_nl_counter_mask mask,
+ struct netlink_ext_ack *extack)
{
+ enum rdma_nl_counter_mode mode = RDMA_COUNTER_MODE_AUTO;
struct rdma_port_counter *port_counter;
int ret;
@@ -42,23 +47,23 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
return -EOPNOTSUPP;
mutex_lock(&port_counter->lock);
- if (on) {
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_AUTO, mask);
- } else {
- if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
- ret = -EINVAL;
- goto out;
- }
+ if (mask) {
+ ret = __counter_set_mode(port_counter, mode, mask);
+ if (ret)
+ NL_SET_ERR_MSG(
+ extack,
+ "Turning on auto mode is not allowed when there is bound QP");
+ goto out;
+ }
- if (port_counter->num_counters)
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_MANUAL, 0);
- else
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_NONE, 0);
+ if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
+ ret = -EINVAL;
+ goto out;
}
+ mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
+ RDMA_COUNTER_MODE_NONE;
+ ret = __counter_set_mode(port_counter, mode, 0);
out:
mutex_unlock(&port_counter->lock);
return ret;
@@ -122,8 +127,8 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port,
mutex_lock(&port_counter->lock);
switch (mode) {
case RDMA_COUNTER_MODE_MANUAL:
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_MANUAL, 0);
+ ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
+ 0);
if (ret) {
mutex_unlock(&port_counter->lock);
goto err_mode;
@@ -170,8 +175,7 @@ static void rdma_counter_free(struct rdma_counter *counter)
port_counter->num_counters--;
if (!port_counter->num_counters &&
(port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
- __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
- 0);
+ __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);
mutex_unlock(&port_counter->lock);
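
For reference, the transition rule the reworked __counter_set_mode() enforces in the hunks above, pulled out as a standalone sketch: a non-zero mask now means "switch the port to auto mode", while a zero mask falls back to manual mode if counters are still bound and to none otherwise. Names below are local to this sketch, not the kernel's.

#include <errno.h>

enum mode { MODE_NONE, MODE_AUTO, MODE_MANUAL };

/* Mirrors the added check: while counters are already bound to the port,
 * auto mode may only be entered from MODE_NONE and only with supported
 * mask bits; otherwise the new mode and mask are simply recorded.
 */
static int set_mode(enum mode *cur, unsigned int *cur_mask,
                    enum mode new_mode, unsigned int new_mask,
                    unsigned int supported_mask, unsigned int bound)
{
        if (new_mode == MODE_AUTO && bound &&
            ((new_mask & ~supported_mask) || *cur != MODE_NONE))
                return -EINVAL;

        *cur = new_mode;
        *cur_mask = new_mask;
        return 0;
}
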
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 08366e254b1d..d306049c22a2 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1768,9 +1768,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
mask = nla_get_u32(
tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
-
- ret = rdma_counter_set_auto_mode(device, port,
- mask ? true : false, mask);
+ ret = rdma_counter_set_auto_mode(device, port, mask, extack);
if (ret)
goto err_msg;
} else {
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index a96030b784eb..31156e22d3e7 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -410,7 +410,7 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
ctx->type = RDMA_RW_SIG_MR;
ctx->nr_ops = 1;
- ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);
+ ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
goto out_unmap_prot_sg;
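
The kcalloc(1, ...) to kzalloc(...) conversion here (and the identical one in cxgb4's restrack.c below) does not change behaviour: both return zeroed memory for a single object, but kzalloc() states that intent directly and skips the pointless n * size overflow check. Schematically:

ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);   /* before: array of one */
ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);      /* after: one zeroed object */
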
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 401bdc9e931e..ba515efd4fdc 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -469,7 +469,6 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
struct bnxt_re_mr *mr = NULL;
dma_addr_t dma_addr = 0;
struct ib_mw *mw;
- u64 pbl_tbl;
int rc;
dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
@@ -504,9 +503,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
mr->ib_mr.lkey = mr->qplib_mr.lkey;
mr->qplib_mr.va = (u64)(unsigned long)fence->va;
mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
- pbl_tbl = dma_addr;
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
- BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL,
+ BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n");
goto fail;
@@ -3589,7 +3587,6 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr;
- u64 pbl = 0;
int rc;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
@@ -3608,7 +3605,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
mr->qplib_mr.hwq.level = PBL_LVL_MAX;
mr->qplib_mr.total_size = -1; /* Infinte length */
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false,
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0,
PAGE_SIZE);
if (rc)
goto fail_mr;
@@ -3779,19 +3776,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
return rc;
}
-static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
- int page_shift)
-{
- u64 *pbl_tbl = pbl_tbl_orig;
- u64 page_size = BIT_ULL(page_shift);
- struct ib_block_iter biter;
-
- rdma_umem_for_each_dma_block(umem, &biter, page_size)
- *pbl_tbl++ = rdma_block_iter_dma_address(&biter);
-
- return pbl_tbl - pbl_tbl_orig;
-}
-
/* uverbs */
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
@@ -3801,7 +3785,6 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr;
struct ib_umem *umem;
- u64 *pbl_tbl = NULL;
unsigned long page_size;
int umem_pgs, rc;
@@ -3846,39 +3829,19 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
}
mr->qplib_mr.total_size = length;
- if (page_size == BNXT_RE_PAGE_SIZE_4K &&
- length > BNXT_RE_MAX_MR_SIZE_LOW) {
- ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu",
- length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
- rc = -EINVAL;
- goto free_umem;
- }
-
umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
- pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL);
- if (!pbl_tbl) {
- rc = -ENOMEM;
- goto free_umem;
- }
-
- /* Map umem buf ptrs to the PBL */
- umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size));
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
- umem_pgs, false, page_size);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem,
+ umem_pgs, page_size);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to register user MR");
- goto fail;
+ goto free_umem;
}
- kfree(pbl_tbl);
-
mr->ib_mr.lkey = mr->qplib_mr.lkey;
mr->ib_mr.rkey = mr->qplib_mr.lkey;
atomic_inc(&rdev->mr_count);
return &mr->ib_mr;
-fail:
- kfree(pbl_tbl);
free_umem:
ib_umem_release(umem);
free_mrw:
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 6316179583a6..049b3576302b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -650,42 +650,32 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
}
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
- u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size)
+ struct ib_umem *umem, int num_pbls, u32 buf_pg_size)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_sg_info sginfo = {};
struct creq_register_mr_resp resp;
struct cmdq_register_mr req;
- int pg_ptrs, pages, i, rc;
u16 cmd_flags = 0, level;
- dma_addr_t **pbl_ptr;
+ int pages, rc;
u32 pg_size;
if (num_pbls) {
+ pages = roundup_pow_of_two(num_pbls);
/* Allocate memory for the non-leaf pages to store buf ptrs.
* Non-leaf pages always uses system PAGE_SIZE
*/
- pg_ptrs = roundup_pow_of_two(num_pbls);
- pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
- if (!pages)
- pages++;
-
- if (pages > MAX_PBL_LVL_1_PGS) {
- dev_err(&res->pdev->dev,
- "SP: Reg MR: pages requested (0x%x) exceeded max (0x%x)\n",
- pages, MAX_PBL_LVL_1_PGS);
- return -ENOMEM;
- }
/* Free the hwq if it already exist, must be a rereg */
if (mr->hwq.max_elements)
bnxt_qplib_free_hwq(res, &mr->hwq);
/* Use system PAGE_SIZE */
hwq_attr.res = res;
hwq_attr.depth = pages;
- hwq_attr.stride = PAGE_SIZE;
+ hwq_attr.stride = buf_pg_size;
hwq_attr.type = HWQ_TYPE_MR;
hwq_attr.sginfo = &sginfo;
+ hwq_attr.sginfo->umem = umem;
hwq_attr.sginfo->npages = pages;
hwq_attr.sginfo->pgsize = PAGE_SIZE;
hwq_attr.sginfo->pgshft = PAGE_SHIFT;
@@ -695,11 +685,6 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
"SP: Reg MR memory allocation failed\n");
return -ENOMEM;
}
- /* Write to the hwq */
- pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr;
- for (i = 0; i < num_pbls; i++)
- pbl_ptr[PTR_PG(i)][PTR_IDX(i)] =
- (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID;
}
RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags);
@@ -711,7 +696,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
req.pbl = 0;
pg_size = PAGE_SIZE;
} else {
- level = mr->hwq.level + 1;
+ level = mr->hwq.level;
req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
}
pg_size = buf_pg_size ? buf_pg_size : PAGE_SIZE;
@@ -728,7 +713,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
req.mr_size = cpu_to_le64(mr->total_size);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, block);
+ (void *)&resp, NULL, false);
if (rc)
goto fail;
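
Net effect of the two bnxt_re hunks above on MR registration: the caller no longer flattens the umem into a pbl_tbl array (the deleted fill_umem_pbl_tbl()); instead the ib_umem is passed through bnxt_qplib_reg_mr() and hung off hwq_attr.sginfo, and the HWQ allocation walks its DMA blocks internally. Kernel-only MRs (the fence MR and the DMA MR) simply pass NULL. A before/after sketch of the call site, with the surrounding code elided:

/* before: caller-built PBL table plus an explicit 'block' flag */
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
                       umem_pgs, false, page_size);

/* after: hand the umem straight through; qplib walks its DMA blocks */
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem,
                       umem_pgs, page_size);
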
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 967890cd81f2..bc228340684f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -254,7 +254,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
bool block);
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
- u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size);
+ struct ib_umem *umem, int num_pbls, u32 buf_pg_size);
int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mr, int max);
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
index b32e6516d65f..ff645b955a08 100644
--- a/drivers/infiniband/hw/cxgb4/restrack.c
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -209,7 +209,7 @@ int c4iw_fill_res_cm_id_entry(struct sk_buff *msg,
epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data;
if (!epcp)
return 0;
- uep = kcalloc(1, sizeof(*uep), GFP_KERNEL);
+ uep = kzalloc(sizeof(*uep), GFP_KERNEL);
if (!uep)
return 0;
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 8386c84c2d92..38f311f855b5 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -242,7 +242,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
msgs[0].buf = offset_bytes;
msgs[1].addr = slave_addr;
- msgs[1].flags = I2C_M_NOSTART,
+ msgs[1].flags = I2C_M_NOSTART;
msgs[1].len = len;
msgs[1].buf = data;
break;
@@ -290,7 +290,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
msgs[0].buf = offset_bytes;
msgs[1].addr = slave_addr;
- msgs[1].flags = I2C_M_RD,
+ msgs[1].flags = I2C_M_RD;
msgs[1].len = len;
msgs[1].buf = data;
break;
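
Both hunks in this file replace a stray comma operator with a semicolon. The original code was still correct, because the comma operator evaluates both assignments and discards the results, but it quietly fuses two statements into one expression and trips static checkers. A minimal standalone illustration:

void demo(void)
{
        int a, b;

        a = 1,          /* comma operator: both assignments still execute */
        b = 2;          /* but as one statement; ';' makes the intent clear */
        (void)a;
        (void)b;
}
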
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index a307d4c8b15a..27ec2851160a 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1740,7 +1740,7 @@ retry:
sane = (hwhead == swhead);
if (unlikely(!sane)) {
- dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
+ dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n",
sde->this_idx,
use_dmahead ? "dma" : "kreg",
hwhead, swhead, swtail, cnt);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 8533fc2d8df2..ffb7f7e5c641 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -38,11 +38,74 @@
#include "hns_roce_hem.h"
#include "hns_roce_common.h"
+static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank)
+{
+ u32 least_load = bank[0].inuse;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
+ }
+
+ return bankid;
+}
+
+static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+ u8 bankid;
+ int id;
+
+ mutex_lock(&cq_table->bank_mutex);
+ bankid = get_least_load_bankid_for_cq(cq_table->bank);
+ bank = &cq_table->bank[bankid];
+
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ mutex_unlock(&cq_table->bank_mutex);
+ return id;
+ }
+
+	/* the lower 2 bits are the bankid */
+ hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid;
+ bank->inuse++;
+ mutex_unlock(&cq_table->bank_mutex);
+
+ return 0;
+}
+
+static inline u8 get_cq_bankid(unsigned long cqn)
+{
+ /* The lower 2 bits of CQN are used to hash to different banks */
+ return (u8)(cqn & GENMASK(1, 0));
+}
+
+static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+
+ bank = &cq_table->bank[get_cq_bankid(cqn)];
+
+ ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT);
+
+ mutex_lock(&cq_table->bank_mutex);
+ bank->inuse--;
+ mutex_unlock(&cq_table->bank_mutex);
+}
+
static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_cq_table *cq_table;
u64 mtts[MTT_MIN_COUNT] = { 0 };
dma_addr_t dma_handle;
int ret;
@@ -54,13 +117,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
return -EINVAL;
}
- cq_table = &hr_dev->cq_table;
- ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret);
- return ret;
- }
-
/* Get CQC memory HEM(Hardware Entry Memory) table */
ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
if (ret) {
@@ -110,7 +166,6 @@ err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
err_out:
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
return ret;
}
@@ -138,7 +193,6 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
wait_for_completion(&hr_cq->free);
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
@@ -298,11 +352,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
goto err_cq_buf;
}
+ ret = alloc_cqn(hr_dev, hr_cq);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
+ goto err_cq_db;
+ }
+
ret = alloc_cqc(hr_dev, hr_cq);
if (ret) {
ibdev_err(ibdev,
"failed to alloc CQ context, ret = %d.\n", ret);
- goto err_cq_db;
+ goto err_cqn;
}
/*
@@ -326,6 +386,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
err_cqc:
free_cqc(hr_dev, hr_cq);
+err_cqn:
+ free_cqn(hr_dev, hr_cq->cqn);
err_cq_db:
free_cq_db(hr_dev, hr_cq, udata);
err_cq_buf:
@@ -341,9 +403,11 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
if (hr_dev->hw->destroy_cq)
hr_dev->hw->destroy_cq(ib_cq, udata);
- free_cq_buf(hr_dev, hr_cq);
- free_cq_db(hr_dev, hr_cq, udata);
free_cqc(hr_dev, hr_cq);
+ free_cqn(hr_dev, hr_cq->cqn);
+ free_cq_db(hr_dev, hr_cq, udata);
+ free_cq_buf(hr_dev, hr_cq);
+
return 0;
}
@@ -402,18 +466,33 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
complete(&hr_cq->free);
}
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ unsigned int reserved_from_bot;
+ unsigned int i;
+ mutex_init(&cq_table->bank_mutex);
xa_init(&cq_table->array);
- return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs,
- hr_dev->caps.num_cqs - 1,
- hr_dev->caps.reserved_cqs, 0);
+ reserved_from_bot = hr_dev->caps.reserved_cqs;
+
+ for (i = 0; i < reserved_from_bot; i++) {
+ cq_table->bank[get_cq_bankid(i)].inuse++;
+ cq_table->bank[get_cq_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ ida_init(&cq_table->bank[i].ida);
+ cq_table->bank[i].max = hr_dev->caps.num_cqs /
+ HNS_ROCE_CQ_BANK_NUM - 1;
+ }
}
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap);
+ int i;
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
+ ida_destroy(&hr_dev->cq_table.bank[i].ida);
}
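
The CQN layout introduced above interleaves four banks: the low CQ_BANKID_SHIFT (2) bits carry the bank chosen by least load, and the remaining bits carry the ID allocated from that bank's IDA, so decoding needs only a mask and a shift. A small standalone example of the encode/decode arithmetic (the values are made up):

#include <stdio.h>

#define CQ_BANKID_SHIFT 2
#define CQ_BANKID_MASK  0x3UL   /* lower two bits select one of 4 banks */

int main(void)
{
        unsigned long id = 5, bankid = 3;
        unsigned long cqn = (id << CQ_BANKID_SHIFT) | bankid;

        /* encode: bank 3, in-bank id 5 -> CQN 23; decode recovers both */
        printf("cqn=%lu bank=%lu id=%lu\n",
               cqn, cqn & CQ_BANKID_MASK, cqn >> CQ_BANKID_SHIFT);
        return 0;
}
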
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 55d538625e36..c46b330a8c0a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -119,6 +119,9 @@
#define SRQ_DB_REG 0x230
#define HNS_ROCE_QP_BANK_NUM 8
+#define HNS_ROCE_CQ_BANK_NUM 4
+
+#define CQ_BANKID_SHIFT 2
/* The chip implementation of the consumer index is calculated
* according to twice the actual EQ depth
@@ -536,9 +539,10 @@ struct hns_roce_qp_table {
};
struct hns_roce_cq_table {
- struct hns_roce_bitmap bitmap;
struct xarray array;
struct hns_roce_hem_table table;
+ struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM];
+ struct mutex bank_mutex;
};
struct hns_roce_srq_table {
@@ -779,7 +783,7 @@ struct hns_roce_caps {
u32 max_cqes;
u32 min_cqes;
u32 min_wqes;
- int reserved_cqs;
+ u32 reserved_cqs;
int reserved_srqs;
int num_aeq_vectors;
int num_comp_vectors;
@@ -1164,7 +1168,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 833e1f259936..4c068899c52b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -361,7 +361,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,
} else if (unlikely(hr_qp->state == IB_QPS_RESET ||
hr_qp->state == IB_QPS_INIT ||
hr_qp->state == IB_QPS_RTR)) {
- ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n",
+ ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
hr_qp->state);
return -EINVAL;
} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index d9179bae4989..2b78b1ff63d3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -748,11 +748,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
goto err_pd_table_free;
}
- ret = hns_roce_init_cq_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to init completion queue table.\n");
- goto err_mr_table_free;
- }
+ hns_roce_init_cq_table(hr_dev);
ret = hns_roce_init_qp_table(hr_dev);
if (ret) {
@@ -777,8 +773,6 @@ err_qp_table_free:
err_cq_table_free:
hns_roce_cleanup_cq_table(hr_dev);
-
-err_mr_table_free:
hns_roce_cleanup_mr_table(hr_dev);
err_pd_table_free:
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index c02315aed8d1..5be47ce7d319 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -7,45 +7,61 @@
#include "rxe.h"
#include "rxe_loc.h"
+/* caller should hold mc_grp_pool->pool_lock */
+static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe,
+ struct rxe_pool *pool,
+ union ib_gid *mgid)
+{
+ int err;
+ struct rxe_mc_grp *grp;
+
+ grp = rxe_alloc_nl(&rxe->mc_grp_pool);
+ if (!grp)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&grp->qp_list);
+ spin_lock_init(&grp->mcg_lock);
+ grp->rxe = rxe;
+ rxe_add_key_nl(grp, mgid);
+
+ err = rxe_mcast_add(rxe, mgid);
+ if (unlikely(err)) {
+ rxe_drop_key_nl(grp);
+ rxe_drop_ref(grp);
+ return ERR_PTR(err);
+ }
+
+ return grp;
+}
+
int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
struct rxe_mc_grp **grp_p)
{
int err;
struct rxe_mc_grp *grp;
+ struct rxe_pool *pool = &rxe->mc_grp_pool;
+ unsigned long flags;
- if (rxe->attr.max_mcast_qp_attach == 0) {
- err = -EINVAL;
- goto err1;
- }
+ if (rxe->attr.max_mcast_qp_attach == 0)
+ return -EINVAL;
- grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
+ write_lock_irqsave(&pool->pool_lock, flags);
+
+ grp = rxe_pool_get_key_nl(pool, mgid);
if (grp)
goto done;
- grp = rxe_alloc(&rxe->mc_grp_pool);
- if (!grp) {
- err = -ENOMEM;
- goto err1;
+ grp = create_grp(rxe, pool, mgid);
+ if (IS_ERR(grp)) {
+ write_unlock_irqrestore(&pool->pool_lock, flags);
+ err = PTR_ERR(grp);
+ return err;
}
- INIT_LIST_HEAD(&grp->qp_list);
- spin_lock_init(&grp->mcg_lock);
- grp->rxe = rxe;
-
- rxe_add_key(grp, mgid);
-
- err = rxe_mcast_add(rxe, mgid);
- if (err)
- goto err2;
-
done:
+ write_unlock_irqrestore(&pool->pool_lock, flags);
*grp_p = grp;
return 0;
-
-err2:
- rxe_drop_ref(grp);
-err1:
- return err;
}
int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
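
The point of the rework above is that the multicast group lookup and the creation on a miss now happen under a single hold of the pool's write lock (via the new _nl helpers), so two QPs attaching to the same MGID concurrently cannot both create a group. Below is a userspace sketch of that "look up or create under one lock" shape, not the rxe code itself; the rxe version uses pool->pool_lock and rxe_pool_get_key_nl()/create_grp() instead of a pthread mutex.

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct grp { char key[16]; struct grp *next; };

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct grp *groups;

static struct grp *get_or_create_grp(const char *key)
{
        struct grp *g;

        pthread_mutex_lock(&pool_lock);
        for (g = groups; g; g = g->next)        /* lookup, lock held */
                if (!strcmp(g->key, key))
                        goto out;

        g = calloc(1, sizeof(*g));              /* miss: create under the */
        if (g) {                                /* same critical section  */
                strncpy(g->key, key, sizeof(g->key) - 1);
                g->next = groups;
                groups = g;
        }
out:
        pthread_mutex_unlock(&pool_lock);
        return g;
}
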
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b374eb53e2fe..d26730eec720 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -15,21 +15,25 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_UC] = {
.name = "rxe-uc",
.size = sizeof(struct rxe_ucontext),
+ .elem_offset = offsetof(struct rxe_ucontext, pelem),
.flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_PD] = {
.name = "rxe-pd",
.size = sizeof(struct rxe_pd),
+ .elem_offset = offsetof(struct rxe_pd, pelem),
.flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_AH] = {
.name = "rxe-ah",
.size = sizeof(struct rxe_ah),
- .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC,
+ .elem_offset = offsetof(struct rxe_ah, pelem),
+ .flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_SRQ] = {
.name = "rxe-srq",
.size = sizeof(struct rxe_srq),
+ .elem_offset = offsetof(struct rxe_srq, pelem),
.flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.min_index = RXE_MIN_SRQ_INDEX,
.max_index = RXE_MAX_SRQ_INDEX,
@@ -37,6 +41,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_QP] = {
.name = "rxe-qp",
.size = sizeof(struct rxe_qp),
+ .elem_offset = offsetof(struct rxe_qp, pelem),
.cleanup = rxe_qp_cleanup,
.flags = RXE_POOL_INDEX,
.min_index = RXE_MIN_QP_INDEX,
@@ -45,12 +50,14 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_CQ] = {
.name = "rxe-cq",
.size = sizeof(struct rxe_cq),
+ .elem_offset = offsetof(struct rxe_cq, pelem),
.flags = RXE_POOL_NO_ALLOC,
.cleanup = rxe_cq_cleanup,
},
[RXE_TYPE_MR] = {
.name = "rxe-mr",
.size = sizeof(struct rxe_mem),
+ .elem_offset = offsetof(struct rxe_mem, pelem),
.cleanup = rxe_mem_cleanup,
.flags = RXE_POOL_INDEX,
.max_index = RXE_MAX_MR_INDEX,
@@ -59,6 +66,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_MW] = {
.name = "rxe-mw",
.size = sizeof(struct rxe_mem),
+ .elem_offset = offsetof(struct rxe_mem, pelem),
.flags = RXE_POOL_INDEX,
.max_index = RXE_MAX_MW_INDEX,
.min_index = RXE_MIN_MW_INDEX,
@@ -66,6 +74,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_MC_GRP] = {
.name = "rxe-mc_grp",
.size = sizeof(struct rxe_mc_grp),
+ .elem_offset = offsetof(struct rxe_mc_grp, pelem),
.cleanup = rxe_mc_cleanup,
.flags = RXE_POOL_KEY,
.key_offset = offsetof(struct rxe_mc_grp, mgid),
@@ -74,6 +83,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_MC_ELEM] = {
.name = "rxe-mc_elem",
.size = sizeof(struct rxe_mc_elem),
+ .elem_offset = offsetof(struct rxe_mc_elem, pelem),
.flags = RXE_POOL_ATOMIC,
},
};
@@ -94,18 +104,18 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
goto out;
}
- pool->max_index = max;
- pool->min_index = min;
+ pool->index.max_index = max;
+ pool->index.min_index = min;
size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
- pool->table = kmalloc(size, GFP_KERNEL);
- if (!pool->table) {
+ pool->index.table = kmalloc(size, GFP_KERNEL);
+ if (!pool->index.table) {
err = -ENOMEM;
goto out;
}
- pool->table_size = size;
- bitmap_zero(pool->table, max - min + 1);
+ pool->index.table_size = size;
+ bitmap_zero(pool->index.table, max - min + 1);
out:
return err;
@@ -127,7 +137,8 @@ int rxe_pool_init(
pool->max_elem = max_elem;
pool->elem_size = ALIGN(size, RXE_POOL_ALIGN);
pool->flags = rxe_type_info[type].flags;
- pool->tree = RB_ROOT;
+ pool->index.tree = RB_ROOT;
+ pool->key.tree = RB_ROOT;
pool->cleanup = rxe_type_info[type].cleanup;
atomic_set(&pool->num_elem, 0);
@@ -145,8 +156,8 @@ int rxe_pool_init(
}
if (rxe_type_info[type].flags & RXE_POOL_KEY) {
- pool->key_offset = rxe_type_info[type].key_offset;
- pool->key_size = rxe_type_info[type].key_size;
+ pool->key.key_offset = rxe_type_info[type].key_offset;
+ pool->key.key_size = rxe_type_info[type].key_size;
}
pool->state = RXE_POOL_STATE_VALID;
@@ -160,7 +171,7 @@ static void rxe_pool_release(struct kref *kref)
struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
pool->state = RXE_POOL_STATE_INVALID;
- kfree(pool->table);
+ kfree(pool->index.table);
}
static void rxe_pool_put(struct rxe_pool *pool)
@@ -185,27 +196,27 @@ void rxe_pool_cleanup(struct rxe_pool *pool)
static u32 alloc_index(struct rxe_pool *pool)
{
u32 index;
- u32 range = pool->max_index - pool->min_index + 1;
+ u32 range = pool->index.max_index - pool->index.min_index + 1;
- index = find_next_zero_bit(pool->table, range, pool->last);
+ index = find_next_zero_bit(pool->index.table, range, pool->index.last);
if (index >= range)
- index = find_first_zero_bit(pool->table, range);
+ index = find_first_zero_bit(pool->index.table, range);
WARN_ON_ONCE(index >= range);
- set_bit(index, pool->table);
- pool->last = index;
- return index + pool->min_index;
+ set_bit(index, pool->index.table);
+ pool->index.last = index;
+ return index + pool->index.min_index;
}
static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
- struct rb_node **link = &pool->tree.rb_node;
+ struct rb_node **link = &pool->index.tree.rb_node;
struct rb_node *parent = NULL;
struct rxe_pool_entry *elem;
while (*link) {
parent = *link;
- elem = rb_entry(parent, struct rxe_pool_entry, node);
+ elem = rb_entry(parent, struct rxe_pool_entry, index_node);
if (elem->index == new->index) {
pr_warn("element already exists!\n");
@@ -218,25 +229,25 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
link = &(*link)->rb_right;
}
- rb_link_node(&new->node, parent, link);
- rb_insert_color(&new->node, &pool->tree);
+ rb_link_node(&new->index_node, parent, link);
+ rb_insert_color(&new->index_node, &pool->index.tree);
out:
return;
}
static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
- struct rb_node **link = &pool->tree.rb_node;
+ struct rb_node **link = &pool->key.tree.rb_node;
struct rb_node *parent = NULL;
struct rxe_pool_entry *elem;
int cmp;
while (*link) {
parent = *link;
- elem = rb_entry(parent, struct rxe_pool_entry, node);
+ elem = rb_entry(parent, struct rxe_pool_entry, key_node);
- cmp = memcmp((u8 *)elem + pool->key_offset,
- (u8 *)new + pool->key_offset, pool->key_size);
+ cmp = memcmp((u8 *)elem + pool->key.key_offset,
+ (u8 *)new + pool->key.key_offset, pool->key.key_size);
if (cmp == 0) {
pr_warn("key already exists!\n");
@@ -249,73 +260,95 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
link = &(*link)->rb_right;
}
- rb_link_node(&new->node, parent, link);
- rb_insert_color(&new->node, &pool->tree);
+ rb_link_node(&new->key_node, parent, link);
+ rb_insert_color(&new->key_node, &pool->key.tree);
out:
return;
}
-void rxe_add_key(void *arg, void *key)
+void __rxe_add_key_nl(struct rxe_pool_entry *elem, void *key)
{
- struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
- unsigned long flags;
- write_lock_irqsave(&pool->pool_lock, flags);
- memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
+ memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
insert_key(pool, elem);
- write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void rxe_drop_key(void *arg)
+void __rxe_add_key(struct rxe_pool_entry *elem, void *key)
{
- struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
write_lock_irqsave(&pool->pool_lock, flags);
- rb_erase(&elem->node, &pool->tree);
+ __rxe_add_key_nl(elem, key);
write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void rxe_add_index(void *arg)
+void __rxe_drop_key_nl(struct rxe_pool_entry *elem)
+{
+ struct rxe_pool *pool = elem->pool;
+
+ rb_erase(&elem->key_node, &pool->key.tree);
+}
+
+void __rxe_drop_key(struct rxe_pool_entry *elem)
{
- struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
write_lock_irqsave(&pool->pool_lock, flags);
+ __rxe_drop_key_nl(elem);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void __rxe_add_index_nl(struct rxe_pool_entry *elem)
+{
+ struct rxe_pool *pool = elem->pool;
+
elem->index = alloc_index(pool);
insert_index(pool, elem);
+}
+
+void __rxe_add_index(struct rxe_pool_entry *elem)
+{
+ struct rxe_pool *pool = elem->pool;
+ unsigned long flags;
+
+ write_lock_irqsave(&pool->pool_lock, flags);
+ __rxe_add_index_nl(elem);
write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void rxe_drop_index(void *arg)
+void __rxe_drop_index_nl(struct rxe_pool_entry *elem)
+{
+ struct rxe_pool *pool = elem->pool;
+
+ clear_bit(elem->index - pool->index.min_index, pool->index.table);
+ rb_erase(&elem->index_node, &pool->index.tree);
+}
+
+void __rxe_drop_index(struct rxe_pool_entry *elem)
{
- struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
write_lock_irqsave(&pool->pool_lock, flags);
- clear_bit(elem->index - pool->min_index, pool->table);
- rb_erase(&elem->node, &pool->tree);
+ __rxe_drop_index_nl(elem);
write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void *rxe_alloc(struct rxe_pool *pool)
+void *rxe_alloc_nl(struct rxe_pool *pool)
{
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
struct rxe_pool_entry *elem;
- unsigned long flags;
+ u8 *obj;
might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
- read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != RXE_POOL_STATE_VALID) {
- read_unlock_irqrestore(&pool->pool_lock, flags);
+ if (pool->state != RXE_POOL_STATE_VALID)
return NULL;
- }
+
kref_get(&pool->ref_cnt);
- read_unlock_irqrestore(&pool->pool_lock, flags);
if (!ib_device_try_get(&pool->rxe->ib_dev))
goto out_put_pool;
@@ -323,16 +356,17 @@ void *rxe_alloc(struct rxe_pool *pool)
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
goto out_cnt;
- elem = kzalloc(rxe_type_info[pool->type].size,
- (pool->flags & RXE_POOL_ATOMIC) ?
- GFP_ATOMIC : GFP_KERNEL);
- if (!elem)
+ obj = kzalloc(info->size, (pool->flags & RXE_POOL_ATOMIC) ?
+ GFP_ATOMIC : GFP_KERNEL);
+ if (!obj)
goto out_cnt;
+ elem = (struct rxe_pool_entry *)(obj + info->elem_offset);
+
elem->pool = pool;
kref_init(&elem->ref_cnt);
- return elem;
+ return obj;
out_cnt:
atomic_dec(&pool->num_elem);
@@ -342,11 +376,21 @@ out_put_pool:
return NULL;
}
-int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+void *rxe_alloc(struct rxe_pool *pool)
{
+ u8 *obj;
unsigned long flags;
- might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+ read_lock_irqsave(&pool->pool_lock, flags);
+ obj = rxe_alloc_nl(pool);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return obj;
+}
+
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+{
+ unsigned long flags;
read_lock_irqsave(&pool->pool_lock, flags);
if (pool->state != RXE_POOL_STATE_VALID) {
@@ -380,12 +424,17 @@ void rxe_elem_release(struct kref *kref)
struct rxe_pool_entry *elem =
container_of(kref, struct rxe_pool_entry, ref_cnt);
struct rxe_pool *pool = elem->pool;
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
+ u8 *obj;
if (pool->cleanup)
pool->cleanup(elem);
- if (!(pool->flags & RXE_POOL_NO_ALLOC))
- kfree(elem);
+ if (!(pool->flags & RXE_POOL_NO_ALLOC)) {
+ obj = (u8 *)elem - info->elem_offset;
+ kfree(obj);
+ }
+
atomic_dec(&pool->num_elem);
ib_device_put(&pool->rxe->ib_dev);
rxe_pool_put(pool);
@@ -393,8 +442,10 @@ void rxe_elem_release(struct kref *kref)
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
- struct rb_node *node = NULL;
- struct rxe_pool_entry *elem = NULL;
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
+ struct rb_node *node;
+ struct rxe_pool_entry *elem;
+ u8 *obj = NULL;
unsigned long flags;
read_lock_irqsave(&pool->pool_lock, flags);
@@ -402,45 +453,49 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
if (pool->state != RXE_POOL_STATE_VALID)
goto out;
- node = pool->tree.rb_node;
+ node = pool->index.tree.rb_node;
while (node) {
- elem = rb_entry(node, struct rxe_pool_entry, node);
+ elem = rb_entry(node, struct rxe_pool_entry, index_node);
if (elem->index > index)
node = node->rb_left;
else if (elem->index < index)
node = node->rb_right;
- else {
- kref_get(&elem->ref_cnt);
+ else
break;
- }
+ }
+
+ if (node) {
+ kref_get(&elem->ref_cnt);
+ obj = (u8 *)elem - info->elem_offset;
+ } else {
+ obj = NULL;
}
out:
read_unlock_irqrestore(&pool->pool_lock, flags);
- return node ? elem : NULL;
+ return obj;
}
-void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+void *rxe_pool_get_key_nl(struct rxe_pool *pool, void *key)
{
- struct rb_node *node = NULL;
- struct rxe_pool_entry *elem = NULL;
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
+ struct rb_node *node;
+ struct rxe_pool_entry *elem;
+ u8 *obj = NULL;
int cmp;
- unsigned long flags;
-
- read_lock_irqsave(&pool->pool_lock, flags);
if (pool->state != RXE_POOL_STATE_VALID)
goto out;
- node = pool->tree.rb_node;
+ node = pool->key.tree.rb_node;
while (node) {
- elem = rb_entry(node, struct rxe_pool_entry, node);
+ elem = rb_entry(node, struct rxe_pool_entry, key_node);
- cmp = memcmp((u8 *)elem + pool->key_offset,
- key, pool->key_size);
+ cmp = memcmp((u8 *)elem + pool->key.key_offset,
+ key, pool->key.key_size);
if (cmp > 0)
node = node->rb_left;
@@ -450,10 +505,25 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
break;
}
- if (node)
+ if (node) {
kref_get(&elem->ref_cnt);
+ obj = (u8 *)elem - info->elem_offset;
+ } else {
+ obj = NULL;
+ }
out:
+ return obj;
+}
+
+void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+{
+ u8 *obj = NULL;
+ unsigned long flags;
+
+ read_lock_irqsave(&pool->pool_lock, flags);
+ obj = rxe_pool_get_key_nl(pool, key);
read_unlock_irqrestore(&pool->pool_lock, flags);
- return node ? elem : NULL;
+
+ return obj;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 432745ffc8d4..373e08554c1c 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -36,6 +36,7 @@ struct rxe_pool_entry;
struct rxe_type_info {
const char *name;
size_t size;
+ size_t elem_offset;
void (*cleanup)(struct rxe_pool_entry *obj);
enum rxe_pool_flags flags;
u32 max_index;
@@ -56,8 +57,11 @@ struct rxe_pool_entry {
struct kref ref_cnt;
struct list_head list;
- /* only used if indexed or keyed */
- struct rb_node node;
+ /* only used if keyed */
+ struct rb_node key_node;
+
+ /* only used if indexed */
+ struct rb_node index_node;
u32 index;
};
@@ -74,15 +78,22 @@ struct rxe_pool {
unsigned int max_elem;
atomic_t num_elem;
- /* only used if indexed or keyed */
- struct rb_root tree;
- unsigned long *table;
- size_t table_size;
- u32 max_index;
- u32 min_index;
- u32 last;
- size_t key_offset;
- size_t key_size;
+ /* only used if indexed */
+ struct {
+ struct rb_root tree;
+ unsigned long *table;
+ size_t table_size;
+ u32 last;
+ u32 max_index;
+ u32 min_index;
+ } index;
+
+ /* only used if keyed */
+ struct {
+ struct rb_root tree;
+ size_t key_offset;
+ size_t key_size;
+ } key;
};
/* initialize a pool of objects with given limit on
@@ -98,31 +109,70 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
/* allocate an object from pool */
void *rxe_alloc(struct rxe_pool *pool);
+/* allocate an object from pool - no lock */
+void *rxe_alloc_nl(struct rxe_pool *pool);
+
/* connect already allocated object to pool */
-int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+
+#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem)
/* assign an index to an indexed object and insert object into
- * pool's rb tree
+ * pool's rb tree with and without holding the pool_lock
+ */
+void __rxe_add_index(struct rxe_pool_entry *elem);
+
+#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem)
+
+void __rxe_add_index_nl(struct rxe_pool_entry *elem);
+
+#define rxe_add_index_nl(obj) __rxe_add_index_nl(&(obj)->pelem)
+
+/* drop an index and remove object from rb tree
+ * with and without holding the pool_lock
*/
-void rxe_add_index(void *elem);
+void __rxe_drop_index(struct rxe_pool_entry *elem);
-/* drop an index and remove object from rb tree */
-void rxe_drop_index(void *elem);
+#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem)
+
+void __rxe_drop_index_nl(struct rxe_pool_entry *elem);
+
+#define rxe_drop_index_nl(obj) __rxe_drop_index_nl(&(obj)->pelem)
/* assign a key to a keyed object and insert object into
- * pool's rb tree
+ * pool's rb tree with and without holding pool_lock
*/
-void rxe_add_key(void *elem, void *key);
+void __rxe_add_key(struct rxe_pool_entry *elem, void *key);
+
+#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key)
+
+void __rxe_add_key_nl(struct rxe_pool_entry *elem, void *key);
+
+#define rxe_add_key_nl(obj, key) __rxe_add_key_nl(&(obj)->pelem, key)
+
+/* remove elem from rb tree with and without holding pool_lock */
+void __rxe_drop_key(struct rxe_pool_entry *elem);
-/* remove elem from rb tree */
-void rxe_drop_key(void *elem);
+#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem)
-/* lookup an indexed object from index. takes a reference on object */
+void __rxe_drop_key_nl(struct rxe_pool_entry *elem);
+
+#define rxe_drop_key_nl(obj) __rxe_drop_key_nl(&(obj)->pelem)
+
+/* lookup an indexed object from index with and without holding pool_lock.
+ * takes a reference on object
+ */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
-/* lookup keyed object from key. takes a reference on the object */
+void *rxe_pool_get_index_nl(struct rxe_pool *pool, u32 index);
+
+/* lookup keyed object from key with and without holding pool_lock.
+ * takes a reference on the object
+ */
void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
+void *rxe_pool_get_key_nl(struct rxe_pool *pool, void *key);
+
/* cleanup an object when all references are dropped */
void rxe_elem_release(struct kref *kref);
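
Two things fall out of the rxe_pool rework above: the wrapper macros (rxe_add_to_pool(), rxe_add_index(), rxe_add_key(), ...) take the containing object and pass its embedded pelem, making the API type safe, and the pool converts an entry back to the containing object using the per-type elem_offset recorded in rxe_type_info. A standalone illustration of that pointer arithmetic, with made-up types:

#include <stddef.h>
#include <stdio.h>

struct pool_entry { int index; };
struct my_qp { long some_state; struct pool_entry pelem; };

int main(void)
{
        size_t elem_offset = offsetof(struct my_qp, pelem);
        struct my_qp qp = { .some_state = 42 };
        struct pool_entry *elem = &qp.pelem;

        /* what rxe_pool_get_index()/get_key() now do before returning */
        struct my_qp *obj = (struct my_qp *)((char *)elem - elem_offset);

        printf("%ld\n", obj->some_state);       /* prints 42 */
        return 0;
}
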
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 656a5b4be847..65c8df812aeb 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -62,6 +62,17 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
struct rxe_port *port;
int port_num = init->port_num;
+ switch(init->qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
if (!init->recv_cq || !init->send_cq) {
pr_warn("missing cq\n");
goto err1;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index a031514e2f41..7483a33bcec5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -106,12 +106,12 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
return IB_LINK_LAYER_ETHERNET;
}
-static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
+static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
{
- struct rxe_dev *rxe = to_rdev(uctx->device);
- struct rxe_ucontext *uc = to_ruc(uctx);
+ struct rxe_dev *rxe = to_rdev(ibuc->device);
+ struct rxe_ucontext *uc = to_ruc(ibuc);
- return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem);
+ return rxe_add_to_pool(&rxe->uc_pool, uc);
}
static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
@@ -145,7 +145,7 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
- return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
+ return rxe_add_to_pool(&rxe->pd_pool, pd);
}
static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
@@ -169,7 +169,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
if (err)
return err;
- err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem);
+ err = rxe_add_to_pool(&rxe->ah_pool, ah);
if (err)
return err;
@@ -273,7 +273,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
if (err)
goto err1;
- err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem);
+ err = rxe_add_to_pool(&rxe->srq_pool, srq);
if (err)
goto err1;
@@ -774,7 +774,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
if (err)
return err;
- return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
+ return rxe_add_to_pool(&rxe->cq_pool, cq);
}
static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index adda78996219..368959ae9a8c 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -653,7 +653,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
{
struct siw_sqe *orq_e = orq_get_tail(qp);
- if (orq_e && READ_ONCE(orq_e->flags) == 0)
+ if (READ_ONCE(orq_e->flags) == 0)
return orq_e;
return NULL;
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index ee95cf29179d..81a294269592 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -357,7 +357,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
sizeof(base_dev->iw_ifname));
/* Disable TCP port mapping */
- base_dev->iw_driver_flags = IW_F_NO_PORT_MAP,
+ base_dev->iw_driver_flags = IW_F_NO_PORT_MAP;
sdev->attrs.max_qp = SIW_MAX_QP;
sdev->attrs.max_qp_wr = SIW_MAX_QP_WR;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 875d36d4b1c6..ddb2e66f9f13 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -199,26 +199,26 @@ void siw_qp_llp_write_space(struct sock *sk)
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
{
- irq_size = roundup_pow_of_two(irq_size);
- orq_size = roundup_pow_of_two(orq_size);
-
- qp->attrs.irq_size = irq_size;
- qp->attrs.orq_size = orq_size;
-
- qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
- if (!qp->irq) {
- siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size);
- qp->attrs.irq_size = 0;
- return -ENOMEM;
+ if (irq_size) {
+ irq_size = roundup_pow_of_two(irq_size);
+ qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
+ if (!qp->irq) {
+ qp->attrs.irq_size = 0;
+ return -ENOMEM;
+ }
}
- qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
- if (!qp->orq) {
- siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size);
- qp->attrs.orq_size = 0;
- qp->attrs.irq_size = 0;
- vfree(qp->irq);
- return -ENOMEM;
+ if (orq_size) {
+ orq_size = roundup_pow_of_two(orq_size);
+ qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
+ if (!qp->orq) {
+ qp->attrs.orq_size = 0;
+ qp->attrs.irq_size = 0;
+ vfree(qp->irq);
+ return -ENOMEM;
+ }
}
+ qp->attrs.irq_size = irq_size;
+ qp->attrs.orq_size = orq_size;
siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
return 0;
}
@@ -288,13 +288,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
if (ctrl & MPA_V2_RDMA_WRITE_RTR)
wqe->sqe.opcode = SIW_OP_WRITE;
else if (ctrl & MPA_V2_RDMA_READ_RTR) {
- struct siw_sqe *rreq;
+ struct siw_sqe *rreq = NULL;
wqe->sqe.opcode = SIW_OP_READ;
spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
+ if (qp->attrs.orq_size)
+ rreq = orq_get_free(qp);
if (rreq) {
siw_read_to_orq(rreq, &wqe->sqe);
qp->orq_put++;
@@ -877,135 +878,88 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
rreq->num_sge = 1;
}
-/*
- * Must be called with SQ locked.
- * To avoid complete SQ starvation by constant inbound READ requests,
- * the active IRQ will not be served after qp->irq_burst, if the
- * SQ has pending work.
- */
-int siw_activate_tx(struct siw_qp *qp)
+static int siw_activate_tx_from_sq(struct siw_qp *qp)
{
- struct siw_sqe *irqe, *sqe;
+ struct siw_sqe *sqe;
struct siw_wqe *wqe = tx_wqe(qp);
int rv = 1;
- irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
-
- if (irqe->flags & SIW_WQE_VALID) {
- sqe = sq_get_next(qp);
-
- /*
- * Avoid local WQE processing starvation in case
- * of constant inbound READ request stream
- */
- if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
- qp->irq_burst = 0;
- goto skip_irq;
- }
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* start READ RESPONSE */
- wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
- wqe->sqe.flags = 0;
- if (irqe->num_sge) {
- wqe->sqe.num_sge = 1;
- wqe->sqe.sge[0].length = irqe->sge[0].length;
- wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
- wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
- } else {
- wqe->sqe.num_sge = 0;
- }
-
- /* Retain original RREQ's message sequence number for
- * potential error reporting cases.
- */
- wqe->sqe.sge[1].length = irqe->sge[1].length;
-
- wqe->sqe.rkey = irqe->rkey;
- wqe->sqe.raddr = irqe->raddr;
+ sqe = sq_get_next(qp);
+ if (!sqe)
+ return 0;
- wqe->processed = 0;
- qp->irq_get++;
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
- /* mark current IRQ entry free */
- smp_store_mb(irqe->flags, 0);
+ /* First copy SQE to kernel private memory */
+ memcpy(&wqe->sqe, sqe, sizeof(*sqe));
+ if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ rv = -EINVAL;
goto out;
}
- sqe = sq_get_next(qp);
- if (sqe) {
-skip_irq:
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* First copy SQE to kernel private memory */
- memcpy(&wqe->sqe, sqe, sizeof(*sqe));
-
- if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ if (wqe->sqe.flags & SIW_WQE_INLINE) {
+ if (wqe->sqe.opcode != SIW_OP_SEND &&
+ wqe->sqe.opcode != SIW_OP_WRITE) {
rv = -EINVAL;
goto out;
}
- if (wqe->sqe.flags & SIW_WQE_INLINE) {
- if (wqe->sqe.opcode != SIW_OP_SEND &&
- wqe->sqe.opcode != SIW_OP_WRITE) {
- rv = -EINVAL;
- goto out;
- }
- if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
- rv = -EINVAL;
- goto out;
- }
- wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
- wqe->sqe.sge[0].lkey = 0;
- wqe->sqe.num_sge = 1;
+ if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
+ rv = -EINVAL;
+ goto out;
}
- if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
- /* A READ cannot be fenced */
- if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode ==
- SIW_OP_READ_LOCAL_INV)) {
- siw_dbg_qp(qp, "cannot fence read\n");
- rv = -EINVAL;
- goto out;
- }
- spin_lock(&qp->orq_lock);
+ wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
+ wqe->sqe.sge[0].lkey = 0;
+ wqe->sqe.num_sge = 1;
+ }
+ if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
+ /* A READ cannot be fenced */
+ if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode ==
+ SIW_OP_READ_LOCAL_INV)) {
+ siw_dbg_qp(qp, "cannot fence read\n");
+ rv = -EINVAL;
+ goto out;
+ }
+ spin_lock(&qp->orq_lock);
- if (!siw_orq_empty(qp)) {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
+ if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
+ }
+ spin_unlock(&qp->orq_lock);
- } else if (wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
- struct siw_sqe *rreq;
+ } else if (wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
+ struct siw_sqe *rreq;
- wqe->sqe.num_sge = 1;
+ if (unlikely(!qp->attrs.orq_size)) {
+ /* We negotiated not to send READ req's */
+ rv = -EINVAL;
+ goto out;
+ }
+ wqe->sqe.num_sge = 1;
- spin_lock(&qp->orq_lock);
+ spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
- if (rreq) {
- /*
- * Make an immediate copy in ORQ to be ready
- * to process loopback READ reply
- */
- siw_read_to_orq(rreq, &wqe->sqe);
- qp->orq_put++;
- } else {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
+ rreq = orq_get_free(qp);
+ if (rreq) {
+ /*
+ * Make an immediate copy in ORQ to be ready
+ * to process loopback READ reply
+ */
+ siw_read_to_orq(rreq, &wqe->sqe);
+ qp->orq_put++;
+ } else {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
}
-
- /* Clear SQE, can be re-used by application */
- smp_store_mb(sqe->flags, 0);
- qp->sq_get++;
- } else {
- rv = 0;
+ spin_unlock(&qp->orq_lock);
}
+
+ /* Clear SQE, can be re-used by application */
+ smp_store_mb(sqe->flags, 0);
+ qp->sq_get++;
out:
if (unlikely(rv < 0)) {
siw_dbg_qp(qp, "error %d\n", rv);
@@ -1015,6 +969,65 @@ out:
}
/*
+ * Must be called with SQ locked.
+ * To avoid complete SQ starvation by constant inbound READ requests,
+ * the active IRQ will not be served after qp->irq_burst, if the
+ * SQ has pending work.
+ */
+int siw_activate_tx(struct siw_qp *qp)
+{
+ struct siw_sqe *irqe;
+ struct siw_wqe *wqe = tx_wqe(qp);
+
+ if (!qp->attrs.irq_size)
+ return siw_activate_tx_from_sq(qp);
+
+ irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
+
+ if (!(irqe->flags & SIW_WQE_VALID))
+ return siw_activate_tx_from_sq(qp);
+
+ /*
+ * Avoid local WQE processing starvation in case
+ * of constant inbound READ request stream
+ */
+ if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
+ qp->irq_burst = 0;
+ return siw_activate_tx_from_sq(qp);
+ }
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
+
+ /* start READ RESPONSE */
+ wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
+ wqe->sqe.flags = 0;
+ if (irqe->num_sge) {
+ wqe->sqe.num_sge = 1;
+ wqe->sqe.sge[0].length = irqe->sge[0].length;
+ wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
+ wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
+ } else {
+ wqe->sqe.num_sge = 0;
+ }
+
+ /* Retain original RREQ's message sequence number for
+ * potential error reporting cases.
+ */
+ wqe->sqe.sge[1].length = irqe->sge[1].length;
+
+ wqe->sqe.rkey = irqe->rkey;
+ wqe->sqe.raddr = irqe->raddr;
+
+ wqe->processed = 0;
+ qp->irq_get++;
+
+ /* mark current IRQ entry free */
+ smp_store_mb(irqe->flags, 0);
+
+ return 1;
+}
+
+/*
* Check if current CQ state qualifies for calling CQ completion
* handler. Must be called with CQ lock held.
*/
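
The siw_activate_tx() split above exists because a peer may negotiate zero inbound or outbound READ resources, in which case qp->irq / qp->orq are never allocated and qp->attrs.irq_size / orq_size stay zero; every "index % size" ring access therefore has to be gated on the size. A tiny standalone illustration of the guard:

#include <stdio.h>

/* Modulo by a zero-sized queue is undefined behaviour, so the size is
 * checked before any ring index is computed, as the rework above does.
 */
static unsigned int ring_slot(unsigned int idx, unsigned int qsize)
{
        return qsize ? idx % qsize : 0;
}

int main(void)
{
        printf("%u %u\n", ring_slot(7, 4), ring_slot(7, 0));   /* 3 0 */
        return 0;
}
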
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index 4bd1f1f84057..60116f20653c 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -680,6 +680,10 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
}
spin_lock_irqsave(&qp->sq_lock, flags);
+ if (unlikely(!qp->attrs.irq_size)) {
+ run_sq = 0;
+ goto error_irq;
+ }
if (tx_work->wr_status == SIW_WR_IDLE) {
/*
* immediately schedule READ response w/o
@@ -712,8 +716,9 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
/* RRESP now valid as current TX wqe or placed into IRQ */
smp_store_mb(resp->flags, SIW_WQE_VALID);
} else {
- pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
- qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);
+error_irq:
+ pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n",
+ qp_id(qp), qp->attrs.irq_size);
siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
RDMAP_ETYPE_REMOTE_OPERATION,
@@ -740,6 +745,9 @@ static int siw_orqe_start_rx(struct siw_qp *qp)
struct siw_sqe *orqe;
struct siw_wqe *wqe = NULL;
+ if (unlikely(!qp->attrs.orq_size))
+ return -EPROTO;
+
/* make sure ORQ indices are current */
smp_mb();
@@ -796,8 +804,8 @@ int siw_proc_rresp(struct siw_qp *qp)
*/
rv = siw_orqe_start_rx(qp);
if (rv) {
- pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
- qp_id(qp), qp->orq_get % qp->attrs.orq_size);
+ pr_warn("siw: [QP %u]: ORQ empty, size %d\n",
+ qp_id(qp), qp->attrs.orq_size);
goto error_term;
}
rv = siw_rresp_check_ntoh(srx, frx);
@@ -1290,11 +1298,13 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error)
wc_status);
siw_wqe_put_mem(wqe, SIW_OP_READ);
- if (!error)
+ if (!error) {
rv = siw_check_tx_fence(qp);
- else
- /* Disable current ORQ eleement */
- WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ } else {
+ /* Disable current ORQ element */
+ if (qp->attrs.orq_size)
+ WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ }
break;
case RDMAP_RDMA_READ_REQ:
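
The receive-path changes above all guard against a zero-sized ORQ or IRQ before touching the queue: the queues may legitimately have zero size when the peer negotiates no READ resources, and the old code indexed them with a 'put % size' expression, which is undefined for a zero divisor. A minimal sketch of the guard pattern, with illustrative names rather than the siw structures:

#include <errno.h>
#include <stddef.h>

struct entry { int valid; };

struct queue {
	struct entry *slots;     /* NULL when size == 0 */
	unsigned int size;       /* 0 if the peer advertised no resources */
	unsigned int put;        /* free-running producer counter */
};

/* Reserve the next slot, refusing cleanly when the queue does not exist. */
static int queue_reserve(struct queue *q, struct entry **out)
{
	if (!q->size)
		return -EPROTO;          /* guard before any 'put % size' */
	*out = &q->slots[q->put++ % q->size];
	return 0;
}

int main(void)
{
	struct queue empty = { NULL, 0, 0 };
	struct entry *e;

	return queue_reserve(&empty, &e) == -EPROTO ? 0 : 1;
}
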
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index d19d8325588b..7989c4043db4 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -1107,8 +1107,8 @@ next_wqe:
/*
* RREQ may have already been completed by inbound RRESP!
*/
- if (tx_type == SIW_OP_READ ||
- tx_type == SIW_OP_READ_LOCAL_INV) {
+ if ((tx_type == SIW_OP_READ ||
+ tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) {
/* Cleanup pending entry in ORQ */
qp->orq_put--;
qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 68fd053fc774..e389d44e5591 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -365,13 +365,23 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
if (rv)
goto err_out;
+ num_sqe = attrs->cap.max_send_wr;
+ num_rqe = attrs->cap.max_recv_wr;
+
/* All queue indices are derived from modulo operations
* on a free running 'get' (consumer) and 'put' (producer)
* unsigned counter. Having queue sizes at power of two
* avoids handling counter wrap around.
*/
- num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr);
- num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr);
+ if (num_sqe)
+ num_sqe = roundup_pow_of_two(num_sqe);
+ else {
+ /* Zero sized SQ is not supported */
+ rv = -EINVAL;
+ goto err_out;
+ }
+ if (num_rqe)
+ num_rqe = roundup_pow_of_two(num_rqe);
if (udata)
qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe));
@@ -379,7 +389,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe));
if (qp->sendq == NULL) {
- siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe);
rv = -ENOMEM;
goto err_out_xa;
}
@@ -413,7 +422,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe));
if (qp->recvq == NULL) {
- siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe);
rv = -ENOMEM;
goto err_out_xa;
}
@@ -966,9 +974,9 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
unsigned long flags;
int rv = 0;
- if (qp->srq) {
+ if (qp->srq || qp->attrs.rq_size == 0) {
*bad_wr = wr;
- return -EOPNOTSUPP; /* what else from errno.h? */
+ return -EINVAL;
}
if (!rdma_is_kernel_res(&qp->base_qp.res)) {
siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n");
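
The create-QP path keeps the scheme described in the comment above: 'get' and 'put' are free-running unsigned counters and the slot index is 'counter % size'. With a power-of-two size the index stays consistent across the natural 32-bit wrap of the counters, which is why the change only rounds a non-zero count and rejects a zero-sized SQ instead of rounding it up. A small self-contained model of that indexing, with made-up names:

#include <assert.h>
#include <stdint.h>

#define RING_SIZE 8u                 /* must be a power of two */

struct ring {
	int slot[RING_SIZE];
	uint32_t put;                /* free-running producer counter */
	uint32_t get;                /* free-running consumer counter */
};

static unsigned int ring_used(const struct ring *r)
{
	return r->put - r->get;      /* correct even across counter wrap */
}

static void ring_push(struct ring *r, int v)
{
	assert(ring_used(r) < RING_SIZE);
	r->slot[r->put++ % RING_SIZE] = v;
}

static int ring_pop(struct ring *r)
{
	assert(ring_used(r) > 0);
	return r->slot[r->get++ % RING_SIZE];
}

int main(void)
{
	struct ring r = { .put = UINT32_MAX - 2, .get = UINT32_MAX - 2 };

	/* Indices remain consistent while the counters wrap past 2^32. */
	for (int i = 0; i < 6; i++)
		ring_push(&r, i);
	for (int i = 0; i < 6; i++)
		assert(ring_pop(&r) == i);
	return 0;
}
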
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 86e4ed64e4e2..e3e4447c0f51 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -275,7 +275,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
memset(&av, 0, sizeof(av));
av.type = rdma_ah_find_type(priv->ca, priv->port);
- rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)),
+ rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid));
rdma_ah_set_port_num(&av, priv->port);
rdma_ah_set_sl(&av, mcast->mcmember.sl);
rdma_ah_set_static_rate(&av, mcast->mcmember.rate);
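
The ipoib change above (and the identical one in opa_vnic_vema.c further down) replaces a statement-terminating comma with a semicolon. Here the behaviour happened to be the same, because the comma operator still evaluates both calls, but the construct is fragile: the comma fuses the two calls into one expression whose value is the right-hand operand, and precedence can silently change what an assignment or return sees. A tiny illustration in plain C, unrelated to the driver code:

#include <assert.h>

static int one(void)  { return 1; }
static int two(void)  { return 2; }

int main(void)
{
	int x;

	/* Comma operator: evaluates left, then right; result is the right operand. */
	x = (one(), two());
	assert(x == 2);

	/* But '=' binds tighter than ',', so this assigns one() and merely
	 * evaluates two() for its side effects, which is easy to misread.
	 */
	x = one(), two();
	assert(x == 1);

	return 0;
}
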
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 2ba27221ea85..7305ed8976c2 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -71,7 +71,6 @@ static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp
return param_set_int(val, kp);
}
-
static inline bool
isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
{
@@ -79,7 +78,6 @@ isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
cmd->prot_op != TARGET_PROT_NORMAL);
}
-
static void
isert_qp_event_callback(struct ib_event *e, void *context)
{
@@ -232,8 +230,10 @@ isert_create_device_ib_res(struct isert_device *device)
}
/* Check signature cap */
- device->pi_capable = ib_dev->attrs.device_cap_flags &
- IB_DEVICE_INTEGRITY_HANDOVER ? true : false;
+ if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)
+ device->pi_capable = true;
+ else
+ device->pi_capable = false;
return 0;
}
@@ -1993,7 +1993,7 @@ isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_domain *domain)
if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT ||
se_cmd->prot_type == TARGET_DIF_TYPE2_PROT)
domain->sig.dif.ref_remap = true;
-};
+}
static int
isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 4933085a864a..cecf0f7cadf9 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -233,7 +233,7 @@ static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
port_info = (struct opa_class_port_info *)rsp_mad->data;
memcpy(port_info, &port->class_port_info, sizeof(*port_info));
- port_info->base_version = OPA_MGMT_BASE_VERSION,
+ port_info->base_version = OPA_MGMT_BASE_VERSION;
port_info->class_version = OPA_EMA_CLASS_VERSION;
/*
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
index ba00f0de14ca..b6a0abf40589 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
@@ -408,6 +408,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess)
"%s", str);
if (err) {
pr_err("kobject_init_and_add: %d\n", err);
+ kobject_put(&sess->kobj);
return err;
}
err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group);
@@ -419,6 +420,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess)
&sess->kobj, "stats");
if (err) {
pr_err("kobject_init_and_add: %d\n", err);
+ kobject_put(&sess->stats->kobj_stats);
goto remove_group;
}
@@ -469,15 +471,12 @@ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt)
return sysfs_create_group(&clt->dev.kobj, &rtrs_clt_attr_group);
}
-void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt)
+void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt)
{
+ sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group);
+
if (clt->kobj_paths) {
kobject_del(clt->kobj_paths);
kobject_put(clt->kobj_paths);
}
}
-
-void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt)
-{
- sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group);
-}
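
Both sysfs fixes above follow the documented kobject rule: once kobject_init_and_add() has been called, the kobject holds a reference even if the add step fails, so the error path must drop it with kobject_put(), which is what eventually invokes the release callback, rather than freeing the containing structure directly. Below is a minimal userspace analogue of that "init takes a reference, put releases it" contract, with invented names and a simplified refcount:

#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refs;
	void (*release)(struct obj *);
};

static void obj_get(struct obj *o) { o->refs++; }

static void obj_put(struct obj *o)
{
	if (--o->refs == 0)
		o->release(o);           /* only the last put frees */
}

/* Mimics kobject_init_and_add(): the reference is taken even when
 * the "add" part fails.
 */
static int obj_init_and_add(struct obj *o, int simulate_failure)
{
	obj_get(o);
	return simulate_failure ? -1 : 0;
}

static void release_obj(struct obj *o)
{
	printf("released\n");
	free(o);
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (!o)
		return 1;
	o->release = release_obj;
	if (obj_init_and_add(o, 1))
		obj_put(o);              /* error path still needs the put */
	return 0;
}
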
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 67f86c405a26..7644c3f627ca 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -178,18 +178,18 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
}
/**
- * __rtrs_clt_change_state() - change the session state through session state
+ * rtrs_clt_change_state() - change the session state through session state
* machine.
*
* @sess: client session to change the state of.
* @new_state: state to change to.
*
- * returns true if successful, false if the requested state can not be set.
+ * returns true if sess's state is changed to new state, otherwise return false.
+ * returns true if sess's state is changed to the new state, otherwise returns false.
*
* Locks:
* state_wq lock must be hold.
*/
-static bool __rtrs_clt_change_state(struct rtrs_clt_sess *sess,
+static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess,
enum rtrs_clt_state new_state)
{
enum rtrs_clt_state old_state;
@@ -286,7 +286,7 @@ static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess,
spin_lock_irq(&sess->state_wq.lock);
if (sess->state == old_state)
- changed = __rtrs_clt_change_state(sess, new_state);
+ changed = rtrs_clt_change_state(sess, new_state);
spin_unlock_irq(&sess->state_wq.lock);
return changed;
@@ -494,7 +494,7 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc)
int err;
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
- WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F);
+ WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0);
iu = container_of(wc->wr_cqe, struct rtrs_iu,
cqe);
err = rtrs_iu_post_recv(&con->c, iu);
@@ -514,7 +514,7 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
u32 buf_id;
int err;
- WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F);
+ WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0);
iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
@@ -621,12 +621,12 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
} else if (imm_type == RTRS_HB_MSG_IMM) {
WARN_ON(con->c.cid);
rtrs_send_hb_ack(&sess->s);
- if (sess->flags == RTRS_MSG_NEW_RKEY_F)
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F)
return rtrs_clt_recv_done(con, wc);
} else if (imm_type == RTRS_HB_ACK_IMM) {
WARN_ON(con->c.cid);
sess->s.hb_missed_cnt = 0;
- if (sess->flags == RTRS_MSG_NEW_RKEY_F)
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F)
return rtrs_clt_recv_done(con, wc);
} else {
rtrs_wrn(con->c.sess, "Unknown IMM type %u\n",
@@ -654,7 +654,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE ||
wc->wc_flags & IB_WC_WITH_IMM));
WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done);
- if (sess->flags == RTRS_MSG_NEW_RKEY_F) {
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F) {
if (wc->wc_flags & IB_WC_WITH_INVALIDATE)
return rtrs_clt_recv_done(con, wc);
@@ -664,7 +664,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
case IB_WC_RDMA_WRITE:
/*
* post_send() RDMA write completions of IO reqs (read/write)
- * and hb
*/
break;
@@ -680,7 +679,7 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size)
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
for (i = 0; i < q_size; i++) {
- if (sess->flags == RTRS_MSG_NEW_RKEY_F) {
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F) {
struct rtrs_iu *iu = &con->rsp_ius[i];
err = rtrs_iu_post_recv(&con->c, iu);
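
sess->flags is a bit mask, so the equality tests replaced above stop working the moment the server sets any additional bit alongside RTRS_MSG_NEW_RKEY_F; the switch to a bitwise AND keeps the checks valid for any flag combination. A trivial illustration with made-up flag values:

#include <assert.h>

#define F_NEW_RKEY  0x01u
#define F_SOMETHING 0x02u   /* a hypothetical second flag */

int main(void)
{
	unsigned int flags = F_NEW_RKEY | F_SOMETHING;

	assert((flags & F_NEW_RKEY) != 0);   /* robust: the bit is set */
	assert(flags != F_NEW_RKEY);         /* equality test now misses it */
	return 0;
}
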
@@ -1318,6 +1317,12 @@ out_err:
static void free_permits(struct rtrs_clt *clt)
{
+ if (clt->permits_map) {
+ size_t sz = clt->queue_depth;
+
+ wait_event(clt->permits_wait,
+ find_first_bit(clt->permits_map, sz) >= sz);
+ }
kfree(clt->permits_map);
clt->permits_map = NULL;
kfree(clt->permits);
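
free_permits() now folds in the former wait_for_inflight_permits(): before the permit arrays are freed, the caller blocks until no bit is left set in permits_map, that is, until find_first_bit() runs off the end of the bitmap. The emptiness predicate itself is just a scan for the first set bit; a small stand-alone version of it (not the kernel bitmap helpers) looks like this:

#include <assert.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

/* Returns the index of the first set bit, or >= nbits if none is set,
 * mirroring how 'find_first_bit(map, sz) >= sz' is used as "all permits free".
 */
static unsigned long first_set_bit(const unsigned long *map, unsigned long nbits)
{
	for (unsigned long i = 0; i < nbits; i++)
		if (map[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG)))
			return i;
	return nbits;
}

int main(void)
{
	unsigned long map[1] = { 0 };

	assert(first_set_bit(map, 20) >= 20);   /* empty: safe to free */
	map[0] |= 1UL << 5;                     /* one permit still in flight */
	assert(first_set_bit(map, 20) == 5);
	return 0;
}
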
@@ -1353,21 +1358,14 @@ static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess,
bool changed;
spin_lock_irq(&sess->state_wq.lock);
- *old_state = sess->state;
- changed = __rtrs_clt_change_state(sess, new_state);
+ if (old_state)
+ *old_state = sess->state;
+ changed = rtrs_clt_change_state(sess, new_state);
spin_unlock_irq(&sess->state_wq.lock);
return changed;
}
-static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess,
- enum rtrs_clt_state new_state)
-{
- enum rtrs_clt_state old_state;
-
- return rtrs_clt_change_state_get_old(sess, new_state, &old_state);
-}
-
static void rtrs_clt_hb_err_handler(struct rtrs_con *c)
{
struct rtrs_clt_con *con = container_of(c, typeof(*con), c);
@@ -1511,7 +1509,7 @@ static void destroy_con(struct rtrs_clt_con *con)
static int create_con_cq_qp(struct rtrs_clt_con *con)
{
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
- u16 wr_queue_size;
+ u32 max_send_wr, max_recv_wr, cq_size;
int err, cq_vector;
struct rtrs_msg_rkey_rsp *rsp;
@@ -1523,7 +1521,8 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
* + 2 for drain and heartbeat
* in case qp gets into error state
*/
- wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2;
+ max_send_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2;
+ max_recv_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2;
/* We must be the first here */
if (WARN_ON(sess->s.dev))
return -EINVAL;
@@ -1555,25 +1554,29 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
/* Shared between connections */
sess->s.dev_ref++;
- wr_queue_size =
+ max_send_wr =
min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr,
/* QD * (REQ + RSP + FR REGS or INVS) + drain */
sess->queue_depth * 3 + 1);
+ max_recv_wr =
+ min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr,
+ sess->queue_depth * 3 + 1);
}
/* alloc iu to recv new rkey reply when server reports flags set */
- if (sess->flags == RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
- con->rsp_ius = rtrs_iu_alloc(wr_queue_size, sizeof(*rsp),
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
+ con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp),
GFP_KERNEL, sess->s.dev->ib_dev,
DMA_FROM_DEVICE,
rtrs_clt_rdma_done);
if (!con->rsp_ius)
return -ENOMEM;
- con->queue_size = wr_queue_size;
+ con->queue_size = max_recv_wr;
}
+ cq_size = max_send_wr + max_recv_wr;
cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
- cq_vector, wr_queue_size, wr_queue_size,
- IB_POLL_SOFTIRQ);
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_SOFTIRQ);
/*
* In case of error we do not bother to clean previous allocations,
* since destroy_con_cq_qp() must be called.
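
With this hunk the send and receive budgets are tracked separately: the service connection gets SERVICE_CON_QUEUE_DEPTH * 2 + 2 work requests in each direction, IO connections get the queue depth times three plus a drain slot, both clamped to the device's max_qp_wr, and the CQ is sized to hold completions from both directions, cq_size = max_send_wr + max_recv_wr. A small sketch of that arithmetic with placeholder numbers, not the driver code:

#include <stdio.h>

struct wr_budget {
	unsigned int max_send_wr;
	unsigned int max_recv_wr;
	unsigned int cq_size;
};

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* queue_depth: negotiated IO depth; dev_max_qp_wr: device limit. */
static struct wr_budget io_con_budget(unsigned int queue_depth,
				      unsigned int dev_max_qp_wr)
{
	struct wr_budget b;

	/* QD * (REQ + RSP + FR REGS or INVS) + drain, clamped to the device */
	b.max_send_wr = min_u(dev_max_qp_wr, queue_depth * 3 + 1);
	b.max_recv_wr = min_u(dev_max_qp_wr, queue_depth * 3 + 1);
	b.cq_size = b.max_send_wr + b.max_recv_wr;
	return b;
}

int main(void)
{
	struct wr_budget b = io_con_budget(128, 4096);

	printf("send %u recv %u cq %u\n", b.max_send_wr, b.max_recv_wr, b.cq_size);
	return 0;
}
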
@@ -1788,7 +1791,7 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con,
static void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait)
{
- if (rtrs_clt_change_state(sess, RTRS_CLT_CLOSING))
+ if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL))
queue_work(rtrs_wq, &sess->close_work);
if (wait)
flush_work(&sess->close_work);
@@ -2174,7 +2177,7 @@ static void rtrs_clt_close_work(struct work_struct *work)
cancel_delayed_work_sync(&sess->reconnect_dwork);
rtrs_clt_stop_and_destroy_conns(sess);
- rtrs_clt_change_state(sess, RTRS_CLT_CLOSED);
+ rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL);
}
static int init_conns(struct rtrs_clt_sess *sess)
@@ -2226,7 +2229,7 @@ destroy:
* doing rdma_resolve_addr(), switch to CONNECTION_ERR state
* manually to keep reconnecting.
*/
- rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR);
+ rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
return err;
}
@@ -2243,7 +2246,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
rtrs_err(sess->clt, "Sess info request send failed: %s\n",
ib_wc_status_msg(wc->status));
- rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR);
+ rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
return;
}
@@ -2367,7 +2370,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
out:
rtrs_clt_update_wc_stats(con);
rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
- rtrs_clt_change_state(sess, state);
+ rtrs_clt_change_state_get_old(sess, state, NULL);
}
static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
@@ -2423,7 +2426,6 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
err = -ECONNRESET;
else
err = -ETIMEDOUT;
- goto out;
}
out:
@@ -2433,7 +2435,7 @@ out:
rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
if (unlikely(err))
/* If we've never taken async path because of malloc problems */
- rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR);
+ rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
return err;
}
@@ -2490,7 +2492,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
/* Stop everything */
rtrs_clt_stop_and_destroy_conns(sess);
msleep(RTRS_RECONNECT_BACKOFF);
- if (rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING)) {
+ if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) {
err = init_sess(sess);
if (err)
goto reconnect_again;
@@ -2499,7 +2501,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
return;
reconnect_again:
- if (rtrs_clt_change_state(sess, RTRS_CLT_RECONNECTING)) {
+ if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) {
sess->stats->reconnects.fail_cnt++;
delay_ms = clt->reconnect_delay_sec * 1000;
queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
@@ -2565,11 +2567,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
clt->dev.class = rtrs_clt_dev_class;
clt->dev.release = rtrs_clt_dev_release;
err = dev_set_name(&clt->dev, "%s", sessname);
- if (err) {
- free_percpu(clt->pcpu_path);
- kfree(clt);
- return ERR_PTR(err);
- }
+ if (err)
+ goto err;
/*
* Suppress user space notification until
* sysfs files are created
@@ -2577,44 +2576,35 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
dev_set_uevent_suppress(&clt->dev, true);
err = device_register(&clt->dev);
if (err) {
- free_percpu(clt->pcpu_path);
put_device(&clt->dev);
- return ERR_PTR(err);
+ goto err;
}
clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj);
if (!clt->kobj_paths) {
- free_percpu(clt->pcpu_path);
- device_unregister(&clt->dev);
- return NULL;
+ err = -ENOMEM;
+ goto err_dev;
}
err = rtrs_clt_create_sysfs_root_files(clt);
if (err) {
- free_percpu(clt->pcpu_path);
kobject_del(clt->kobj_paths);
kobject_put(clt->kobj_paths);
- device_unregister(&clt->dev);
- return ERR_PTR(err);
+ goto err_dev;
}
dev_set_uevent_suppress(&clt->dev, false);
kobject_uevent(&clt->dev.kobj, KOBJ_ADD);
return clt;
-}
-
-static void wait_for_inflight_permits(struct rtrs_clt *clt)
-{
- if (clt->permits_map) {
- size_t sz = clt->queue_depth;
-
- wait_event(clt->permits_wait,
- find_first_bit(clt->permits_map, sz) >= sz);
- }
+err_dev:
+ device_unregister(&clt->dev);
+err:
+ free_percpu(clt->pcpu_path);
+ kfree(clt);
+ return ERR_PTR(err);
}
static void free_clt(struct rtrs_clt *clt)
{
- wait_for_inflight_permits(clt);
free_permits(clt);
free_percpu(clt->pcpu_path);
mutex_destroy(&clt->paths_ev_mutex);
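
alloc_clt() now unwinds through shared labels instead of repeating the cleanup in every branch: the later failure points jump to err_dev, which unregisters the device and falls through to err, where the per-cpu data and the clt structure itself are released, so each resource is freed exactly once. The same ladder pattern in a self-contained form, with generic names:

#include <stdlib.h>

struct thing { void *a; void *b; };

static struct thing *alloc_thing(void)
{
	struct thing *t = calloc(1, sizeof(*t));

	if (!t)
		return NULL;
	t->a = malloc(64);
	if (!t->a)
		goto err;              /* nothing but 't' to undo yet */
	t->b = malloc(64);
	if (!t->b)
		goto err_a;            /* undo in reverse order of setup */
	return t;

err_a:
	free(t->a);
err:
	free(t);
	return NULL;
}

int main(void)
{
	struct thing *t = alloc_thing();

	if (t) {
		free(t->b);
		free(t->a);
		free(t);
	}
	return 0;
}
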
@@ -2702,8 +2692,7 @@ close_all_sess:
rtrs_clt_close_conns(sess, true);
kobject_put(&sess->kobj);
}
- rtrs_clt_destroy_sysfs_root_files(clt);
- rtrs_clt_destroy_sysfs_root_folders(clt);
+ rtrs_clt_destroy_sysfs_root(clt);
free_clt(clt);
out:
@@ -2720,8 +2709,7 @@ void rtrs_clt_close(struct rtrs_clt *clt)
struct rtrs_clt_sess *sess, *tmp;
/* Firstly forbid sysfs access */
- rtrs_clt_destroy_sysfs_root_files(clt);
- rtrs_clt_destroy_sysfs_root_folders(clt);
+ rtrs_clt_destroy_sysfs_root(clt);
/* Now it is safe to iterate over all paths without locks */
list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) {
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index b8dbd701b3cb..a97a068c4c28 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -243,8 +243,7 @@ ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats,
/* rtrs-clt-sysfs.c */
int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt);
-void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt);
-void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt);
+void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt);
int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess);
void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 3f2918671dbe..d5621e6fad1b 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -303,8 +303,9 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
struct ib_send_wr *head);
int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con,
- u32 max_send_sge, int cq_vector, u16 cq_size,
- u16 wr_queue_size, enum ib_poll_context poll_ctx);
+ u32 max_send_sge, int cq_vector, int cq_size,
+ u32 max_send_wr, u32 max_recv_wr,
+ enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);
void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index d2edff3b8f0d..0a3886629cae 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -51,6 +51,8 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj,
sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str));
rtrs_info(s, "disconnect for path %s requested\n", str);
+ /* first remove sysfs itself to avoid deadlock */
+ sysfs_remove_file_self(&sess->kobj, &attr->attr);
close_sess(sess);
return count;
@@ -234,6 +236,7 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess)
&sess->kobj, "stats");
if (err) {
rtrs_err(s, "kobject_init_and_add(): %d\n", err);
+ kobject_put(&sess->stats->kobj_stats);
return err;
}
err = sysfs_create_group(&sess->stats->kobj_stats,
@@ -290,8 +293,8 @@ remove_group:
sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group);
put_kobj:
kobject_del(&sess->kobj);
- kobject_put(&sess->kobj);
destroy_root:
+ kobject_put(&sess->kobj);
rtrs_srv_destroy_once_sysfs_root_folders(sess);
return err;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index c42fd470c4eb..918f1cf140cd 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -267,6 +267,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
WARN_ON_ONCE(rkey != wr->rkey);
wr->wr.opcode = IB_WR_RDMA_WRITE;
+ wr->wr.wr_cqe = &io_comp_cqe;
wr->wr.ex.imm_data = 0;
wr->wr.send_flags = 0;
@@ -294,6 +295,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
inv_wr.sg_list = NULL;
inv_wr.num_sge = 0;
inv_wr.opcode = IB_WR_SEND_WITH_INV;
+ inv_wr.wr_cqe = &io_comp_cqe;
inv_wr.send_flags = 0;
inv_wr.ex.invalidate_rkey = rkey;
}
@@ -304,6 +306,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
srv_mr = &sess->mrs[id->msg_id];
rwr.wr.opcode = IB_WR_REG_MR;
+ rwr.wr.wr_cqe = &local_reg_cqe;
rwr.wr.num_sge = 0;
rwr.mr = srv_mr->mr;
rwr.wr.send_flags = 0;
@@ -379,6 +382,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
if (need_inval) {
if (likely(sg_cnt)) {
+ inv_wr.wr_cqe = &io_comp_cqe;
inv_wr.sg_list = NULL;
inv_wr.num_sge = 0;
inv_wr.opcode = IB_WR_SEND_WITH_INV;
@@ -421,6 +425,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
srv_mr = &sess->mrs[id->msg_id];
rwr.wr.next = &imm_wr;
rwr.wr.opcode = IB_WR_REG_MR;
+ rwr.wr.wr_cqe = &local_reg_cqe;
rwr.wr.num_sge = 0;
rwr.wr.send_flags = 0;
rwr.mr = srv_mr->mr;
@@ -651,7 +656,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
if (!srv_mr->iu) {
err = -ENOMEM;
rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err);
- goto free_iu;
+ goto dereg_mr;
}
}
/* Eventually dma addr for each chunk can be cached */
@@ -667,7 +672,6 @@ err:
srv_mr = &sess->mrs[mri];
sgt = &srv_mr->sgt;
mr = srv_mr->mr;
-free_iu:
rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1);
dereg_mr:
ib_dereg_mr(mr);
@@ -814,7 +818,7 @@ static int process_info_req(struct rtrs_srv_con *con,
rwr[mri].wr.opcode = IB_WR_REG_MR;
rwr[mri].wr.wr_cqe = &local_reg_cqe;
rwr[mri].wr.num_sge = 0;
- rwr[mri].wr.send_flags = mri ? 0 : IB_SEND_SIGNALED;
+ rwr[mri].wr.send_flags = 0;
rwr[mri].mr = mr;
rwr[mri].key = mr->rkey;
rwr[mri].access = (IB_ACCESS_LOCAL_WRITE |
@@ -1238,7 +1242,6 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
case IB_WC_SEND:
/*
* post_send() RDMA write completions of IO reqs (read/write)
- * and hb
*/
atomic_add(srv->queue_depth, &con->sq_wr_avail);
@@ -1586,7 +1589,7 @@ static int create_con(struct rtrs_srv_sess *sess,
struct rtrs_sess *s = &sess->s;
struct rtrs_srv_con *con;
- u16 cq_size, wr_queue_size;
+ u32 cq_size, wr_queue_size;
int err, cq_vector;
con = kzalloc(sizeof(*con), GFP_KERNEL);
@@ -1600,7 +1603,7 @@ static int create_con(struct rtrs_srv_sess *sess,
con->c.cm_id = cm_id;
con->c.sess = &sess->s;
con->c.cid = cid;
- atomic_set(&con->wr_cnt, 0);
+ atomic_set(&con->wr_cnt, 1);
if (con->c.cid == 0) {
/*
@@ -1630,7 +1633,8 @@ static int create_con(struct rtrs_srv_sess *sess,
/* TODO: SOFTIRQ can be faster, but be careful with softirq context */
err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size,
- wr_queue_size, IB_POLL_WORKQUEUE);
+ wr_queue_size, wr_queue_size,
+ IB_POLL_WORKQUEUE);
if (err) {
rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err);
goto free_con;
@@ -1862,8 +1866,8 @@ reject_w_econnreset:
return rtrs_rdma_do_reject(cm_id, -ECONNRESET);
close_and_return_err:
- close_sess(sess);
mutex_unlock(&srv->paths_mutex);
+ close_sess(sess);
return err;
}
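
The rtrs-srv changes attach a wr_cqe to every posted work request (io_comp_cqe for the data and invalidate WRs, local_reg_cqe for the MR registrations) because completions are dispatched through the cqe's done callback; a WR posted without one would leave that pointer unset when its completion is polled. A minimal userspace model of the "completion carries a pointer to its handler" scheme, with invented types rather than the verbs structures:

#include <stdio.h>

struct cqe {
	void (*done)(struct cqe *cqe, int status);
};

struct work_request {
	struct cqe *wr_cqe;          /* must be set before posting */
	int opcode;
};

static void io_done(struct cqe *cqe, int status)
{
	(void)cqe;
	printf("io completion, status %d\n", status);
}

static struct cqe io_comp_cqe = { .done = io_done };

/* Stand-in for the CQ polling loop: route each completion via its cqe. */
static void complete(struct work_request *wr, int status)
{
	wr->wr_cqe->done(wr->wr_cqe, status);
}

int main(void)
{
	struct work_request wr = { .wr_cqe = &io_comp_cqe, .opcode = 1 };

	complete(&wr, 0);
	return 0;
}
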
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index 2e3a849e0a77..d13aff0aa816 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -182,16 +182,16 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
u32 imm_data, enum ib_send_flags flags,
struct ib_send_wr *head)
{
- struct ib_send_wr wr;
+ struct ib_rdma_wr wr;
- wr = (struct ib_send_wr) {
- .wr_cqe = cqe,
- .send_flags = flags,
- .opcode = IB_WR_RDMA_WRITE_WITH_IMM,
- .ex.imm_data = cpu_to_be32(imm_data),
+ wr = (struct ib_rdma_wr) {
+ .wr.wr_cqe = cqe,
+ .wr.send_flags = flags,
+ .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM,
+ .wr.ex.imm_data = cpu_to_be32(imm_data),
};
- return rtrs_post_send(con->qp, head, &wr);
+ return rtrs_post_send(con->qp, head, &wr.wr);
}
EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty);
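
rtrs_post_rdma_write_imm_empty() now builds a struct ib_rdma_wr and posts &wr.wr: for RDMA opcodes the lower layers treat the posted ib_send_wr as the first member of the larger RDMA work request and may read the fields that follow it, so reserving only the base struct on the stack is not enough even when the remote address and rkey are unused. The embedding idea in a tiny generic form, with illustrative structs rather than the verbs API:

#include <stddef.h>
#include <stdio.h>

struct base_wr {
	int opcode;
};

struct rdma_wr {
	struct base_wr wr;           /* base must be the first member */
	unsigned long remote_addr;   /* extra fields a consumer may read */
	unsigned int rkey;
};

/* A consumer that, like a verbs provider, upcasts from the base pointer. */
static void post(struct base_wr *wr)
{
	struct rdma_wr *r = (struct rdma_wr *)((char *)wr - offsetof(struct rdma_wr, wr));

	printf("opcode %d rkey %u\n", r->wr.opcode, r->rkey);
}

int main(void)
{
	/* Allocating the full container keeps the extra fields valid memory. */
	struct rdma_wr wr = { .wr = { .opcode = 8 }, .remote_addr = 0, .rkey = 0 };

	post(&wr.wr);
	return 0;
}
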
@@ -231,14 +231,14 @@ static int create_cq(struct rtrs_con *con, int cq_vector, u16 cq_size,
}
static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
- u16 wr_queue_size, u32 max_sge)
+ u32 max_send_wr, u32 max_recv_wr, u32 max_sge)
{
struct ib_qp_init_attr init_attr = {NULL};
struct rdma_cm_id *cm_id = con->cm_id;
int ret;
- init_attr.cap.max_send_wr = wr_queue_size;
- init_attr.cap.max_recv_wr = wr_queue_size;
+ init_attr.cap.max_send_wr = max_send_wr;
+ init_attr.cap.max_recv_wr = max_recv_wr;
init_attr.cap.max_recv_sge = 1;
init_attr.event_handler = qp_event_handler;
init_attr.qp_context = con;
@@ -260,8 +260,9 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
}
int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
- u32 max_send_sge, int cq_vector, u16 cq_size,
- u16 wr_queue_size, enum ib_poll_context poll_ctx)
+ u32 max_send_sge, int cq_vector, int cq_size,
+ u32 max_send_wr, u32 max_recv_wr,
+ enum ib_poll_context poll_ctx)
{
int err;
@@ -269,7 +270,8 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
if (err)
return err;
- err = create_qp(con, sess->dev->ib_pd, wr_queue_size, max_send_sge);
+ err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr,
+ max_send_sge);
if (err) {
ib_free_cq(con->cq);
con->cq = NULL;
@@ -308,7 +310,7 @@ void rtrs_send_hb_ack(struct rtrs_sess *sess)
imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0);
err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
- IB_SEND_SIGNALED, NULL);
+ 0, NULL);
if (err) {
sess->hb_err_handler(usr_con);
return;
@@ -337,7 +339,7 @@ static void hb_work(struct work_struct *work)
}
imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
- IB_SEND_SIGNALED, NULL);
+ 0, NULL);
if (err) {
sess->hb_err_handler(usr_con);
return;
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
index eb99856e8b30..e75cf9742e04 100644
--- a/include/rdma/rdma_counter.h
+++ b/include/rdma/rdma_counter.h
@@ -46,7 +46,8 @@ struct rdma_counter {
void rdma_counter_init(struct ib_device *dev);
void rdma_counter_release(struct ib_device *dev);
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
- bool on, enum rdma_nl_counter_mask mask);
+ enum rdma_nl_counter_mask mask,
+ struct netlink_ext_ack *extack);
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port);
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);