diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-10-16 13:37:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-10-16 13:37:59 -0700 |
commit | c964ced7726294d40913f2127c3f185a92cb4a41 (patch) | |
tree | 935b9410c53a5c49dd16a3ec2f23eaee6e1c0265 | |
parent | 667b1d41b25b9b6b19c8af9d673ccb93b451b527 (diff) | |
parent | dc5006cfcf62bea88076a587344ba5e00e66d1c6 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma fixes from Jason Gunthorpe:
"Several miscellaneous fixes. A lot of bnxt_re activity, there will be
more rc patches there coming.
- Many bnxt_re bug fixes - Memory leaks, kasn, NULL pointer deref,
soft lockups, error unwinding and some small functional issues
- Error unwind bug in rdma netlink
- Two issues with incorrect VLAN detection for iWarp
- skb_splice_from_iter() splat in siw
- Give SRP slab caches unique names to resolve the merge window
WARN_ON regression"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
RDMA/bnxt_re: Fix the GID table length
RDMA/bnxt_re: Fix a bug while setting up Level-2 PBL pages
RDMA/bnxt_re: Change the sequence of updating the CQ toggle value
RDMA/bnxt_re: Fix an error path in bnxt_re_add_device
RDMA/bnxt_re: Avoid CPU lockups due fifo occupancy check loop
RDMA/bnxt_re: Fix a possible NULL pointer dereference
RDMA/bnxt_re: Return more meaningful error
RDMA/bnxt_re: Fix incorrect dereference of srq in async event
RDMA/bnxt_re: Fix out of bound check
RDMA/bnxt_re: Fix the max CQ WQEs for older adapters
RDMA/srpt: Make slab cache names unique
RDMA/irdma: Fix misspelling of "accept*"
RDMA/cxgb4: Fix RDMA_CM_EVENT_UNREACHABLE error for iWARP
RDMA/siw: Add sendpage_ok() check to disable MSG_SPLICE_PAGES
RDMA/core: Fix ENODEV error for iWARP test over vlan
RDMA/nldev: Fix NULL pointer dereferences issue in rdma_nl_notify_event
RDMA/bnxt_re: Fix the max WQEs used in Static WQE mode
RDMA/bnxt_re: Add a check for memory allocation
RDMA/bnxt_re: Fix incorrect AVID type in WQE structure
RDMA/bnxt_re: Fix a possible memory leak
-rw-r--r-- | drivers/infiniband/core/addr.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/nldev.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/hw_counters.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/ib_verbs.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/main.c | 47 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/qplib_fp.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/qplib_res.c | 21 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/qplib_sp.c | 11 | ||||
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/qplib_sp.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cm.c | 9 | ||||
-rw-r--r-- | drivers/infiniband/hw/irdma/cm.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/sw/siw/siw_qp_tx.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/ib_srpt.c | 80 |
15 files changed, 133 insertions, 61 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index be0743dac3ff..c4cf26f1d149 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -269,6 +269,8 @@ rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) break; #endif } + if (!ret && dev && is_vlan_dev(dev)) + dev = vlan_dev_real_dev(dev); return ret ? ERR_PTR(ret) : dev; } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 39f89a4b8649..7dc8e2ec62cc 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -2816,6 +2816,8 @@ int rdma_nl_notify_event(struct ib_device *device, u32 port_num, nlh = nlmsg_put(skb, 0, 0, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR), 0, 0); + if (!nlh) + goto err_free; switch (type) { case RDMA_REGISTER_EVENT: diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 128651c01595..1e63f8091748 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -366,7 +366,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } } - if (rdev->pacing.dbr_pacing) + if (rdev->pacing.dbr_pacing && bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) bnxt_re_copy_db_pacing_stats(rdev, stats); } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 460f33914825..e66ae9f22c71 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1307,7 +1307,11 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, 0 : BNXT_QPLIB_RESERVED_QP_WRS; entries = bnxt_re_init_depth(entries + diff + 1, uctx); sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); - sq->max_sw_wqe = bnxt_qplib_get_depth(sq, qplqp->wqe_mode, true); + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) + sq->max_sw_wqe = bnxt_qplib_get_depth(sq, qplqp->wqe_mode, true); + else + sq->max_sw_wqe = sq->max_wqe; + } sq->q_full_delta = diff + 1; /* diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 777068de4bbc..6715c96a3eee 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -188,8 +188,11 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) bnxt_re_set_db_offset(rdev); rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); - if (rc) + if (rc) { + kfree(rdev->chip_ctx); + rdev->chip_ctx = NULL; return rc; + } if (bnxt_qplib_determine_atomics(en_dev->pdev)) ibdev_info(&rdev->ibdev, @@ -531,6 +534,7 @@ static bool is_dbr_fifo_full(struct bnxt_re_dev *rdev) static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) { struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; + u32 retry_fifo_check = 1000; u32 fifo_occup; /* loop shouldn't run infintely as the occupancy usually goes @@ -544,6 +548,14 @@ static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) if (fifo_occup < pacing_data->pacing_th) break; + if (!retry_fifo_check--) { + dev_info_once(rdev_to_dev(rdev), + "%s: fifo_occup = 0x%xfifo_max_depth = 0x%x pacing_th = 0x%x\n", + __func__, fifo_occup, pacing_data->fifo_max_depth, + pacing_data->pacing_th); + break; + } + } } @@ -957,7 +969,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev); } -static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv, +static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev, struct bnxt_en_dev *en_dev) { struct bnxt_re_dev *rdev; @@ -973,6 +985,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv, rdev->nb.notifier_call = NULL; rdev->netdev = en_dev->net; rdev->en_dev = en_dev; + rdev->adev = adev; rdev->id = rdev->en_dev->pdev->devfn; INIT_LIST_HEAD(&rdev->qp_list); mutex_init(&rdev->qp_lock); @@ -1025,12 +1038,15 @@ static int bnxt_re_handle_unaffi_async_event(struct creq_func_event static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, struct bnxt_re_qp *qp) { - struct bnxt_re_srq *srq = container_of(qp->qplib_qp.srq, struct bnxt_re_srq, - qplib_srq); struct creq_qp_error_notification *err_event; + struct bnxt_re_srq *srq = NULL; struct ib_event event = {}; unsigned int flags; + if (qp->qplib_qp.srq) + srq = container_of(qp->qplib_qp.srq, struct bnxt_re_srq, + qplib_srq); + if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && rdma_is_kernel_res(&qp->ib_qp.res)) { flags = bnxt_re_lock_cqs(qp); @@ -1258,15 +1274,9 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, { struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq, qplib_cq); - u32 *cq_ptr; - if (cq->ib_cq.comp_handler) { - if (cq->uctx_cq_page) { - cq_ptr = (u32 *)cq->uctx_cq_page; - *cq_ptr = cq->qplib_cq.toggle; - } + if (cq->ib_cq.comp_handler) (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context); - } return 0; } @@ -1823,7 +1833,6 @@ static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev, */ rtnl_lock(); en_info->rdev = rdev; - rdev->adev = adev; rtnl_unlock(); } @@ -1840,7 +1849,7 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type) en_dev = en_info->en_dev; - rdev = bnxt_re_dev_add(aux_priv, en_dev); + rdev = bnxt_re_dev_add(adev, en_dev); if (!rdev || !rdev_to_dev(rdev)) { rc = -ENOMEM; goto exit; @@ -1865,12 +1874,14 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type) rdev->nb.notifier_call = NULL; pr_err("%s: Cannot register to netdevice_notifier", ROCE_DRV_MODULE_NAME); - return rc; + goto re_dev_unreg; } bnxt_re_setup_cc(rdev, true); return 0; +re_dev_unreg: + ib_unregister_device(&rdev->ibdev); re_dev_uninit: bnxt_re_update_en_info_rdev(NULL, en_info, adev); bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); @@ -2014,15 +2025,7 @@ static int bnxt_re_probe(struct auxiliary_device *adev, auxiliary_set_drvdata(adev, en_info); rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT); - if (rc) - goto err; mutex_unlock(&bnxt_re_mutex); - return 0; - -err: - mutex_unlock(&bnxt_re_mutex); - bnxt_re_remove(adev); - return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 42e98e5f94cb..2ebcb2de962b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -327,6 +327,7 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t) case NQ_BASE_TYPE_CQ_NOTIFICATION: { struct nq_cn *nqcne = (struct nq_cn *)nqe; + struct bnxt_re_cq *cq_p; q_handle = le32_to_cpu(nqcne->cq_handle_low); q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high) @@ -337,6 +338,10 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t) cq->toggle = (le16_to_cpu(nqe->info10_type) & NQ_CN_TOGGLE_MASK) >> NQ_CN_TOGGLE_SFT; cq->dbinfo.toggle = cq->toggle; + cq_p = container_of(cq, struct bnxt_re_cq, qplib_cq); + if (cq_p->uctx_cq_page) + *((u32 *)cq_p->uctx_cq_page) = cq->toggle; + bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA); spin_lock_bh(&cq->compl_lock); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index b62df8701950..820611a23943 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -170,7 +170,7 @@ struct bnxt_qplib_swqe { }; u32 q_key; u32 dst_qp; - u16 avid; + u32 avid; } send; /* Send Raw Ethernet and QP1 */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 3ffaef0c2651..7294221b3316 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -525,7 +525,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, /* failed with status */ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n", cookie, opcode, evnt->status); - rc = -EFAULT; + rc = -EIO; } return rc; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index dfc943fab87b..96ceec1e8199 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -244,6 +244,8 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, sginfo.pgsize = npde * pg_size; sginfo.npages = 1; rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo); + if (rc) + goto fail; /* Alloc PBL pages */ sginfo.npages = npbl; @@ -255,22 +257,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, dst_virt_ptr = (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr; src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr; - if (hwq_attr->type == HWQ_TYPE_MR) { - /* For MR it is expected that we supply only 1 contigous - * page i.e only 1 entry in the PDL that will contain - * all the PBLs for the user supplied memory region - */ - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; - i++) - dst_virt_ptr[0][i] = src_phys_ptr[i] | - flag; - } else { - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; - i++) - dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = - src_phys_ptr[i] | - PTU_PDE_VALID; - } + for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) + dst_virt_ptr[0][i] = src_phys_ptr[i] | flag; + /* Alloc or init PTEs */ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_2], hwq_attr->sginfo); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 4f75e7e5bcf7..e29fbbdab9fd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -140,6 +140,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, min_t(u32, sb->max_sge_var_wqe, BNXT_VAR_MAX_SGE) : 6; attr->max_cq = le32_to_cpu(sb->max_cq); attr->max_cq_wqes = le32_to_cpu(sb->max_cqe); + if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) + attr->max_cq_wqes = min_t(u32, BNXT_QPLIB_MAX_CQ_WQES, attr->max_cq_wqes); attr->max_cq_sges = attr->max_qp_sges; attr->max_mr = le32_to_cpu(sb->max_mr); attr->max_mw = le32_to_cpu(sb->max_mw); @@ -157,7 +159,14 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) attr->l2_db_size = (sb->l2_db_space_size + 1) * (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); - attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; + /* + * Read the max gid supported by HW. + * For each entry in HW GID in HW table, we consume 2 + * GID entries in the kernel GID table. So max_gid reported + * to stack can be up to twice the value reported by the HW, up to 256 gids. + */ + attr->max_sgid = le32_to_cpu(sb->max_gid); + attr->max_sgid = min_t(u32, BNXT_QPLIB_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid); attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index acd9c14a31c4..ecf3f45fea74 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -56,6 +56,7 @@ struct bnxt_qplib_dev_attr { u32 max_qp_wqes; u32 max_qp_sges; u32 max_cq; +#define BNXT_QPLIB_MAX_CQ_WQES 0xfffff u32 max_cq_wqes; u32 max_cq_sges; u32 max_mr; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b3757c6a0457..8d753e6e0c71 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2086,7 +2086,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, err = -ENOMEM; if (n->dev->flags & IFF_LOOPBACK) { if (iptype == 4) - pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip); + pdev = __ip_dev_find(&init_net, *(__be32 *)peer_ip, false); else if (IS_ENABLED(CONFIG_IPV6)) for_each_netdev(&init_net, pdev) { if (ipv6_chk_addr(&init_net, @@ -2101,12 +2101,12 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, err = -ENODEV; goto out; } + if (is_vlan_dev(pdev)) + pdev = vlan_dev_real_dev(pdev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) { - dev_put(pdev); + if (!ep->l2t) goto out; - } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; @@ -2119,7 +2119,6 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, ep->rss_qid = cdev->rdev.lldi.rxq_ids[ cxgb4_port_idx(pdev) * step]; set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); - dev_put(pdev); } else { pdev = get_real_dev(n->dev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 36bb7e5ce638..ce8d821bdad8 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3631,7 +3631,7 @@ void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp) /** * irdma_accept - registered call for connection to be accepted * @cm_id: cm information for passive connection - * @conn_param: accpet parameters + * @conn_param: accept parameters */ int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 64ad9e0895bd..a034264c5669 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -331,6 +331,8 @@ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, msg.msg_flags &= ~MSG_MORE; tcp_rate_check_app_limited(sk); + if (!sendpage_ok(page[i])) + msg.msg_flags &= ~MSG_SPLICE_PAGES; bvec_set_page(&bvec, page[i], bytes, offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9632afbd727b..5dfb4644446b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -68,6 +68,8 @@ MODULE_LICENSE("Dual BSD/GPL"); static u64 srpt_service_guid; static DEFINE_SPINLOCK(srpt_dev_lock); /* Protects srpt_dev_list. */ static LIST_HEAD(srpt_dev_list); /* List of srpt_device structures. */ +static DEFINE_MUTEX(srpt_mc_mutex); /* Protects srpt_memory_caches. */ +static DEFINE_XARRAY(srpt_memory_caches); /* See also srpt_memory_cache_entry */ static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE; module_param(srp_max_req_size, int, 0444); @@ -105,6 +107,63 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_process_wait_list(struct srpt_rdma_ch *ch); +/* Type of the entries in srpt_memory_caches. */ +struct srpt_memory_cache_entry { + refcount_t ref; + struct kmem_cache *c; +}; + +static struct kmem_cache *srpt_cache_get(unsigned int object_size) +{ + struct srpt_memory_cache_entry *e; + char name[32]; + void *res; + + guard(mutex)(&srpt_mc_mutex); + e = xa_load(&srpt_memory_caches, object_size); + if (e) { + refcount_inc(&e->ref); + return e->c; + } + snprintf(name, sizeof(name), "srpt-%u", object_size); + e = kmalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return NULL; + refcount_set(&e->ref, 1); + e->c = kmem_cache_create(name, object_size, /*align=*/512, 0, NULL); + if (!e->c) + goto free_entry; + res = xa_store(&srpt_memory_caches, object_size, e, GFP_KERNEL); + if (xa_is_err(res)) + goto destroy_cache; + return e->c; + +destroy_cache: + kmem_cache_destroy(e->c); + +free_entry: + kfree(e); + return NULL; +} + +static void srpt_cache_put(struct kmem_cache *c) +{ + struct srpt_memory_cache_entry *e = NULL; + unsigned long object_size; + + guard(mutex)(&srpt_mc_mutex); + xa_for_each(&srpt_memory_caches, object_size, e) + if (e->c == c) + break; + if (WARN_ON_ONCE(!e)) + return; + if (!refcount_dec_and_test(&e->ref)) + return; + WARN_ON_ONCE(xa_erase(&srpt_memory_caches, object_size) != e); + kmem_cache_destroy(e->c); + kfree(e); +} + /* * The only allowed channel state changes are those that change the channel * state into a state with a higher numerical value. Hence the new > prev test. @@ -2119,13 +2178,13 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, ch->rq_size, ch->rsp_buf_cache, DMA_TO_DEVICE); - kmem_cache_destroy(ch->rsp_buf_cache); + srpt_cache_put(ch->rsp_buf_cache); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, sdev, ch->rq_size, ch->req_buf_cache, DMA_FROM_DEVICE); - kmem_cache_destroy(ch->req_buf_cache); + srpt_cache_put(ch->req_buf_cache); kref_put(&ch->kref, srpt_free_ch); } @@ -2245,8 +2304,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, INIT_LIST_HEAD(&ch->cmd_wait_list); ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size; - ch->rsp_buf_cache = kmem_cache_create("srpt-rsp-buf", ch->max_rsp_size, - 512, 0, NULL); + ch->rsp_buf_cache = srpt_cache_get(ch->max_rsp_size); if (!ch->rsp_buf_cache) goto free_ch; @@ -2280,8 +2338,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, alignment_offset = round_up(imm_data_offset, 512) - imm_data_offset; req_sz = alignment_offset + imm_data_offset + srp_max_req_size; - ch->req_buf_cache = kmem_cache_create("srpt-req-buf", req_sz, - 512, 0, NULL); + ch->req_buf_cache = srpt_cache_get(req_sz); if (!ch->req_buf_cache) goto free_rsp_ring; @@ -2478,7 +2535,7 @@ free_recv_ring: ch->req_buf_cache, DMA_FROM_DEVICE); free_recv_cache: - kmem_cache_destroy(ch->req_buf_cache); + srpt_cache_put(ch->req_buf_cache); free_rsp_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, @@ -2486,7 +2543,7 @@ free_rsp_ring: ch->rsp_buf_cache, DMA_TO_DEVICE); free_rsp_cache: - kmem_cache_destroy(ch->rsp_buf_cache); + srpt_cache_put(ch->rsp_buf_cache); free_ch: if (rdma_cm_id) @@ -3055,7 +3112,7 @@ static void srpt_free_srq(struct srpt_device *sdev) srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, sdev->srq_size, sdev->req_buf_cache, DMA_FROM_DEVICE); - kmem_cache_destroy(sdev->req_buf_cache); + srpt_cache_put(sdev->req_buf_cache); sdev->srq = NULL; } @@ -3082,8 +3139,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev) pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, sdev->device->attrs.max_srq_wr, dev_name(&device->dev)); - sdev->req_buf_cache = kmem_cache_create("srpt-srq-req-buf", - srp_max_req_size, 0, 0, NULL); + sdev->req_buf_cache = srpt_cache_get(srp_max_req_size); if (!sdev->req_buf_cache) goto free_srq; @@ -3105,7 +3161,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev) return 0; free_cache: - kmem_cache_destroy(sdev->req_buf_cache); + srpt_cache_put(sdev->req_buf_cache); free_srq: ib_destroy_srq(srq); |