-rw-r--r--  drivers/infiniband/core/cma.c | 230
-rw-r--r--  drivers/infiniband/core/cma_priv.h | 1
-rw-r--r--  drivers/infiniband/core/rdma_core.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/ipoib_tx.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/netdev_rx.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/pio_copy.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 248
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 13
-rw-r--r--  drivers/infiniband/hw/irdma/cm.c | 11
-rw-r--r--  drivers/infiniband/hw/irdma/ctrl.c | 8
-rw-r--r--  drivers/infiniband/hw/irdma/hw.c | 33
-rw-r--r--  drivers/infiniband/hw/irdma/main.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/utils.c | 1
-rw-r--r--  drivers/infiniband/hw/irdma/verbs.c | 16
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 12
-rw-r--r--  drivers/infiniband/hw/mlx5/umr.c | 78
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 8
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c | 6
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 23
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 5
-rw-r--r--  drivers/infiniband/hw/qib/qib_sd7220.c | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c | 8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mw.c | 6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c | 106
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.h | 18
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 18
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.h | 5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 13
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 219
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 39
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 4
-rw-r--r--  drivers/infiniband/sw/siw/siw_cm.c | 7
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.c | 2
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 6
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 6
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c | 14
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt.c | 50
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-pri.h | 21
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c | 32
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 2
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv.c | 32
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv.h | 15
-rw-r--r--  include/rdma/rdma_cm.h | 1
51 files changed, 971 insertions, 379 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index fabca5e51e3d..46d06678dfbe 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
@@ -20,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -168,6 +170,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
@@ -202,6 +207,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}
+struct id_table_entry {
+ struct list_head id_list;
+ struct rb_node rb_node;
+};
+
struct cma_device {
struct list_head list;
struct ib_device *device;
@@ -420,11 +430,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
return hdr->ip_version >> 4;
}
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
+}
+
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
struct in_device *in_dev = NULL;
@@ -445,6 +465,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+ struct id_table_entry *entry_b)
+{
+ struct rdma_id_private *id_priv = list_first_entry(
+ &entry_b->id_list, struct rdma_id_private, id_list_entry);
+ int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+ struct sockaddr *sb = cma_dst_addr(id_priv);
+
+ if (ifindex_a != ifindex_b)
+ return (ifindex_a > ifindex_b) ? 1 : -1;
+
+ if (sa->sa_family != sb->sa_family)
+ return sa->sa_family - sb->sa_family;
+
+ if (sa->sa_family == AF_INET)
+ return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr,
+ (char *)&((struct sockaddr_in *)sb)->sin_addr,
+ sizeof(((struct sockaddr_in *)sa)->sin_addr));
+
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &((struct sockaddr_in6 *)sb)->sin6_addr);
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+ struct rb_node **new, *parent = NULL;
+ struct id_table_entry *this, *node;
+ unsigned long flags;
+ int result;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ new = &id_table.rb_node;
+ while (*new) {
+ this = container_of(*new, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(
+ node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(node_id_priv), this);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add_tail(&node_id_priv->id_list_entry,
+ &this->id_list);
+ kfree(node);
+ goto unlock;
+ }
+ }
+
+ INIT_LIST_HEAD(&node->id_list);
+ list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+ rb_link_node(&node->rb_node, parent, new);
+ rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+ struct rb_node *node = root->rb_node;
+ struct id_table_entry *data;
+ int result;
+
+ while (node) {
+ data = container_of(node, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(ifindex, sa, data);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+
+ return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+ struct id_table_entry *data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (list_empty(&id_priv->id_list_entry))
+ goto out;
+
+ data = node_from_ndev_ip(&id_table,
+ id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(id_priv));
+ if (!data)
+ goto out;
+
+ list_del_init(&id_priv->id_list_entry);
+ if (list_empty(&data->id_list)) {
+ rb_erase(&data->rb_node, &id_table);
+ kfree(data);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+}
+
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
@@ -481,16 +612,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
@@ -861,6 +982,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
INIT_LIST_HEAD(&id_priv->device_item);
+ INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
@@ -1883,6 +2005,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
cma_cancel_operation(id_priv, state);
rdma_restrack_del(&id_priv->res);
+ cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -3172,8 +3295,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
+ if (!ret)
+ cma_add_id_to_tree(id_priv);
+ }
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
@@ -4922,10 +5048,87 @@ out:
return ret;
}
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+ struct rdma_id_private *id_priv =
+ container_of(_work, struct rdma_id_private, id.net_work);
+ struct rdma_cm_event event = {};
+
+ mutex_lock(&id_priv->handler_mutex);
+
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ __acquire(&id_priv->handler_mutex);
+ id_priv->cm_id.ib = NULL;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ return;
+ }
+
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+ unsigned long event, void *ctx)
+{
+ struct id_table_entry *ips_node = NULL;
+ struct rdma_id_private *current_id;
+ struct neighbour *neigh = ctx;
+ unsigned long flags;
+
+ if (event != NETEVENT_NEIGH_UPDATE)
+ return NOTIFY_DONE;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (neigh->tbl->family == AF_INET6) {
+ struct sockaddr_in6 neigh_sock_6;
+
+ neigh_sock_6.sin6_family = AF_INET6;
+ neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_6);
+ } else if (neigh->tbl->family == AF_INET) {
+ struct sockaddr_in neigh_sock_4;
+
+ neigh_sock_4.sin_family = AF_INET;
+ neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_4);
+ } else
+ goto out;
+
+ if (!ips_node)
+ goto out;
+
+ list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+ if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+ neigh->ha, ETH_ALEN))
+ continue;
+ INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
+ cma_id_get(current_id);
+ queue_work(cma_wq, &current_id->id.net_work);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return NOTIFY_DONE;
+}
+
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
+static struct notifier_block cma_netevent_cb = {
+ .notifier_call = cma_netevent_callback
+};
+
static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -5148,6 +5351,7 @@ static int __init cma_init(void)
ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
+ register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
@@ -5162,6 +5366,7 @@ static int __init cma_init(void)
err_ib:
ib_unregister_client(&cma_client);
err:
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
@@ -5174,6 +5379,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
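The cma.c hunks above key a red-black tree by (bound ifindex, destination IP) and hang every rdma_id_private sharing that key off one node's id_list, so a single neighbour update can fan out to all affected IDs. Below is a minimal userspace sketch of the same keying, not kernel code: a comparator mirroring compare_netdev_and_ip(), with the tree itself elided.

/* Userspace sketch of the id_table key ordering used above: ifindex
 * first, then address family, then raw address bytes. Duplicate keys
 * would share one tree node and queue on its id_list, since an rbtree
 * cannot hold two equal keys directly.
 */
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

struct key {
	int ifindex;
	int family;              /* AF_INET or AF_INET6 */
	unsigned char addr[16];  /* large enough for IPv6 */
};

static int key_cmp(const struct key *a, const struct key *b)
{
	if (a->ifindex != b->ifindex)
		return a->ifindex > b->ifindex ? 1 : -1;
	if (a->family != b->family)
		return a->family - b->family;
	return memcmp(a->addr, b->addr, a->family == AF_INET ? 4 : 16);
}

int main(void)
{
	struct key a = { .ifindex = 2, .family = AF_INET };
	struct key b = { .ifindex = 2, .family = AF_INET };

	inet_pton(AF_INET, "192.0.2.1", a.addr);
	inet_pton(AF_INET, "192.0.2.1", b.addr);
	/* equal keys -> same tree node -> second ID joins the id_list */
	printf("cmp = %d\n", key_cmp(&a, &b));
	return 0;
}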
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 757a0ef79872..b7354c94cf1b 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -64,6 +64,7 @@ struct rdma_id_private {
struct list_head listen_item;
struct list_head listen_list;
};
+ struct list_head id_list_entry;
struct cma_device *cma_dev;
struct list_head mc_list;
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 94d83b665a2f..29b1ab1d5f93 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
- * access locks, since only a single one of them is is allowed
+ * access locks, since only a single one of them is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 79401e6c6aa9..785c37cae3c0 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -173,7 +173,7 @@ struct bnxt_re_dev {
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
- /* QP for for handling QP1 packets */
+ /* QP for handling QP1 packets */
struct bnxt_re_gsi_context gsi_ctx;
struct bnxt_re_stats stats;
atomic_t nq_alloc_cnt;
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index 6eb739052121..14b92e12bf29 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HFI1
tristate "Cornelis OPX Gen1 support"
- depends on X86_64 && INFINIBAND_RDMAVT && I2C
+ depends on X86_64 && INFINIBAND_RDMAVT && I2C && !UML
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 2e4cf2b11653..629beff053ad 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1179,8 +1179,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
goto done;
ret = init_user_ctxt(fd, uctxt);
- if (ret)
+ if (ret) {
+ hfi1_free_ctxt_rcv_groups(uctxt);
goto done;
+ }
user_init(uctxt);
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index d6bbdb8fcb50..5d9a7b09ca37 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -742,9 +742,7 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
kzalloc_node(sizeof(*tx->sdma_hdr),
GFP_KERNEL, priv->dd->node);
- netif_tx_napi_add(dev, &txq->napi,
- hfi1_ipoib_poll_tx_ring,
- NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
}
return 0;
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 03b098a494b5..3dfa5aff2512 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
* right now.
*/
set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
- netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
+ netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
rc = msix_netdev_request_rcd_irq(rxq->rcd);
if (rc)
goto bail_context_irq_failure;
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 136f9a99e1e0..7690f996d5e3 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -172,7 +172,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
}
/*
- * Read nbytes from "from" and and place them in the low bytes
+ * Read nbytes from "from" and place them in the low bytes
* of pbuf->carry. Other bytes are left as-is. Any previous
* value in pbuf->carry is lost.
*
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 2855e9ad4b32..f848eedc6a23 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -959,6 +959,7 @@ struct hns_roce_dev {
const struct hns_roce_hw *hw;
void *priv;
struct workqueue_struct *irq_workq;
+ struct work_struct ecc_work;
const struct hns_roce_dfx_hw *dfx;
u32 func_num;
u32 is_vf;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index ba3c742258ef..cbdafaac678a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -55,6 +55,42 @@ enum {
CMD_RST_PRC_EBUSY,
};
+enum ecc_resource_type {
+ ECC_RESOURCE_QPC,
+ ECC_RESOURCE_CQC,
+ ECC_RESOURCE_MPT,
+ ECC_RESOURCE_SRQC,
+ ECC_RESOURCE_GMV,
+ ECC_RESOURCE_QPC_TIMER,
+ ECC_RESOURCE_CQC_TIMER,
+ ECC_RESOURCE_SCCC,
+ ECC_RESOURCE_COUNT,
+};
+
+static const struct {
+ const char *name;
+ u8 read_bt0_op;
+ u8 write_bt0_op;
+} fmea_ram_res[] = {
+ { "ECC_RESOURCE_QPC",
+ HNS_ROCE_CMD_READ_QPC_BT0, HNS_ROCE_CMD_WRITE_QPC_BT0 },
+ { "ECC_RESOURCE_CQC",
+ HNS_ROCE_CMD_READ_CQC_BT0, HNS_ROCE_CMD_WRITE_CQC_BT0 },
+ { "ECC_RESOURCE_MPT",
+ HNS_ROCE_CMD_READ_MPT_BT0, HNS_ROCE_CMD_WRITE_MPT_BT0 },
+ { "ECC_RESOURCE_SRQC",
+ HNS_ROCE_CMD_READ_SRQC_BT0, HNS_ROCE_CMD_WRITE_SRQC_BT0 },
+ /* ECC_RESOURCE_GMV is handled by cmdq, not mailbox */
+ { "ECC_RESOURCE_GMV",
+ 0, 0 },
+ { "ECC_RESOURCE_QPC_TIMER",
+ HNS_ROCE_CMD_READ_QPC_TIMER_BT0, HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 },
+ { "ECC_RESOURCE_CQC_TIMER",
+ HNS_ROCE_CMD_READ_CQC_TIMER_BT0, HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 },
+ { "ECC_RESOURCE_SCCC",
+ HNS_ROCE_CMD_READ_SCCC_BT0, HNS_ROCE_CMD_WRITE_SCCC_BT0 },
+};
+
static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
struct ib_sge *sg)
{
@@ -5855,12 +5891,12 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
-static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
- int aeqe_found = 0;
+ irqreturn_t aeqe_found = IRQ_NONE;
int event_type;
u32 queue_num;
int sub_type;
@@ -5914,7 +5950,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
eq->event_type = event_type;
eq->sub_type = sub_type;
++eq->cons_index;
- aeqe_found = 1;
+ aeqe_found = IRQ_HANDLED;
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
@@ -5922,7 +5958,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
}
update_eq_db(eq);
- return aeqe_found;
+
+ return IRQ_RETVAL(aeqe_found);
}
static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
@@ -5937,11 +5974,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
-static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
- int ceqe_found = 0;
+ irqreturn_t ceqe_found = IRQ_NONE;
u32 cqn;
while (ceqe) {
@@ -5955,21 +5992,21 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
hns_roce_cq_completion(hr_dev, cqn);
++eq->cons_index;
- ceqe_found = 1;
+ ceqe_found = IRQ_HANDLED;
ceqe = next_ceqe_sw_v2(eq);
}
update_eq_db(eq);
- return ceqe_found;
+ return IRQ_RETVAL(ceqe_found);
}
static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
{
struct hns_roce_eq *eq = eq_ptr;
struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work;
+ irqreturn_t int_work;
if (eq->type_flag == HNS_ROCE_CEQ)
/* Completion event interrupt */
@@ -5981,27 +6018,22 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
return IRQ_RETVAL(int_work);
}
-static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
+ u32 int_st)
{
- struct hns_roce_dev *hr_dev = dev_id;
- struct device *dev = hr_dev->dev;
- int int_work = 0;
- u32 int_st;
+ struct pci_dev *pdev = hr_dev->pci_dev;
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+ const struct hnae3_ae_ops *ops = ae_dev->ops;
+ irqreturn_t int_work = IRQ_NONE;
u32 int_en;
- /* Abnormal interrupt */
- int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
- struct pci_dev *pdev = hr_dev->pci_dev;
- struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
- const struct hnae3_ae_ops *ops = ae_dev->ops;
+ dev_err(hr_dev->dev, "AEQ overflow!\n");
- dev_err(dev, "AEQ overflow!\n");
-
- int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
+ 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
/* Set reset level for reset_event() */
if (ops->set_default_reset_request)
@@ -6013,19 +6045,165 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
- int_work = 1;
- } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_RAS_INT_S)) {
- dev_err(dev, "RAS interrupt!\n");
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no basic abn irq found.\n");
+ }
+
+ return IRQ_RETVAL(int_work);
+}
- int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_RAS_INT_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+static int fmea_ram_ecc_query(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ int ret;
- int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_QUERY_RAM_ECC, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ ecc_info->is_ecc_err = hr_reg_read(req, QUERY_RAM_ECC_1BIT_ERR);
+ ecc_info->res_type = hr_reg_read(req, QUERY_RAM_ECC_RES_TYPE);
+ ecc_info->index = hr_reg_read(req, QUERY_RAM_ECC_TAG);
+
+ return 0;
+}
+
+static int fmea_recover_gmv(struct hns_roce_dev *hr_dev, u32 idx)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 addr_upper;
+ u32 addr_low;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, true);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read gmv, ret = %d.\n", ret);
+ return ret;
+ }
+
+ addr_low = hr_reg_read(req, CFG_GMV_BT_BA_L);
+ addr_upper = hr_reg_read(req, CFG_GMV_BT_BA_H);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+ hr_reg_write(req, CFG_GMV_BT_BA_L, addr_low);
+ hr_reg_write(req, CFG_GMV_BT_BA_H, addr_upper);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data)
+{
+ if (res_type == ECC_RESOURCE_QPC_TIMER ||
+ res_type == ECC_RESOURCE_CQC_TIMER ||
+ res_type == ECC_RESOURCE_SCCC)
+ return le64_to_cpu(*data);
+
+ return le64_to_cpu(*data) << PAGE_SHIFT;
+}
- int_work = 1;
+static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type,
+ u32 index)
+{
+ u8 write_bt0_op = fmea_ram_res[res_type].write_bt0_op;
+ u8 read_bt0_op = fmea_ram_res[res_type].read_bt0_op;
+ struct hns_roce_cmd_mailbox *mailbox;
+ u64 addr;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, read_bt0_op, index);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read fmea ram, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ addr = fmea_get_ram_res_addr(res_type, mailbox->buf);
+
+ ret = hns_roce_cmd_mbox(hr_dev, addr, 0, write_bt0_op, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to write fmea ram, ret = %d.\n",
+ ret);
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static void fmea_ram_ecc_recover(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ u32 res_type = ecc_info->res_type;
+ u32 index = ecc_info->index;
+ int ret;
+
+ BUILD_BUG_ON(ARRAY_SIZE(fmea_ram_res) != ECC_RESOURCE_COUNT);
+
+ if (res_type >= ECC_RESOURCE_COUNT) {
+ dev_err(hr_dev->dev, "unsupported fmea ram ecc type %u.\n",
+ res_type);
+ return;
+ }
+
+ if (res_type == ECC_RESOURCE_GMV)
+ ret = fmea_recover_gmv(hr_dev, index);
+ else
+ ret = fmea_recover_others(hr_dev, res_type, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to recover %s, index = %u, ret = %d.\n",
+ fmea_ram_res[res_type].name, index, ret);
+}
+
+static void fmea_ram_ecc_work(struct work_struct *ecc_work)
+{
+ struct hns_roce_dev *hr_dev =
+ container_of(ecc_work, struct hns_roce_dev, ecc_work);
+ struct fmea_ram_ecc ecc_info = {};
+
+ if (fmea_ram_ecc_query(hr_dev, &ecc_info)) {
+ dev_err(hr_dev->dev, "failed to query fmea ram ecc.\n");
+ return;
+ }
+
+ if (!ecc_info.is_ecc_err) {
+ dev_err(hr_dev->dev, "there is no fmea ram ecc err found.\n");
+ return;
+ }
+
+ fmea_ram_ecc_recover(hr_dev, &ecc_info);
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+{
+ struct hns_roce_dev *hr_dev = dev_id;
+ irqreturn_t int_work = IRQ_NONE;
+ u32 int_st;
+
+ int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
+
+ if (int_st) {
+ int_work = abnormal_interrupt_basic(hr_dev, int_st);
+ } else if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ queue_work(hr_dev->irq_workq, &hr_dev->ecc_work);
+ int_work = IRQ_HANDLED;
} else {
- dev_err(dev, "There is no abnormal irq found!\n");
+ dev_err(hr_dev->dev, "there is no abnormal irq found.\n");
}
return IRQ_RETVAL(int_work);
@@ -6342,6 +6520,8 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
}
}
+ INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work);
+
hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
if (!hr_dev->irq_workq) {
dev_err(dev, "failed to create irq workqueue.\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 7ffb7824d268..f96debac30fe 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -250,6 +250,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
HNS_ROCE_OPC_EXT_CFG = 0x8512,
+ HNS_ROCE_QUERY_RAM_ECC = 0x8513,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
@@ -1107,6 +1108,11 @@ enum {
#define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32)
#define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64)
+/* Fields of HNS_ROCE_QUERY_RAM_ECC */
+#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0)
+#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32)
+#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64)
+
struct hns_roce_cfg_sgid_tb {
__le32 table_idx_rsv;
__le32 vf_sgid_l;
@@ -1343,6 +1349,12 @@ struct hns_roce_dip {
struct list_head node; /* all dips are on a list */
};
+struct fmea_ram_ecc {
+ u32 is_ecc_err;
+ u32 res_type;
+ u32 index;
+};
+
/* only for RNR timeout issue of HIP08 */
#define HNS_ROCE_CLOCK_ADJUST 1000
#define HNS_ROCE_MAX_CQ_PERIOD 65
@@ -1382,7 +1394,6 @@ struct hns_roce_dip {
#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
-#define HNS_ROCE_V2_VF_INT_ST_RAS_INT_S 1
#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 638bf4a1ed94..e4d837f82d75 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -1477,12 +1477,13 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
list_for_each_entry (listen_node, &cm_core->listen_list, list) {
memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
listen_port = listen_node->loc_port;
+ if (listen_port != dst_port ||
+ !(listener_state & listen_node->listener_state))
+ continue;
/* compare node pair, return node handle if a match */
- if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
- !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
- listen_port == dst_port &&
- vlan_id == listen_node->vlan_id &&
- (listener_state & listen_node->listener_state)) {
+ if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) ||
+ (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) &&
+ vlan_id == listen_node->vlan_id)) {
refcount_inc(&listen_node->refcnt);
spin_unlock_irqrestore(&cm_core->listen_list_lock,
flags);
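The irdma_find_listener() rework above gates on port and listener state first, then accepts either a wildcard (all-zero) listen address on any VLAN or an exact address with a matching VLAN. A small userspace sketch of that predicate, IPv4-only for brevity:

/* Userspace sketch of the reordered listener match: cheap port/state
 * rejection first, then wildcard-or-exact address check.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct listener { uint32_t addr; uint16_t port; uint16_t vlan; int state; };

static bool listener_matches(const struct listener *l, uint32_t dst_addr,
			     uint16_t dst_port, uint16_t vlan, int want_state)
{
	if (l->port != dst_port || !(want_state & l->state))
		return false;
	/* wildcard listen address matches any VLAN; exact needs both */
	return l->addr == 0 || (l->addr == dst_addr && l->vlan == vlan);
}

int main(void)
{
	struct listener wild = { .addr = 0, .port = 4791, .vlan = 9, .state = 1 };

	printf("%d\n", listener_matches(&wild, 0xc0000201, 4791, 3, 1));
	return 0;
}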
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index 58c0e181ca2b..a41e0d21143a 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -4872,10 +4872,12 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
sd_diff = sd_needed - hmc_fpm_misc->max_sds;
if (sd_diff > 128) {
- if (qpwanted > 128 && sd_diff > 144)
+ if (!(loop_count % 2) && qpwanted > 128) {
qpwanted /= 2;
- mrwanted /= 2;
- pblewanted /= 2;
+ } else {
+ mrwanted /= 2;
+ pblewanted /= 2;
+ }
continue;
}
if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF &&
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index dd3943d22dc6..4f132c6fb653 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -257,10 +257,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
iwqp->last_aeq = info->ae_id;
spin_unlock_irqrestore(&iwqp->lock, flags);
ctx_info = &iwqp->ctx_info;
- if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1))
- ctx_info->roce_info->err_rq_idx_valid = true;
- else
- ctx_info->iwarp_info->err_rq_idx_valid = true;
} else {
if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR)
continue;
@@ -370,16 +366,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
case IRDMA_AE_LCE_CQ_CATASTROPHIC:
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
- if (rdma_protocol_roce(&iwdev->ibdev, 1))
- ctx_info->roce_info->err_rq_idx_valid = false;
- else
- ctx_info->iwarp_info->err_rq_idx_valid = false;
- fallthrough;
default:
- ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d\n",
- info->ae_id, info->qp, info->qp_cq_id);
+ ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
+ info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
- if (!info->sq && ctx_info->roce_info->err_rq_idx_valid) {
+ ctx_info->roce_info->err_rq_idx_valid = info->rq;
+ if (info->rq) {
ctx_info->roce_info->err_rq_idx = info->wqe_idx;
irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
ctx_info);
@@ -388,7 +380,8 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
irdma_cm_disconn(iwqp);
break;
}
- if (!info->sq && ctx_info->iwarp_info->err_rq_idx_valid) {
+ ctx_info->iwarp_info->err_rq_idx_valid = info->rq;
+ if (info->rq) {
ctx_info->iwarp_info->err_rq_idx = info->wqe_idx;
ctx_info->tcp_info_valid = false;
ctx_info->iwarp_info_valid = true;
@@ -1512,10 +1505,7 @@ static int irdma_hmc_setup(struct irdma_pci_f *rf)
int status;
u32 qpcnt;
- if (rf->rdma_ver == IRDMA_GEN_1)
- qpcnt = rsrc_limits_table[rf->limits_sel].qplimit * 2;
- else
- qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
+ qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
rf->sd_type = IRDMA_SD_TYPE_DIRECT;
status = irdma_cfg_fpm_val(&rf->sc_dev, qpcnt);
@@ -1543,7 +1533,7 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf)
rf->obj_mem.pa);
rf->obj_mem.va = NULL;
if (rf->rdma_ver != IRDMA_GEN_1) {
- kfree(rf->allocated_ws_nodes);
+ bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
}
kfree(rf->ceqlist);
@@ -1972,9 +1962,8 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
u32 ret;
if (rf->rdma_ver != IRDMA_GEN_1) {
- rf->allocated_ws_nodes =
- kcalloc(BITS_TO_LONGS(IRDMA_MAX_WS_NODES),
- sizeof(unsigned long), GFP_KERNEL);
+ rf->allocated_ws_nodes = bitmap_zalloc(IRDMA_MAX_WS_NODES,
+ GFP_KERNEL);
if (!rf->allocated_ws_nodes)
return -ENOMEM;
@@ -2023,7 +2012,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
return 0;
mem_rsrc_kzalloc_fail:
- kfree(rf->allocated_ws_nodes);
+ bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
return ret;
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index ef862bced20f..65e966ad3453 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -85,7 +85,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_NO_QSET 0xffff
#define IW_CFG_FPM_QP_COUNT 32768
-#define IRDMA_MAX_PAGES_PER_FMR 512
+#define IRDMA_MAX_PAGES_PER_FMR 262144
#define IRDMA_MIN_PAGES_PER_FMR 1
#define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2
#define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index ab3c5208a123..fdf4cc88cb91 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -652,6 +652,7 @@ static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = {
};
static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = {
+ {0xffff, 0x8002, "Invalid State"},
{0xffff, 0x8006, "Flush No Wqe Pending"},
{0xffff, 0x8007, "Modify QP Bad Close"},
{0xffff, 0x8009, "LLP Closed"},
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index c4412ece5a6d..d78b11a41b6b 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -1776,11 +1776,11 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
spin_unlock_irqrestore(&iwcq->lock, flags);
irdma_cq_wq_destroy(iwdev->rf, cq);
- irdma_cq_free_rsrc(iwdev->rf, iwcq);
spin_lock_irqsave(&iwceq->ce_lock, flags);
irdma_sc_cleanup_ceqes(cq, ceq);
spin_unlock_irqrestore(&iwceq->ce_lock, flags);
+ irdma_cq_free_rsrc(iwdev->rf, iwcq);
return 0;
}
@@ -2605,7 +2605,7 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
palloc = &iwpbl->pble_alloc;
iwmr->page_cnt = max_num_sg;
err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
- true);
+ false);
if (err_code)
goto err_get_pble;
@@ -2641,8 +2641,16 @@ static int irdma_set_page(struct ib_mr *ibmr, u64 addr)
if (unlikely(iwmr->npages == iwmr->page_cnt))
return -ENOMEM;
- pbl = palloc->level1.addr;
- pbl[iwmr->npages++] = addr;
+ if (palloc->level == PBLE_LEVEL_2) {
+ struct irdma_pble_info *palloc_info =
+ palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT);
+
+ palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr;
+ } else {
+ pbl = palloc->level1.addr;
+ pbl[iwmr->npages] = addr;
+ }
+ iwmr->npages++;
return 0;
}
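With level-2 PBLEs, the irdma_set_page() change above splits the running page count into a leaf index (npages >> PBLE_512_SHIFT) and a slot within that leaf (npages masked to PBLE_PER_PAGE - 1). A tiny sketch of the index arithmetic, assuming 512 entries per leaf page as those masks imply:

/* Userspace sketch of the two-level PBLE index split, assuming
 * 512 entries per leaf (PBLE_512_SHIFT == 9).
 */
#include <stdio.h>

#define PBLE_SHIFT 9
#define PBLE_PER_PAGE (1u << PBLE_SHIFT)

int main(void)
{
	for (unsigned int npages = 510; npages < 515; npages++) {
		unsigned int leaf = npages >> PBLE_SHIFT;         /* which leaf page */
		unsigned int slot = npages & (PBLE_PER_PAGE - 1); /* entry in leaf */
		printf("page %u -> leaf %u slot %u\n", npages, leaf, slot);
	}
	return 0;
}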
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 08371a80fdc2..be189e0525de 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -523,6 +523,10 @@ repoll:
"Requestor" : "Responder", cq->mcq.cqn);
mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
err_cqe->syndrome, err_cqe->vendor_err_synd);
+ if (wc->status != IB_WC_WR_FLUSH_ERR &&
+ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
+ dev->umrc.state = MLX5_UMR_STATE_RECOVER;
+
if (opcode == MLX5_CQE_REQ_ERR) {
wq = &(*cur_qp)->sq;
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 998b67509a53..7460e0dfe6db 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -717,13 +717,23 @@ struct mlx5_ib_umr_context {
struct completion done;
};
+enum {
+ MLX5_UMR_STATE_ACTIVE,
+ MLX5_UMR_STATE_RECOVER,
+ MLX5_UMR_STATE_ERR,
+};
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- /* control access to UMR QP
+ /* Protects from UMR QP overflow
*/
struct semaphore sem;
+ /* Protects from using UMR while the UMR is not active
+ */
+ struct mutex lock;
+ unsigned int state;
};
struct mlx5_cache_ent {
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 3a48364c0918..e00b94d1b1ea 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -176,6 +176,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ mutex_init(&dev->umrc.lock);
return 0;
@@ -195,6 +196,31 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
ib_dealloc_pd(dev->umrc.pd);
}
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_qp_attr attr;
+ int err;
+
+ attr.qp_state = IB_QPS_RESET;
+ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+ if (err) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto err;
+ }
+
+ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+ if (err)
+ goto err;
+
+ umrc->state = MLX5_UMR_STATE_ACTIVE;
+ return 0;
+
+err:
+ umrc->state = MLX5_UMR_STATE_ERR;
+ return err;
+}
+
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
@@ -231,7 +257,7 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
id.ib_cqe = cqe;
mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
- MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
+ MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
mlx5r_ring_db(qp, 1, ctrl);
@@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
mlx5r_umr_init_context(&umr_context);
down(&umrc->sem);
- err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
- with_data);
- if (err)
- mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
- else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
+ while (true) {
+ mutex_lock(&umrc->lock);
+ if (umrc->state == MLX5_UMR_STATE_ERR) {
+ mutex_unlock(&umrc->lock);
err = -EFAULT;
+ break;
+ }
+
+ if (umrc->state == MLX5_UMR_STATE_RECOVER) {
+ mutex_unlock(&umrc->lock);
+ usleep_range(3000, 5000);
+ continue;
+ }
+
+ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
+ with_data);
+ mutex_unlock(&umrc->lock);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
+ err);
+ break;
}
+
+ wait_for_completion(&umr_context.done);
+
+ if (umr_context.status == IB_WC_SUCCESS)
+ break;
+
+ if (umr_context.status == IB_WC_WR_FLUSH_ERR)
+ continue;
+
+ WARN_ON_ONCE(1);
+ mlx5_ib_warn(dev,
+ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
+ umr_context.status);
+ mutex_lock(&umrc->lock);
+ err = mlx5r_umr_recover(dev);
+ mutex_unlock(&umrc->lock);
+ if (err)
+ mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
+ err);
+ err = -EFAULT;
+ break;
}
up(&umrc->sem);
return err;
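The rewritten mlx5r_umr_post_send_wait() above is a retry loop over three states: ERR fails fast, RECOVER backs off and re-polls, ACTIVE posts; a flushed completion loops to repost, and any other failure attempts a QP reset-to-RTS recovery under the lock while the current request still fails. Below is a single-threaded userspace sketch of that state machine; the status values, stubs, and scripted input are invented.

/* Userspace, single-threaded sketch of the UMR retry loop. In the
 * driver, a concurrent CQ handler moves the state to RECOVER and
 * flushed posters wait in that branch; here the flush case simply
 * reposts to keep the sketch runnable.
 */
#include <stdio.h>

enum { ST_ACTIVE, ST_RECOVER, ST_ERR };          /* MLX5_UMR_STATE_* analogue */
enum { WC_SUCCESS, WC_FLUSH_ERR, WC_FATAL };

static int state = ST_ACTIVE;

static int recover(void) { state = ST_ACTIVE; return 0; } /* reset -> rts */

static int post_and_wait(int wc_status)
{
	for (;;) {
		if (state == ST_ERR)
			return -1;               /* fail fast */
		if (state == ST_RECOVER)
			continue;                /* real code sleeps 3-5ms here */
		/* post the WQE and wait for its completion... */
		if (wc_status == WC_SUCCESS)
			return 0;
		if (wc_status == WC_FLUSH_ERR) {
			wc_status = WC_SUCCESS;  /* repost after recovery */
			continue;
		}
		if (recover())                   /* fatal status: reset the QP */
			state = ST_ERR;
		return -1;                       /* current request still fails */
	}
}

int main(void)
{
	printf("flushed then ok: %d\n", post_and_wait(WC_FLUSH_ERR));
	return 0;
}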
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f0f43b6db89e..3ecb472c9217 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -3082,7 +3082,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
else
DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
- goto err0;
+ goto err1;
}
/* Index only, 18 bit long, lkey = itid << 8 | key */
@@ -3106,7 +3106,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
if (rc) {
DP_ERR(dev, "roce register tid returned an error %d\n", rc);
- goto err1;
+ goto err2;
}
mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
@@ -3115,8 +3115,10 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
return mr;
-err1:
+err2:
dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err1:
+ qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
err0:
kfree(mr);
return ERR_PTR(rc);
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index aa290928cf96..3937144b2ae5 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -153,7 +153,7 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
kinfo->spi_tidcnt += dd->rcvtidcnt % subctxt_cnt;
/*
* for this use, may be cfgctxts summed over all chips that
- * are are configured and present
+ * are configured and present
*/
kinfo->spi_nctxts = dd->cfgctxts;
/* unit (chip/board) our context is on */
@@ -851,7 +851,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
ret = -EPERM;
goto bail;
}
- /* don't allow them to later change to writeable with mprotect */
+ /* don't allow them to later change to writable with mprotect */
vma->vm_flags &= ~VM_MAYWRITE;
start = vma->vm_start;
@@ -941,7 +941,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
goto bail;
}
/*
- * Don't allow permission to later change to writeable
+ * Don't allow permission to later change to writable
* with mprotect.
*/
vma->vm_flags &= ~VM_MAYWRITE;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ceed302cf6a0..6861c6384f18 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2850,9 +2850,9 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
qib_7322_free_irq(dd);
kfree(dd->cspec->cntrs);
- kfree(dd->cspec->sendchkenable);
- kfree(dd->cspec->sendgrhchk);
- kfree(dd->cspec->sendibchk);
+ bitmap_free(dd->cspec->sendchkenable);
+ bitmap_free(dd->cspec->sendgrhchk);
+ bitmap_free(dd->cspec->sendibchk);
kfree(dd->cspec->msix_entries);
for (i = 0; i < dd->num_pports; i++) {
unsigned long flags;
@@ -6383,18 +6383,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
features = qib_7322_boardname(dd);
/* now that piobcnt2k and 4k set, we can allocate these */
- sbufcnt = dd->piobcnt2k + dd->piobcnt4k +
- NUM_VL15_BUFS + BITS_PER_LONG - 1;
- sbufcnt /= BITS_PER_LONG;
- dd->cspec->sendchkenable =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendchkenable),
- GFP_KERNEL);
- dd->cspec->sendgrhchk =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendgrhchk),
- GFP_KERNEL);
- dd->cspec->sendibchk =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendibchk),
- GFP_KERNEL);
+ sbufcnt = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS;
+
+ dd->cspec->sendchkenable = bitmap_zalloc(sbufcnt, GFP_KERNEL);
+ dd->cspec->sendgrhchk = bitmap_zalloc(sbufcnt, GFP_KERNEL);
+ dd->cspec->sendibchk = bitmap_zalloc(sbufcnt, GFP_KERNEL);
if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk ||
!dd->cspec->sendibchk) {
ret = -ENOMEM;
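The qib and irdma conversions above replace open-coded kmalloc_array(BITS_TO_LONGS(n), sizeof(long)) buffers with bitmap_zalloc(n), which sizes and zeroes the same storage. A userspace equivalent of the helper pair:

/* Userspace equivalent of the bitmap_zalloc()/bitmap_free() pair these
 * hunks switch to: one zeroed buffer of BITS_TO_LONGS(nbits) longs.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define BITS_TO_LONGS(n) (((n) + CHAR_BIT * sizeof(long) - 1) / \
			  (CHAR_BIT * sizeof(long)))

static unsigned long *bitmap_zalloc(unsigned int nbits)
{
	return calloc(BITS_TO_LONGS(nbits), sizeof(unsigned long));
}

int main(void)
{
	unsigned int sbufcnt = 2048 + 512 + 32;  /* e.g. 2k + 4k + VL15 bufs */
	unsigned long *map = bitmap_zalloc(sbufcnt);

	if (!map)
		return 1;
	printf("%u bits in %zu longs\n", sbufcnt,
	       (size_t)BITS_TO_LONGS(sbufcnt));
	free(map);
	return 0;
}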
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index d1a72e89e297..45211008449f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1106,8 +1106,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
if (!qib_cpulist_count) {
u32 count = num_online_cpus();
- qib_cpulist = kcalloc(BITS_TO_LONGS(count), sizeof(long),
- GFP_KERNEL);
+ qib_cpulist = bitmap_zalloc(count, GFP_KERNEL);
if (qib_cpulist)
qib_cpulist_count = count;
}
@@ -1279,7 +1278,7 @@ static void __exit qib_ib_cleanup(void)
#endif
qib_cpulist_count = 0;
- kfree(qib_cpulist);
+ bitmap_free(qib_cpulist);
WARN_ON(!xa_empty(&qib_dev_table));
qib_dev_cleanup();
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index 81b810d006c0..1dc3ccf0cf1f 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -587,7 +587,7 @@ static int epb_access(struct qib_devdata *dd, int sdnum, int claim)
/* Need to release */
u64 pollval;
/*
- * The only writeable bits are the request and CS.
+ * The only writable bits are the request and CS.
* Both should be clear
*/
u64 newval = 0;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index e212929369df..67a1b4562dc2 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -482,7 +482,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
if (err)
goto out_free_dev;
- if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
+ if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
usnic_err("IOMMU of %s does not support cache coherency\n",
dev_name(dev));
err = -EINVAL;
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 642b52539ac3..b1a0ab3cd4bd 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -19,16 +19,16 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
}
if (cqe > rxe->attr.max_cqe) {
- pr_warn("cqe(%d) > max_cqe(%d)\n",
- cqe, rxe->attr.max_cqe);
+ pr_debug("cqe(%d) > max_cqe(%d)\n",
+ cqe, rxe->attr.max_cqe);
goto err1;
}
if (cq) {
count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (cqe < count) {
- pr_warn("cqe(%d) < current # elements in queue (%d)",
- cqe, count);
+ pr_debug("cqe(%d) < current # elements in queue (%d)",
+ cqe, count);
goto err1;
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 0e022ae1b8a5..336164843db4 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -145,7 +145,7 @@ static inline int rcv_wqe_size(int max_sge)
max_sge * sizeof(struct ib_sge);
}
-void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);
+void free_rd_atomic_resource(struct resp_res *res);
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
{
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index fc3942e04a1f..9a5c2af6a56f 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -687,7 +687,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (atomic_read(&mr->num_mw) > 0)
return -EINVAL;
- rxe_put(mr);
+ rxe_cleanup(mr);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 2e1fa844fabf..bb6a1edaaee8 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -33,6 +33,8 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
spin_lock_init(&mw->lock);
+ rxe_finalize(mw);
+
return 0;
}
@@ -40,7 +42,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
{
struct rxe_mw *mw = to_rmw(ibmw);
- rxe_put(mw);
+ rxe_cleanup(mw);
return 0;
}
@@ -113,7 +115,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
(IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
pr_err_once(
- "attempt to bind an writeable MW to an MR without local write access\n");
+ "attempt to bind an writable MW to an MR without local write access\n");
return -EINVAL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 19b14826385b..f50620f5a0a1 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -6,6 +6,7 @@
#include "rxe.h"
+#define RXE_POOL_TIMEOUT (200)
#define RXE_POOL_ALIGN (16)
static const struct rxe_type_info {
@@ -136,10 +137,14 @@ void *rxe_alloc(struct rxe_pool *pool)
elem->pool = pool;
elem->obj = obj;
kref_init(&elem->ref_cnt);
+ init_completion(&elem->complete);
- err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
+ /* allocate index in array but leave pointer as NULL so it
+ * can't be looked up until rxe_finalize() is called
+ */
+ err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
&pool->next, GFP_KERNEL);
- if (err)
+ if (err < 0)
goto err_free;
return obj;
@@ -151,9 +156,11 @@ err_cnt:
return NULL;
}
-int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
+ bool sleepable)
{
int err;
+ gfp_t gfp_flags;
if (WARN_ON(pool->type == RXE_TYPE_MR))
return -EINVAL;
@@ -164,10 +171,19 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
elem->pool = pool;
elem->obj = (u8 *)elem - pool->elem_offset;
kref_init(&elem->ref_cnt);
-
- err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
- &pool->next, GFP_KERNEL);
- if (err)
+ init_completion(&elem->complete);
+
+ /* AH objects are unique in that the create_ah verb
+ * can be called in atomic context. If the create_ah
+ * call is not sleepable use GFP_ATOMIC.
+ */
+ gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC;
+
+ if (sleepable)
+ might_sleep();
+ err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
+ &pool->next, gfp_flags);
+ if (err < 0)
goto err_cnt;
return 0;
@@ -181,16 +197,15 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
struct rxe_pool_elem *elem;
struct xarray *xa = &pool->xa;
- unsigned long flags;
void *obj;
- xa_lock_irqsave(xa, flags);
+ rcu_read_lock();
elem = xa_load(xa, index);
if (elem && kref_get_unless_zero(&elem->ref_cnt))
obj = elem->obj;
else
obj = NULL;
- xa_unlock_irqrestore(xa, flags);
+ rcu_read_unlock();
return obj;
}
@@ -198,17 +213,74 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
static void rxe_elem_release(struct kref *kref)
{
struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
- struct rxe_pool *pool = elem->pool;
- xa_erase(&pool->xa, elem->index);
+ complete(&elem->complete);
+}
+
+int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
+{
+ struct rxe_pool *pool = elem->pool;
+ struct xarray *xa = &pool->xa;
+ static int timeout = RXE_POOL_TIMEOUT;
+ int ret, err = 0;
+ void *xa_ret;
+
+ if (sleepable)
+ might_sleep();
+
+ /* erase xarray entry to prevent looking up
+ * the pool elem from its index
+ */
+ xa_ret = xa_erase(xa, elem->index);
+ WARN_ON(xa_err(xa_ret));
+
+ /* if this is the last call to rxe_put complete the
+ * object. It is safe to touch obj->elem after this since
+ * it is freed below
+ */
+ __rxe_put(elem);
+
+ /* wait until all references to the object have been
+ * dropped before final object specific cleanup and
+ * return to rdma-core
+ */
+ if (sleepable) {
+ if (!completion_done(&elem->complete) && timeout) {
+ ret = wait_for_completion_timeout(&elem->complete,
+ timeout);
+
+ /* Shouldn't happen. There are still references to
+ * the object but, rather than deadlock, free the
+ * object or pass back to rdma-core.
+ */
+ if (WARN_ON(!ret))
+ err = -EINVAL;
+ }
+ } else {
+ unsigned long until = jiffies + timeout;
+
+ /* AH objects are unique in that the destroy_ah verb
+ * can be called in atomic context. This delay
+ * replaces the wait_for_completion call above
+ * when the destroy_ah call is not sleepable
+ */
+ while (!completion_done(&elem->complete) &&
+ time_before(jiffies, until))
+ mdelay(1);
+
+ if (WARN_ON(!completion_done(&elem->complete)))
+ err = -EINVAL;
+ }
if (pool->cleanup)
pool->cleanup(elem);
if (pool->type == RXE_TYPE_MR)
- kfree(elem->obj);
+ kfree_rcu(elem->obj);
atomic_dec(&pool->num_elem);
+
+ return err;
}
int __rxe_get(struct rxe_pool_elem *elem)
@@ -220,3 +292,11 @@ int __rxe_put(struct rxe_pool_elem *elem)
{
return kref_put(&elem->ref_cnt, rxe_elem_release);
}
+
+void __rxe_finalize(struct rxe_pool_elem *elem)
+{
+ void *xa_ret;
+
+ xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL);
+ WARN_ON(xa_err(xa_ret));
+}
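The rxe_pool rework above reserves an xarray index with a NULL entry at alloc time, publishes the object pointer only in rxe_finalize(), and in rxe_cleanup() erases the entry, drops the allocating reference, and waits on a completion until the final kref_put fires. A single-threaded userspace analogue with one slot, where a flag stands in for the completion:

/* Userspace analogue of the reserve -> finalize -> cleanup lifecycle.
 * Real lookups run concurrently under RCU; here everything is serial.
 */
#include <stdbool.h>
#include <stdio.h>

struct elem { int refs; bool complete; };

static struct elem *slot;            /* stands in for the xarray entry */

static void put(struct elem *e)      /* kref_put(): last ref completes */
{
	if (--e->refs == 0)
		e->complete = true;
}

static void finalize(struct elem *e) /* publish: lookups now succeed */
{
	slot = e;
}

static struct elem *lookup(void)
{
	if (slot && slot->refs > 0) {    /* kref_get_unless_zero() */
		slot->refs++;
		return slot;
	}
	return NULL;                     /* NULL entry or dying object */
}

static int cleanup(struct elem *e)
{
	slot = NULL;                     /* xa_erase(): no new lookups */
	put(e);                          /* drop the allocating reference */
	/* real code: wait_for_completion_timeout() for readers here */
	return e->complete ? 0 : -1;
}

int main(void)
{
	struct elem e = { .refs = 1 };   /* kref_init() */

	finalize(&e);
	struct elem *r = lookup();       /* reader takes a reference */
	put(r);                          /* ...and drops it */
	printf("cleanup: %d\n", cleanup(&e));
	return 0;
}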
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 0860660d65ec..9d83cb32092f 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -24,6 +24,7 @@ struct rxe_pool_elem {
void *obj;
struct kref ref_cnt;
struct list_head list;
+ struct completion complete;
u32 index;
};
@@ -57,21 +58,28 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
void *rxe_alloc(struct rxe_pool *pool);
/* connect already allocated object to pool */
-int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem);
-
-#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem)
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
+ bool sleepable);
+#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem, true)
+#define rxe_add_to_pool_ah(pool, obj, sleepable) __rxe_add_to_pool(pool, \
+ &(obj)->elem, sleepable)
/* lookup an indexed object from index. takes a reference on object */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
int __rxe_get(struct rxe_pool_elem *elem);
-
#define rxe_get(obj) __rxe_get(&(obj)->elem)
int __rxe_put(struct rxe_pool_elem *elem);
-
#define rxe_put(obj) __rxe_put(&(obj)->elem)
+int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable);
+#define rxe_cleanup(obj) __rxe_cleanup(&(obj)->elem, true)
+#define rxe_cleanup_ah(obj, sleepable) __rxe_cleanup(&(obj)->elem, sleepable)
+
#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt)
+void __rxe_finalize(struct rxe_pool_elem *elem);
+#define rxe_finalize(obj) __rxe_finalize(&(obj)->elem)
+
#endif /* RXE_POOL_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 22e9b85344c3..65d75eea460f 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -120,17 +120,15 @@ static void free_rd_atomic_resources(struct rxe_qp *qp)
for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
struct resp_res *res = &qp->resp.resources[i];
- free_rd_atomic_resource(qp, res);
+ free_rd_atomic_resource(res);
}
kfree(qp->resp.resources);
qp->resp.resources = NULL;
}
}
-void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
+void free_rd_atomic_resource(struct resp_res *res)
{
- if (res->type == RXE_ATOMIC_MASK)
- kfree_skb(res->atomic.skb);
res->type = 0;
}
@@ -142,7 +140,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp)
if (qp->resp.resources) {
for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
res = &qp->resp.resources[i];
- free_rd_atomic_resource(qp, res);
+ free_rd_atomic_resource(res);
}
}
}
@@ -804,13 +802,15 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
if (qp->rq.queue)
rxe_queue_cleanup(qp->rq.queue);
- atomic_dec(&qp->scq->num_wq);
- if (qp->scq)
+ if (qp->scq) {
+ atomic_dec(&qp->scq->num_wq);
rxe_put(qp->scq);
+ }
- atomic_dec(&qp->rcq->num_wq);
- if (qp->rcq)
+ if (qp->rcq) {
+ atomic_dec(&qp->rcq->num_wq);
rxe_put(qp->rcq);
+ }
if (qp->pd)
rxe_put(qp->pd);
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 6227112ef7a2..ed44042782fa 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -7,9 +7,6 @@
#ifndef RXE_QUEUE_H
#define RXE_QUEUE_H
-/* for definition of shared struct rxe_queue_buf */
-#include <uapi/rdma/rdma_user_rxe.h>
-
/* Implements a simple circular buffer that is shared between user
* and the driver and can be resized. The requested element size is
* rounded up to a power of 2 and the number of elements in the buffer
@@ -53,6 +50,8 @@ enum queue_type {
QUEUE_TYPE_FROM_DRIVER,
};
+struct rxe_queue_buf;
+
struct rxe_queue {
struct rxe_dev *rxe;
struct rxe_queue_buf *buf;
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 9d98237389cf..dec4ddaca70f 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -15,8 +15,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
u32 opcode);
static inline void retry_first_write_send(struct rxe_qp *qp,
- struct rxe_send_wqe *wqe,
- unsigned int mask, int npsn)
+ struct rxe_send_wqe *wqe, int npsn)
{
int i;
@@ -83,7 +82,7 @@ static void req_retry(struct rxe_qp *qp)
if (mask & WR_WRITE_OR_SEND_MASK) {
npsn = (qp->comp.psn - wqe->first_psn) &
BTH_PSN_MASK;
- retry_first_write_send(qp, wqe, mask, npsn);
+ retry_first_write_send(qp, wqe, npsn);
}
if (mask & WR_READ_MASK) {
@@ -581,9 +580,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
wqe->status = IB_WC_SUCCESS;
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
- if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- qp->sq_sig_type == IB_SIGNAL_ALL_WR)
- rxe_run_task(&qp->comp.task, 1);
+ /* There is no ack coming for local work requests,
+ * which can lead to a deadlock. So go ahead and complete
+ * it now.
+ */
+ rxe_run_task(&qp->comp.task, 1);
return 0;
}
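Reviewer note: a hedged sketch of the consumer side. IB_WR_LOCAL_INV is a standard local operation; no response packet ever arrives for it, so unless the completer task is run unconditionally (the change above) its WQE could never retire. The qp and rkey here are assumed valid; the function name is illustrative.

    static int example_post_local_inv(struct ib_qp *qp, u32 rkey)
    {
            const struct ib_send_wr *bad_wr;
            struct ib_send_wr wr = {
                    .opcode = IB_WR_LOCAL_INV,      /* purely local: no ack on the wire */
                    .ex.invalidate_rkey = rkey,
            };

            return ib_post_send(qp, &wr, &bad_wr);
    }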
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index f4f6ee5d81fe..28033849d404 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -21,6 +21,7 @@ enum resp_states {
RESPST_CHK_RKEY,
RESPST_EXECUTE,
RESPST_READ_REPLY,
+ RESPST_ATOMIC_REPLY,
RESPST_COMPLETE,
RESPST_ACKNOWLEDGE,
RESPST_CLEANUP,
@@ -55,6 +56,7 @@ static char *resp_state_name[] = {
[RESPST_CHK_RKEY] = "CHK_RKEY",
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
+ [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@@ -448,7 +450,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (rkey_is_mw(rkey)) {
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
- pr_err("%s: no MW matches rkey %#x\n", __func__, rkey);
+ pr_debug("%s: no MW matches rkey %#x\n",
+ __func__, rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -468,7 +471,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
- pr_err("%s: no MR matches rkey %#x\n", __func__, rkey);
+ pr_debug("%s: no MR matches rkey %#x\n",
+ __func__, rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -549,49 +553,106 @@ out:
return rc;
}
+static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ int type)
+{
+ struct resp_res *res;
+ u32 pkts;
+
+ res = &qp->resp.resources[qp->resp.res_head];
+ rxe_advance_resp_resource(qp);
+ free_rd_atomic_resource(res);
+
+ res->type = type;
+ res->replay = 0;
+
+ switch (type) {
+ case RXE_READ_MASK:
+ res->read.va = qp->resp.va + qp->resp.offset;
+ res->read.va_org = qp->resp.va + qp->resp.offset;
+ res->read.resid = qp->resp.resid;
+ res->read.length = qp->resp.resid;
+ res->read.rkey = qp->resp.rkey;
+
+ pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
+ res->first_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
+
+ res->state = rdatm_res_state_new;
+ break;
+ case RXE_ATOMIC_MASK:
+ res->first_psn = pkt->psn;
+ res->last_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ break;
+ }
+
+ return res;
+}
+
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
-static enum resp_states process_atomic(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
+static enum resp_states atomic_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
{
u64 *vaddr;
enum resp_states ret;
struct rxe_mr *mr = qp->resp.mr;
+ struct resp_res *res = qp->resp.res;
+ u64 value;
- if (mr->state != RXE_MR_STATE_VALID) {
- ret = RESPST_ERR_RKEY_VIOLATION;
- goto out;
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
+ qp->resp.res = res;
}
- vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64));
+ if (!res->replay) {
+ if (mr->state != RXE_MR_STATE_VALID) {
+ ret = RESPST_ERR_RKEY_VIOLATION;
+ goto out;
+ }
- /* check vaddr is 8 bytes aligned. */
- if (!vaddr || (uintptr_t)vaddr & 7) {
- ret = RESPST_ERR_MISALIGNED_ATOMIC;
- goto out;
- }
+ vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset,
+ sizeof(u64));
- spin_lock_bh(&atomic_ops_lock);
+ /* check vaddr is 8 bytes aligned. */
+ if (!vaddr || (uintptr_t)vaddr & 7) {
+ ret = RESPST_ERR_MISALIGNED_ATOMIC;
+ goto out;
+ }
- qp->resp.atomic_orig = *vaddr;
+ spin_lock_bh(&atomic_ops_lock);
+ res->atomic.orig_val = value = *vaddr;
- if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
- if (*vaddr == atmeth_comp(pkt))
- *vaddr = atmeth_swap_add(pkt);
- } else {
- *vaddr += atmeth_swap_add(pkt);
- }
+ if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
+ if (value == atmeth_comp(pkt))
+ value = atmeth_swap_add(pkt);
+ } else {
+ value += atmeth_swap_add(pkt);
+ }
- spin_unlock_bh(&atomic_ops_lock);
+ *vaddr = value;
+ spin_unlock_bh(&atomic_ops_lock);
+
+ qp->resp.msn++;
- ret = RESPST_NONE;
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+ }
+
+ ret = RESPST_ACKNOWLEDGE;
out:
return ret;
}
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt,
struct rxe_pkt_info *ack,
int opcode,
int payload,
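Reviewer note: a compact restatement (not new code) of the semantics atomic_reply() above implements under atomic_ops_lock; orig_val is what later goes out via atmack_set_orig():

    u64 orig = *vaddr;                              /* snapshot of target memory */

    if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
            if (orig == atmeth_comp(pkt))           /* compare field matched... */
                    *vaddr = atmeth_swap_add(pkt);  /* ...so swap in the new value */
    } else {                                        /* FETCH_ADD */
            *vaddr = orig + atmeth_swap_add(pkt);
    }
    /* in both cases the requester gets orig back in the ATOMIC ACK */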
@@ -629,7 +690,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
}
if (ack->mask & RXE_ATMACK_MASK)
- atmack_set_orig(ack, qp->resp.atomic_orig);
+ atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
err = rxe_prepare(&qp->pri_av, ack, skb);
if (err) {
@@ -640,34 +701,6 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
return skb;
}
-static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
-{
- struct resp_res *res;
- u32 pkts;
-
- res = &qp->resp.resources[qp->resp.res_head];
- rxe_advance_resp_resource(qp);
- free_rd_atomic_resource(qp, res);
-
- res->type = RXE_READ_MASK;
- res->replay = 0;
- res->read.va = qp->resp.va + qp->resp.offset;
- res->read.va_org = qp->resp.va + qp->resp.offset;
- res->read.resid = qp->resp.resid;
- res->read.length = qp->resp.resid;
- res->read.rkey = qp->resp.rkey;
-
- pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
- res->first_psn = pkt->psn;
- res->cur_psn = pkt->psn;
- res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
-
- res->state = rdatm_res_state_new;
-
- return res;
-}
-
/**
* rxe_recheck_mr - revalidate MR from rkey and get a reference
* @qp: the qp
@@ -738,7 +771,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
struct rxe_mr *mr;
if (!res) {
- res = rxe_prepare_read_res(qp, req_pkt);
+ res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
qp->resp.res = res;
}
@@ -771,7 +804,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
payload = min_t(int, res->read.resid, mtu);
- skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
+ skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
res->cur_psn, AETH_ACK_UNLIMITED);
if (!skb)
return RESPST_ERR_RNR;
@@ -858,9 +891,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.msn++;
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
- err = process_atomic(qp, pkt);
- if (err)
- return err;
+ return RESPST_ATOMIC_REPLY;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
@@ -997,14 +1028,13 @@ finish:
return RESPST_CLEANUP;
}
-static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
- u8 syndrome, u32 psn)
+static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
int err = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
+ skb = prepare_ack_packet(qp, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
0, psn, syndrome);
if (!skb) {
err = -ENOMEM;
@@ -1019,40 +1049,29 @@ err1:
return err;
}
-static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
- u8 syndrome)
+static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
- int rc = 0;
+ int err = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct resp_res *res;
- skb = prepare_ack_packet(qp, pkt, &ack_pkt,
- IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
- syndrome);
+ skb = prepare_ack_packet(qp, &ack_pkt, IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE,
+ 0, psn, syndrome);
if (!skb) {
- rc = -ENOMEM;
+ err = -ENOMEM;
goto out;
}
- res = &qp->resp.resources[qp->resp.res_head];
- free_rd_atomic_resource(qp, res);
- rxe_advance_resp_resource(qp);
-
- skb_get(skb);
- res->type = RXE_ATOMIC_MASK;
- res->atomic.skb = skb;
- res->first_psn = ack_pkt.psn;
- res->last_psn = ack_pkt.psn;
- res->cur_psn = ack_pkt.psn;
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
+ if (err)
+ pr_err_ratelimited("Failed sending atomic ack\n");
- rc = rxe_xmit_packet(qp, &ack_pkt, skb);
- if (rc) {
- pr_err_ratelimited("Failed sending ack\n");
- rxe_put(qp);
- }
+ /* Have to clear qp->resp.res here since a non-NULL value
+ * is also used to trigger long read replies
+ */
+ qp->resp.res = NULL;
out:
- return rc;
+ return err;
}
static enum resp_states acknowledge(struct rxe_qp *qp,
@@ -1062,11 +1081,11 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
return RESPST_CLEANUP;
if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
- send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
+ send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
- send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
+ send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
- send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);
+ send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
return RESPST_CLEANUP;
}
@@ -1119,7 +1138,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
if (pkt->mask & RXE_SEND_MASK ||
pkt->mask & RXE_WRITE_MASK) {
/* SEND. Ack again and cleanup. C9-105. */
- send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn);
+ send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
return RESPST_CLEANUP;
} else if (pkt->mask & RXE_READ_MASK) {
struct resp_res *res;
@@ -1173,14 +1192,11 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* Find the operation in our list of responder resources. */
res = find_resource(qp, pkt->psn);
if (res) {
- skb_get(res->atomic.skb);
- /* Resend the result. */
- rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
- if (rc) {
- pr_err("Failed resending result. This flow is not handled - skb ignored\n");
- rc = RESPST_CLEANUP;
- goto out;
- }
+ res->replay = 1;
+ res->cur_psn = pkt->psn;
+ qp->resp.res = res;
+ rc = RESPST_ATOMIC_REPLY;
+ goto out;
}
/* Resource not found. Class D error. Drop the request. */
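Reviewer note: with the cached skb gone, a duplicate atomic request is replayed from saved state. Sketch of the resulting flow through the responder state machine, as I read the hunks above:

    /*
     * duplicate ATOMIC psn
     *   -> find_resource(qp, pkt->psn) locates the RXE_ATOMIC_MASK res
     *   -> res->replay = 1, qp->resp.res = res
     *   -> RESPST_ATOMIC_REPLY: atomic_reply() sees res->replay and
     *      skips the memory update
     *   -> RESPST_ACKNOWLEDGE: the ack is rebuilt from res->atomic.orig_val
     *
     * i.e. the response is reconstructed on demand instead of pinning
     * an skb for the lifetime of the resource.
     */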
@@ -1316,6 +1332,9 @@ int rxe_responder(void *arg)
case RESPST_READ_REPLY:
state = read_reply(qp, pkt);
break;
+ case RESPST_ATOMIC_REPLY:
+ state = atomic_reply(qp, pkt);
+ break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
@@ -1327,7 +1346,7 @@ int rxe_responder(void *arg)
break;
case RESPST_ERR_PSN_OUT_OF_SEQ:
/* RC only - Class B. Drop packet. */
- send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
+ send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
state = RESPST_CLEANUP;
break;
@@ -1349,7 +1368,7 @@ int rxe_responder(void *arg)
if (qp_type(qp) == IB_QPT_RC) {
rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
/* RC - class B */
- send_ack(qp, pkt, AETH_RNR_NAK |
+ send_ack(qp, AETH_RNR_NAK |
(~AETH_TYPE_MASK &
qp->attr.min_rnr_timer),
pkt->psn);
@@ -1438,7 +1457,7 @@ int rxe_responder(void *arg)
case RESPST_ERROR:
qp->resp.goto_error = 0;
- pr_warn("qp#%d moved to error state\n", qp_num(qp));
+ pr_debug("qp#%d moved to error state\n", qp_num(qp));
rxe_qp_error(qp);
goto exit;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9d995854a174..151c6280abd5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -115,7 +115,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
struct rxe_ucontext *uc = to_ruc(ibuc);
- rxe_put(uc);
+ rxe_cleanup(uc);
}
static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
@@ -149,7 +149,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_pd *pd = to_rpd(ibpd);
- rxe_put(pd);
+ rxe_cleanup(pd);
return 0;
}
@@ -176,7 +176,8 @@ static int rxe_create_ah(struct ib_ah *ibah,
if (err)
return err;
- err = rxe_add_to_pool(&rxe->ah_pool, ah);
+ err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
+ init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
if (err)
return err;
@@ -188,7 +189,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
sizeof(uresp->ah_num));
if (err) {
- rxe_put(ah);
+ rxe_cleanup(ah);
return -EFAULT;
}
} else if (ah->is_user) {
@@ -197,6 +198,8 @@ static int rxe_create_ah(struct ib_ah *ibah,
}
rxe_init_av(init_attr->ah_attr, &ah->av);
+ rxe_finalize(ah);
+
return 0;
}
@@ -228,7 +231,8 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
- rxe_put(ah);
+ rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);
+
return 0;
}
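Reviewer note: the sleepable variants exist because AHs may be created and destroyed from atomic context. A hedged sketch of a caller honouring that contract; rdma_destroy_ah() and the flag are standard verbs, the may_sleep argument is illustrative.

    static int example_destroy_ah(struct ib_ah *ah, bool may_sleep)
    {
            /* without the flag, the driver must not sleep in its cleanup */
            u32 flags = may_sleep ? RDMA_DESTROY_AH_SLEEPABLE : 0;

            return rdma_destroy_ah(ah, flags);
    }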
@@ -308,12 +312,13 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
if (err)
- goto err_put;
+ goto err_cleanup;
return 0;
-err_put:
- rxe_put(srq);
+err_cleanup:
+ rxe_cleanup(srq);
+
return err;
}
@@ -362,7 +367,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rxe_srq *srq = to_rsrq(ibsrq);
- rxe_put(srq);
+ rxe_cleanup(srq);
return 0;
}
@@ -429,10 +434,11 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
if (err)
goto qp_init;
+ rxe_finalize(qp);
return 0;
qp_init:
- rxe_put(qp);
+ rxe_cleanup(qp);
return err;
}
@@ -485,7 +491,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (ret)
return ret;
- rxe_put(qp);
+ rxe_cleanup(qp);
return 0;
}
@@ -803,7 +809,7 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
rxe_cq_disable(cq);
- rxe_put(cq);
+ rxe_cleanup(cq);
return 0;
}
@@ -898,6 +904,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
rxe_get(pd);
rxe_mr_init_dma(pd, access, mr);
+ rxe_finalize(mr);
return &mr->ibmr;
}
@@ -926,11 +933,13 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
if (err)
goto err3;
+ rxe_finalize(mr);
+
return &mr->ibmr;
err3:
rxe_put(pd);
- rxe_put(mr);
+ rxe_cleanup(mr);
err2:
return ERR_PTR(err);
}
@@ -958,11 +967,13 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
if (err)
goto err2;
+ rxe_finalize(mr);
+
return &mr->ibmr;
err2:
rxe_put(pd);
- rxe_put(mr);
+ rxe_cleanup(mr);
err1:
return ERR_PTR(err);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index ac464e68c923..628e40c1714b 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -9,7 +9,6 @@
#include <linux/interrupt.h>
#include <linux/workqueue.h>
-#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"
#include "rxe_hw_counters.h"
@@ -155,7 +154,7 @@ struct resp_res {
union {
struct {
- struct sk_buff *skb;
+ u64 orig_val;
} atomic;
struct {
u64 va_org;
@@ -189,7 +188,6 @@ struct rxe_resp_info {
u32 resid;
u32 rkey;
u32 length;
- u64 atomic_orig;
/* SRQ only */
struct {
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 17f34d584cd9..f88d2971c2c6 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -725,11 +725,11 @@ static int siw_proc_mpareply(struct siw_cep *cep)
enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR;
rv = siw_recv_mpa_rr(cep);
- if (rv != -EAGAIN)
- siw_cancel_mpatimer(cep);
if (rv)
goto out_err;
+ siw_cancel_mpatimer(cep);
+
rep = &cep->mpa.hdr;
if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) {
@@ -895,7 +895,8 @@ static int siw_proc_mpareply(struct siw_cep *cep)
}
out_err:
- siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
+ if (rv != -EAGAIN)
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
return rv;
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 09316072b789..8dedae7ae79e 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1167,7 +1167,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
err_out:
siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
- if (cq && cq->queue) {
+ if (cq->queue) {
struct siw_ucontext *ctx =
rdma_udata_to_drv_context(udata, struct siw_ucontext,
base_ucontext);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2a8961b685c2..a4904371e2db 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1664,8 +1664,10 @@ static void ipoib_napi_add(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC);
- netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE);
+ netif_napi_add_weight(dev, &priv->recv_napi, ipoib_rx_poll,
+ IPOIB_NUM_WC);
+ netif_napi_add_weight(dev, &priv->send_napi, ipoib_tx_poll,
+ MAX_SEND_CQE);
}
static void ipoib_napi_del(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index c08f2d9133b6..a00ca117303a 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -246,6 +246,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
device = ib_conn->device;
ib_dev = device->ib_device;
+ /* +1 for drain */
if (ib_conn->pi_support)
max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
else
@@ -267,7 +268,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
init_attr.qp_context = (void *)ib_conn;
init_attr.send_cq = ib_conn->cq;
init_attr.recv_cq = ib_conn->cq;
- init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
+ /* +1 for drain */
+ init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS + 1;
init_attr.cap.max_send_sge = 2;
init_attr.cap.max_recv_sge = 1;
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -485,7 +487,7 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_conn, err);
/* block until all flush errors are consumed */
- ib_drain_sq(ib_conn->qp);
+ ib_drain_qp(ib_conn->qp);
}
return 1;
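Reviewer note: ib_drain_qp() is ib_drain_sq() plus ib_drain_rq(), and each drain posts one marker work request, hence the two "+1 for drain" reservations above. A fragment sketching the sizing rule, using this driver's constants:

    init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;   /* data WRs + drain WR */
    init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS + 1;  /* recvs + drain WR */
    /* ... later, on teardown ... */
    ib_drain_qp(ib_conn->qp);    /* flushes both queues, using the spare slots */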
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
index 385a19846c24..1e6ffafa2db3 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
@@ -32,11 +32,7 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con)
void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats)
{
- struct rtrs_clt_stats_pcpu *s;
-
- s = get_cpu_ptr(stats->pcpu_stats);
- s->rdma.failover_cnt++;
- put_cpu_ptr(stats->pcpu_stats);
+ this_cpu_inc(stats->pcpu_stats->rdma.failover_cnt);
}
int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf)
@@ -169,12 +165,8 @@ int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *s, bool enable)
static inline void rtrs_clt_update_rdma_stats(struct rtrs_clt_stats *stats,
size_t size, int d)
{
- struct rtrs_clt_stats_pcpu *s;
-
- s = get_cpu_ptr(stats->pcpu_stats);
- s->rdma.dir[d].cnt++;
- s->rdma.dir[d].size_total += size;
- put_cpu_ptr(stats->pcpu_stats);
+ this_cpu_inc(stats->pcpu_stats->rdma.dir[d].cnt);
+ this_cpu_add(stats->pcpu_stats->rdma.dir[d].size_total, size);
}
void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir)
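Reviewer note: the two idioms below are equivalent; this_cpu_*() folds the preempt-safe pointer dance into a single operation. Minimal sketch with an illustrative static per-cpu variable:

    struct ex_stats { u64 cnt; };
    static DEFINE_PER_CPU(struct ex_stats, ex_stats);

    static void bump_open_coded(void)
    {
            struct ex_stats *s = get_cpu_ptr(&ex_stats);    /* disables preemption */

            s->cnt++;
            put_cpu_ptr(&ex_stats);                         /* re-enables it */
    }

    static void bump_this_cpu(void)
    {
            this_cpu_inc(ex_stats.cnt);     /* same effect, one line */
    }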
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 9809c3883979..baecde41d126 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -740,25 +740,25 @@ struct path_it {
struct rtrs_clt_path *(*next_path)(struct path_it *it);
};
-/**
- * list_next_or_null_rr_rcu - get next list element in round-robin fashion.
+/*
+ * rtrs_clt_get_next_path_or_null - get clt path from the list or return NULL
* @head: the head for the list.
- * @ptr: the list head to take the next element from.
- * @type: the type of the struct this is embedded in.
- * @memb: the name of the list_head within the struct.
+ * @clt_path: The element to take the next clt_path from.
*
- * Next element returned in round-robin fashion, i.e. head will be skipped,
+ * The next clt path is returned in round-robin fashion, i.e. head will be skipped,
* but if list is observed as empty, NULL will be returned.
*
- * This primitive may safely run concurrently with the _rcu list-mutation
+ * This function may safely run concurrently with the _rcu list-mutation
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
-#define list_next_or_null_rr_rcu(head, ptr, type, memb) \
-({ \
- list_next_or_null_rcu(head, ptr, type, memb) ?: \
- list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \
- type, memb); \
-})
+static inline struct rtrs_clt_path *
+rtrs_clt_get_next_path_or_null(struct list_head *head, struct rtrs_clt_path *clt_path)
+{
+ return list_next_or_null_rcu(head, &clt_path->s.entry, typeof(*clt_path), s.entry) ?:
+ list_next_or_null_rcu(head,
+ READ_ONCE((&clt_path->s.entry)->next),
+ typeof(*clt_path), s.entry);
+}
/**
* get_next_path_rr() - Returns path in round-robin fashion.
@@ -789,10 +789,8 @@ static struct rtrs_clt_path *get_next_path_rr(struct path_it *it)
path = list_first_or_null_rcu(&clt->paths_list,
typeof(*path), s.entry);
else
- path = list_next_or_null_rr_rcu(&clt->paths_list,
- &path->s.entry,
- typeof(*path),
- s.entry);
+ path = rtrs_clt_get_next_path_or_null(&clt->paths_list, path);
+
rcu_assign_pointer(*ppcpu_path, path);
return path;
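Reviewer note: a hedged usage sketch of the new helper; cur and do_something() are hypothetical. As with the old macro, the caller must hold the RCU read lock across both the lookup and the use:

    rcu_read_lock();
    next = rtrs_clt_get_next_path_or_null(&clt->paths_list, cur);
    if (next)
            do_something(next);     /* only guaranteed live until unlock */
    rcu_read_unlock();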
@@ -1403,8 +1401,7 @@ static int alloc_permits(struct rtrs_clt_sess *clt)
unsigned int chunk_bits;
int err, i;
- clt->permits_map = kcalloc(BITS_TO_LONGS(clt->queue_depth),
- sizeof(long), GFP_KERNEL);
+ clt->permits_map = bitmap_zalloc(clt->queue_depth, GFP_KERNEL);
if (!clt->permits_map) {
err = -ENOMEM;
goto out_err;
@@ -1426,7 +1423,7 @@ static int alloc_permits(struct rtrs_clt_sess *clt)
return 0;
err_map:
- kfree(clt->permits_map);
+ bitmap_free(clt->permits_map);
clt->permits_map = NULL;
out_err:
return err;
@@ -1434,13 +1431,11 @@ out_err:
static void free_permits(struct rtrs_clt_sess *clt)
{
- if (clt->permits_map) {
- size_t sz = clt->queue_depth;
-
+ if (clt->permits_map)
wait_event(clt->permits_wait,
- find_first_bit(clt->permits_map, sz) >= sz);
- }
- kfree(clt->permits_map);
+ bitmap_empty(clt->permits_map, clt->queue_depth));
+
+ bitmap_free(clt->permits_map);
clt->permits_map = NULL;
kfree(clt->permits);
clt->permits = NULL;
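Reviewer note: sketch of the bitmap_* API now used for the permits map; it sizes and zeroes the allocation from the bit count instead of open-coding kcalloc(BITS_TO_LONGS(...)). queue_depth and id are illustrative.

    unsigned long *map;

    map = bitmap_zalloc(queue_depth, GFP_KERNEL);   /* all permits free */
    if (!map)
            return -ENOMEM;

    set_bit(id, map);                               /* permit 'id' taken */
    clear_bit(id, map);                             /* permit released */

    if (bitmap_empty(map, queue_depth))             /* the free_permits() wait condition */
            pr_debug("no permits in flight\n");

    bitmap_free(map);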
@@ -2277,8 +2272,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path)
* removed. If @sess is the last element, then @next is NULL.
*/
rcu_read_lock();
- next = list_next_or_null_rr_rcu(&clt->paths_list, &clt_path->s.entry,
- typeof(*next), s.entry);
+ next = rtrs_clt_get_next_path_or_null(&clt->paths_list, clt_path);
rcu_read_unlock();
/*
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 9a1e5c2ae55c..ac0df734eba8 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -23,6 +23,17 @@
#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
__stringify(RTRS_PROTO_VER_MINOR)
+/*
+ * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
+ * and the minimum chunk size is 4096 (2^12).
+ * So the maximum sess_queue_depth is 65536 (2^16) in theory.
+ * But mempool_create, create_qp and ib_post_send fail with
+ * "cannot allocate memory" error if sess_queue_depth is too big.
+ * Therefore the practical max value of sess_queue_depth is
+ * somewhere between 1 and 65534 and it depends on the system.
+ */
+#define MAX_SESS_QUEUE_DEPTH 65535
+
enum rtrs_imm_const {
MAX_IMM_TYPE_BITS = 4,
MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
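Reviewer note: the arithmetic behind the moved comment, spelled out. The theoretical ceiling is

    2^MAX_IMM_PAYL_BITS / min chunk size = 2^28 / 2^12 = 2^16 = 65536

slots; the define stays one below that, and the practical limit is lower still because large depths make mempool_create(), create_qp() and ib_post_send() fail with allocation errors, as the comment notes.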
@@ -46,16 +57,6 @@ enum {
MAX_PATHS_NUM = 128,
- /*
- * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
- * and the minimum chunk size is 4096 (2^12).
- * So the maximum sess_queue_depth is 65536 (2^16) in theory.
- * But mempool_create, create_qp and ib_post_send fail with
- * "cannot allocate memory" error if sess_queue_depth is too big.
- * Therefore the pratical max value of sess_queue_depth is
- * somewhere between 1 and 65534 and it depends on the system.
- */
- MAX_SESS_QUEUE_DEPTH = 65535,
MIN_CHUNK_SIZE = 8192,
RTRS_HB_INTERVAL_MS = 5000,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
index 44b1c1652131..2aff1213a19d 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
@@ -14,9 +14,14 @@
int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable)
{
if (enable) {
- struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats;
+ int cpu;
+ struct rtrs_srv_stats_rdma_stats *r;
+
+ for_each_possible_cpu(cpu) {
+ r = per_cpu_ptr(stats->rdma_stats, cpu);
+ memset(r, 0, sizeof(*r));
+ }
- memset(r, 0, sizeof(*r));
return 0;
}
@@ -25,11 +30,22 @@ int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable)
ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page)
{
- struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats;
+ int cpu;
+ struct rtrs_srv_stats_rdma_stats sum;
+ struct rtrs_srv_stats_rdma_stats *r;
+
+ memset(&sum, 0, sizeof(sum));
+
+ for_each_possible_cpu(cpu) {
+ r = per_cpu_ptr(stats->rdma_stats, cpu);
+
+ sum.dir[READ].cnt += r->dir[READ].cnt;
+ sum.dir[READ].size_total += r->dir[READ].size_total;
+ sum.dir[WRITE].cnt += r->dir[WRITE].cnt;
+ sum.dir[WRITE].size_total += r->dir[WRITE].size_total;
+ }
- return sysfs_emit(page, "%lld %lld %lld %lldn %u\n",
- (s64)atomic64_read(&r->dir[READ].cnt),
- (s64)atomic64_read(&r->dir[READ].size_total),
- (s64)atomic64_read(&r->dir[WRITE].cnt),
- (s64)atomic64_read(&r->dir[WRITE].size_total), 0);
+ return sysfs_emit(page, "%llu %llu %llu %llu\n",
+ sum.dir[READ].cnt, sum.dir[READ].size_total,
+ sum.dir[WRITE].cnt, sum.dir[WRITE].size_total);
}
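Reviewer note: the generic shape of the conversion above, hedged (type and field names illustrative): plain u64 counters per CPU, contention-free writers, sum-on-read. Readers may see a momentarily stale total, which is acceptable for statistics.

    struct ex_rdma_stats { u64 cnt; };
    struct ex_rdma_stats __percpu *pcpu;
    u64 sum = 0;
    int cpu;

    pcpu = alloc_percpu(struct ex_rdma_stats);
    if (!pcpu)
            return -ENOMEM;

    this_cpu_inc(pcpu->cnt);        /* writer: no locks, no atomics */

    for_each_possible_cpu(cpu)      /* reader: fold all CPUs' counters */
            sum += per_cpu_ptr(pcpu, cpu)->cnt;

    free_percpu(pcpu);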
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index b94ae12c2795..2a3c9ac64a42 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -220,6 +220,8 @@ static void rtrs_srv_path_stats_release(struct kobject *kobj)
stats = container_of(kobj, struct rtrs_srv_stats, kobj_stats);
+ free_percpu(stats->rdma_stats);
+
kfree(stats);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 24024bce2566..34c03bde5064 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -11,7 +11,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
#include <linux/module.h>
-#include <linux/mempool.h>
#include "rtrs-srv.h"
#include "rtrs-log.h"
@@ -26,11 +25,7 @@ MODULE_LICENSE("GPL");
#define DEFAULT_SESS_QUEUE_DEPTH 512
#define MAX_HDR_SIZE PAGE_SIZE
-/* We guarantee to serve 10 paths at least */
-#define CHUNK_POOL_SZ 10
-
static struct rtrs_rdma_dev_pd dev_pd;
-static mempool_t *chunk_pool;
struct class *rtrs_dev_class;
static struct rtrs_srv_ib_ctx ib_ctx;
@@ -1358,7 +1353,7 @@ static void free_srv(struct rtrs_srv_sess *srv)
WARN_ON(refcount_read(&srv->refcount));
for (i = 0; i < srv->queue_depth; i++)
- mempool_free(srv->chunks[i], chunk_pool);
+ __free_pages(srv->chunks[i], get_order(max_chunk_size));
kfree(srv->chunks);
mutex_destroy(&srv->paths_mutex);
mutex_destroy(&srv->paths_ev_mutex);
@@ -1411,7 +1406,8 @@ static struct rtrs_srv_sess *get_or_create_srv(struct rtrs_srv_ctx *ctx,
goto err_free_srv;
for (i = 0; i < srv->queue_depth; i++) {
- srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL);
+ srv->chunks[i] = alloc_pages(GFP_KERNEL,
+ get_order(max_chunk_size));
if (!srv->chunks[i])
goto err_free_chunks;
}
@@ -1424,7 +1420,7 @@ static struct rtrs_srv_sess *get_or_create_srv(struct rtrs_srv_ctx *ctx,
err_free_chunks:
while (i--)
- mempool_free(srv->chunks[i], chunk_pool);
+ __free_pages(srv->chunks[i], get_order(max_chunk_size));
kfree(srv->chunks);
err_free_srv:
@@ -1513,6 +1509,7 @@ static void free_path(struct rtrs_srv_path *srv_path)
kobject_del(&srv_path->kobj);
kobject_put(&srv_path->kobj);
} else {
+ free_percpu(srv_path->stats->rdma_stats);
kfree(srv_path->stats);
kfree(srv_path);
}
@@ -1755,13 +1752,17 @@ static struct rtrs_srv_path *__alloc_path(struct rtrs_srv_sess *srv,
if (!srv_path->stats)
goto err_free_sess;
+ srv_path->stats->rdma_stats = alloc_percpu(struct rtrs_srv_stats_rdma_stats);
+ if (!srv_path->stats->rdma_stats)
+ goto err_free_stats;
+
srv_path->stats->srv_path = srv_path;
srv_path->dma_addr = kcalloc(srv->queue_depth,
sizeof(*srv_path->dma_addr),
GFP_KERNEL);
if (!srv_path->dma_addr)
- goto err_free_stats;
+ goto err_free_percpu;
srv_path->s.con = kcalloc(con_num, sizeof(*srv_path->s.con),
GFP_KERNEL);
@@ -1813,6 +1814,8 @@ err_free_con:
kfree(srv_path->s.con);
err_free_dma_addr:
kfree(srv_path->dma_addr);
+err_free_percpu:
+ free_percpu(srv_path->stats->rdma_stats);
err_free_stats:
kfree(srv_path->stats);
err_free_sess:
@@ -2266,14 +2269,10 @@ static int __init rtrs_server_init(void)
err);
return err;
}
- chunk_pool = mempool_create_page_pool(sess_queue_depth * CHUNK_POOL_SZ,
- get_order(max_chunk_size));
- if (!chunk_pool)
- return -ENOMEM;
rtrs_dev_class = class_create(THIS_MODULE, "rtrs-server");
if (IS_ERR(rtrs_dev_class)) {
err = PTR_ERR(rtrs_dev_class);
- goto out_chunk_pool;
+ goto out_err;
}
rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0);
if (!rtrs_wq) {
@@ -2285,9 +2284,7 @@ static int __init rtrs_server_init(void)
out_dev_class:
class_destroy(rtrs_dev_class);
-out_chunk_pool:
- mempool_destroy(chunk_pool);
-
+out_err:
return err;
}
@@ -2295,7 +2292,6 @@ static void __exit rtrs_server_exit(void)
{
destroy_workqueue(rtrs_wq);
class_destroy(rtrs_dev_class);
- mempool_destroy(chunk_pool);
rtrs_rdma_dev_pd_deinit(&dev_pd);
}
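Reviewer note: with the mempool gone, chunks come straight from the page allocator; get_order() rounds the byte size up to a power-of-two number of pages. Sketch, with the size worked through for an assumed 128 KiB chunk:

    int order = get_order(max_chunk_size); /* 128 KiB -> order 5 with 4 KiB pages */
    struct page *chunk;

    chunk = alloc_pages(GFP_KERNEL, order);
    if (!chunk)
            return -ENOMEM;
    /* ... chunk in use ... */
    __free_pages(chunk, order);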
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
index 6292e87f6afd..186a63c217df 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
@@ -12,6 +12,7 @@
#include <linux/device.h>
#include <linux/refcount.h>
+#include <linux/percpu.h>
#include "rtrs-pri.h"
/*
@@ -29,15 +30,15 @@ enum rtrs_srv_state {
*/
struct rtrs_srv_stats_rdma_stats {
struct {
- atomic64_t cnt;
- atomic64_t size_total;
+ u64 cnt;
+ u64 size_total;
} dir[2];
};
struct rtrs_srv_stats {
- struct kobject kobj_stats;
- struct rtrs_srv_stats_rdma_stats rdma_stats;
- struct rtrs_srv_path *srv_path;
+ struct kobject kobj_stats;
+ struct rtrs_srv_stats_rdma_stats __percpu *rdma_stats;
+ struct rtrs_srv_path *srv_path;
};
struct rtrs_srv_con {
@@ -130,8 +131,8 @@ void close_path(struct rtrs_srv_path *srv_path);
static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s,
size_t size, int d)
{
- atomic64_inc(&s->rdma_stats.dir[d].cnt);
- atomic64_add(size, &s->rdma_stats.dir[d].size_total);
+ this_cpu_inc(s->rdma_stats->dir[d].cnt);
+ this_cpu_add(s->rdma_stats->dir[d].size_total, size);
}
/* functions which are implemented in rtrs-srv-stats.c */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index d989f030fae0..5b18e2e36ee6 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -108,6 +108,7 @@ struct rdma_cm_id {
enum rdma_ucm_port_space ps;
enum ib_qp_type qp_type;
u32 port_num;
+ struct work_struct net_work;
};
struct rdma_cm_id *