diff options
Diffstat (limited to 'drivers/infiniband/core')
| -rw-r--r-- | drivers/infiniband/core/cma.c | 34 | ||||
| -rw-r--r-- | drivers/infiniband/core/cma_priv.h | 11 | ||||
| -rw-r--r-- | drivers/infiniband/core/counters.c | 40 | ||||
| -rw-r--r-- | drivers/infiniband/core/device.c | 1 | ||||
| -rw-r--r-- | drivers/infiniband/core/iwpm_util.c | 2 | ||||
| -rw-r--r-- | drivers/infiniband/core/nldev.c | 278 | ||||
| -rw-r--r-- | drivers/infiniband/core/rw.c | 66 | ||||
| -rw-r--r-- | drivers/infiniband/core/sa_query.c | 6 | ||||
| -rw-r--r-- | drivers/infiniband/core/sysfs.c | 58 | ||||
| -rw-r--r-- | drivers/infiniband/core/umem_dmabuf.c | 54 | ||||
| -rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 3 | ||||
| -rw-r--r-- | drivers/infiniband/core/verbs.c | 49 |
12 files changed, 480 insertions, 122 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 704ce595542c..835ac54d4a24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -453,7 +453,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); - list_add_tail(&id_priv->list, &cma_dev->id_list); + list_add_tail(&id_priv->device_item, &cma_dev->id_list); trace_cm_id_attach(id_priv, cma_dev->device); } @@ -470,7 +470,7 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); - list_del(&id_priv->list); + list_del_init(&id_priv->device_item); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; id_priv->id.device = NULL; @@ -854,6 +854,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, init_completion(&id_priv->comp); refcount_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); + INIT_LIST_HEAD(&id_priv->device_item); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); @@ -1647,7 +1648,7 @@ static struct rdma_id_private *cma_find_listener( return id_priv; list_for_each_entry(id_priv_dev, &id_priv->listen_list, - listen_list) { + listen_item) { if (id_priv_dev->id.device == cm_id->device && cma_match_net_dev(&id_priv_dev->id, net_dev, req)) @@ -1756,14 +1757,15 @@ static void _cma_cancel_listens(struct rdma_id_private *id_priv) * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. */ - list_del(&id_priv->list); + list_del_init(&id_priv->listen_any_item); while (!list_empty(&id_priv->listen_list)) { - dev_id_priv = list_entry(id_priv->listen_list.next, - struct rdma_id_private, listen_list); + dev_id_priv = + list_first_entry(&id_priv->listen_list, + struct rdma_id_private, listen_item); /* sync with device removal to avoid duplicate destruction */ - list_del_init(&dev_id_priv->list); - list_del(&dev_id_priv->listen_list); + list_del_init(&dev_id_priv->device_item); + list_del_init(&dev_id_priv->listen_item); mutex_unlock(&lock); rdma_destroy_id(&dev_id_priv->id); @@ -2564,7 +2566,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) goto err_listen; - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list); return 0; err_listen: /* Caller must destroy this after releasing lock */ @@ -2580,13 +2582,13 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) int ret; mutex_lock(&lock); - list_add_tail(&id_priv->list, &listen_any_list); + list_add_tail(&id_priv->listen_any_item, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) { ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) { /* Prevent racing with cma_process_remove() */ if (to_destroy) - list_del_init(&to_destroy->list); + list_del_init(&to_destroy->device_item); goto err_listen; } } @@ -4895,7 +4897,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) - list_for_each_entry(id_priv, &cma_dev->id_list, list) { + list_for_each_entry(id_priv, &cma_dev->id_list, device_item) { ret = cma_netdev_change(ndev, id_priv); if (ret) goto out; @@ -4955,10 +4957,10 @@ static void cma_process_remove(struct cma_device *cma_dev) mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { struct rdma_id_private *id_priv = list_first_entry( - &cma_dev->id_list, struct rdma_id_private, list); + &cma_dev->id_list, struct rdma_id_private, device_item); - list_del(&id_priv->listen_list); - list_del_init(&id_priv->list); + list_del_init(&id_priv->listen_item); + list_del_init(&id_priv->device_item); cma_id_get(id_priv); mutex_unlock(&lock); @@ -5035,7 +5037,7 @@ static int cma_add_one(struct ib_device *device) mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) { + list_for_each_entry(id_priv, &listen_any_list, listen_any_item) { ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) goto free_listen; diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index f92f101ea981..757a0ef79872 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -55,8 +55,15 @@ struct rdma_id_private { struct rdma_bind_list *bind_list; struct hlist_node node; - struct list_head list; /* listen_any_list or cma_device.list */ - struct list_head listen_list; /* per device listens */ + union { + struct list_head device_item; /* On cma_device->id_list */ + struct list_head listen_any_item; /* On listen_any_list */ + }; + union { + /* On rdma_id_private->listen_list */ + struct list_head listen_item; + struct list_head listen_list; + }; struct cma_device *cma_dev; struct list_head mc_list; diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index df9e6c5e4ddf..af59486fe418 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter, return ret; } +int rdma_counter_modify(struct ib_device *dev, u32 port, + unsigned int index, bool enable) +{ + struct rdma_hw_stats *stats; + int ret = 0; + + if (!dev->ops.modify_hw_stat) + return -EOPNOTSUPP; + + stats = ib_get_hw_stats_port(dev, port); + if (!stats || index >= stats->num_counters || + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) + return -EINVAL; + + mutex_lock(&stats->lock); + + if (enable != test_bit(index, stats->is_disabled)) + goto out; + + ret = dev->ops.modify_hw_stat(dev, port, index, enable); + if (ret) + goto out; + + if (enable) + clear_bit(index, stats->is_disabled); + else + set_bit(index, stats->is_disabled); +out: + mutex_unlock(&stats->lock); + return ret; +} + static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, struct ib_qp *qp, enum rdma_nl_counter_mode mode) @@ -165,7 +197,7 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, return counter; err_mode: - kfree(counter->stats); + rdma_free_hw_stats_struct(counter->stats); err_stats: rdma_restrack_put(&counter->res); kfree(counter); @@ -186,7 +218,7 @@ static void rdma_counter_free(struct rdma_counter *counter) mutex_unlock(&port_counter->lock); rdma_restrack_del(&counter->res); - kfree(counter->stats); + rdma_free_hw_stats_struct(counter->stats); kfree(counter); } @@ -618,7 +650,7 @@ void rdma_counter_init(struct ib_device *dev) fail: for (i = port; i >= rdma_start_port(dev); i--) { port_counter = &dev->port_data[port].port_counter; - kfree(port_counter->hstats); + rdma_free_hw_stats_struct(port_counter->hstats); port_counter->hstats = NULL; mutex_destroy(&port_counter->lock); } @@ -631,7 +663,7 @@ void rdma_counter_release(struct ib_device *dev) rdma_for_each_port(dev, port) { port_counter = &dev->port_data[port].port_counter; - kfree(port_counter->hstats); + rdma_free_hw_stats_struct(port_counter->hstats); mutex_destroy(&port_counter->lock); } } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f4814bb7f082..22a4adda7981 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); SET_DEVICE_OP(dev_ops, modify_flow_action_esp); + SET_DEVICE_OP(dev_ops, modify_hw_stat); SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_qp); SET_DEVICE_OP(dev_ops, modify_srq); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 54f4feb604d8..358a2db38d23 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -762,7 +762,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; - const char *err_str = ""; + const char *err_str; int ret = -EINVAL; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e9b4b2cccaa0..fedc0fa6ebf9 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -154,6 +154,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -968,14 +970,21 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, if (!table_attr) return -EMSGSIZE; - for (i = 0; i < st->num_counters; i++) - if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + mutex_lock(&st->lock); + for (i = 0; i < st->num_counters; i++) { + if (test_bit(i, st->is_disabled)) + continue; + if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, + st->value[i])) goto err; + } + mutex_unlock(&st->lock); nla_nest_end(msg, table_attr); return 0; err: + mutex_unlock(&st->lock); nla_nest_cancel(msg, table_attr); return -EMSGSIZE; } @@ -1888,24 +1897,111 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } +static int nldev_stat_set_mode_doit(struct sk_buff *msg, + struct netlink_ext_ack *extack, + struct nlattr *tb[], + struct ib_device *device, u32 port) +{ + u32 mode, mask = 0, qpn, cntn = 0; + int ret; + + /* Currently only counter for QP is supported */ + if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + return -EINVAL; + + mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); + if (mode == RDMA_COUNTER_MODE_AUTO) { + if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) + mask = nla_get_u32( + tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); + return rdma_counter_set_auto_mode(device, port, mask, extack); + } + + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) + return -EINVAL; + + qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); + if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { + cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); + ret = rdma_counter_bind_qpn(device, port, qpn, cntn); + if (ret) + return ret; + } else { + ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); + if (ret) + return ret; + } + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { + ret = -EMSGSIZE; + goto err_fill; + } + + return 0; + +err_fill: + rdma_counter_unbind_qpn(device, port, qpn, cntn); + return ret; +} + +static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], + struct ib_device *device, + u32 port) +{ + struct rdma_hw_stats *stats; + int rem, i, index, ret = 0; + struct nlattr *entry_attr; + unsigned long *target; + + stats = ib_get_hw_stats_port(device, port); + if (!stats) + return -EINVAL; + + target = kcalloc(BITS_TO_LONGS(stats->num_counters), + sizeof(*stats->is_disabled), GFP_KERNEL); + if (!target) + return -ENOMEM; + + nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], + rem) { + index = nla_get_u32(entry_attr); + if ((index >= stats->num_counters) || + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { + ret = -EINVAL; + goto out; + } + + set_bit(index, target); + } + + for (i = 0; i < stats->num_counters; i++) { + if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) + continue; + + ret = rdma_counter_modify(device, port, i, test_bit(i, target)); + if (ret) + goto out; + } + +out: + kfree(target); + return ret; +} + static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - u32 index, port, mode, mask = 0, qpn, cntn = 0; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; struct sk_buff *msg; + u32 index, port; int ret; - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); - /* Currently only counter for QP is supported */ - if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || - !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || - !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE]) - return -EINVAL; - - if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, + extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); @@ -1916,59 +2012,49 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; - goto err; + goto err_put_device; + } + + if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && + !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { + ret = -EINVAL; + goto err_put_device; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_put_device; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { + ret = -EMSGSIZE; + goto err_free_msg; + } - mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); - if (mode == RDMA_COUNTER_MODE_AUTO) { - if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) - mask = nla_get_u32( - tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); - ret = rdma_counter_set_auto_mode(device, port, mask, extack); - if (ret) - goto err_msg; - } else { - if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) - goto err_msg; - qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { - cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); - ret = rdma_counter_bind_qpn(device, port, qpn, cntn); - } else { - ret = rdma_counter_bind_qpn_alloc(device, port, - qpn, &cntn); - } + if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { + ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); if (ret) - goto err_msg; + goto err_free_msg; + } - if (fill_nldev_handle(msg, device) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { - ret = -EMSGSIZE; - goto err_fill; - } + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { + ret = nldev_stat_set_counter_dynamic_doit(tb, device, port); + if (ret) + goto err_free_msg; } nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); -err_fill: - rdma_counter_unbind_qpn(device, port, qpn, cntn); -err_msg: +err_free_msg: nlmsg_free(msg); -err: +err_put_device: ib_device_put(device); return ret; } @@ -2103,9 +2189,13 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, goto err_stats; } for (i = 0; i < num_cnts; i++) { + if (test_bit(i, stats->is_disabled)) + continue; + v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, + stats->descs[i].name, v)) { ret = -EMSGSIZE; goto err_table; } @@ -2253,6 +2343,99 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, return ret; } +static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; + struct rdma_hw_stats *stats; + struct ib_device *device; + struct sk_buff *msg; + u32 devid, port; + int ret, i; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), devid); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + stats = ib_get_hw_stats_port(device, port); + if (!stats) { + ret = -EINVAL; + goto err; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + + nlh = nlmsg_put( + msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), + 0, 0); + + ret = -EMSGSIZE; + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) + goto err_msg; + + table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + if (!table) + goto err_msg; + + mutex_lock(&stats->lock); + for (i = 0; i < stats->num_counters; i++) { + entry = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); + if (!entry) + goto err_msg_table; + + if (nla_put_string(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, + stats->descs[i].name) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) + goto err_msg_entry; + + if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && + (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, + !test_bit(i, stats->is_disabled)))) + goto err_msg_entry; + + nla_nest_end(msg, entry); + } + mutex_unlock(&stats->lock); + + nla_nest_end(msg, table); + nlmsg_end(msg, nlh); + ib_device_put(device); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); + +err_msg_entry: + nla_nest_cancel(msg, entry); +err_msg_table: + mutex_unlock(&stats->lock); + nla_nest_cancel(msg, table); +err_msg: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -2342,6 +2525,9 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .dump = nldev_res_get_mr_raw_dumpit, .flags = RDMA_NL_ADMIN_PERM, }, + [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { + .doit = nldev_stat_get_counter_status_doit, + }, }; void __init nldev_init(void) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 5221cce65675..5a3bd41b331c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -282,15 +282,22 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, ib_dma_unmap_sg(dev, sg, sg_cnt, dir); } -static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, - u32 sg_cnt, enum dma_data_direction dir) +static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt, + enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) { + int nents; + + if (is_pci_p2pdma_page(sg_page(sgt->sgl))) { if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) return 0; - return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl, + sgt->orig_nents, dir); + if (!nents) + return -EIO; + sgt->nents = nents; + return 0; } - return ib_dma_map_sg(dev, sg, sg_cnt, dir); + return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0); } /** @@ -313,12 +320,16 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct ib_device *dev = qp->pd->device; + struct sg_table sgt = { + .sgl = sg, + .orig_nents = sg_cnt, + }; int ret; - ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); - if (!ret) - return -ENOMEM; - sg_cnt = ret; + ret = rdma_rw_map_sgtable(dev, &sgt, dir); + if (ret) + return ret; + sg_cnt = sgt.nents; /* * Skip to the S/G entry that sg_offset falls into: @@ -354,7 +365,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, return ret; out_unmap_sg: - rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -385,6 +396,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, struct ib_device *dev = qp->pd->device; u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, qp->integrity_en); + struct sg_table sgt = { + .sgl = sg, + .orig_nents = sg_cnt, + }; + struct sg_table prot_sgt = { + .sgl = prot_sg, + .orig_nents = prot_sg_cnt, + }; struct ib_rdma_wr *rdma_wr; int count = 0, ret; @@ -394,18 +413,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -EINVAL; } - ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); - if (!ret) - return -ENOMEM; - sg_cnt = ret; + ret = rdma_rw_map_sgtable(dev, &sgt, dir); + if (ret) + return ret; if (prot_sg_cnt) { - ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir); - if (!ret) { - ret = -ENOMEM; + ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir); + if (ret) goto out_unmap_sg; - } - prot_sg_cnt = ret; } ctx->type = RDMA_RW_SIG_MR; @@ -426,10 +441,11 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs)); - ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg, - prot_sg_cnt, NULL, SZ_4K); + ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg, + prot_sgt.nents, NULL, SZ_4K); if (unlikely(ret)) { - pr_err("failed to map PI sg (%u)\n", sg_cnt + prot_sg_cnt); + pr_err("failed to map PI sg (%u)\n", + sgt.nents + prot_sgt.nents); goto out_destroy_sig_mr; } @@ -468,10 +484,10 @@ out_destroy_sig_mr: out_free_ctx: kfree(ctx->reg); out_unmap_prot_sg: - if (prot_sg_cnt) - rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); + if (prot_sgt.nents) + rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir); out_unmap_sg: - rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_signature_init); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a20b8108e160..74ecd7456a11 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -706,8 +706,9 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Construct the family header first */ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); - memcpy(header->device_name, dev_name(&query->port->agent->device->dev), - LS_DEVICE_NAME_MAX); + strscpy_pad(header->device_name, + dev_name(&query->port->agent->device->dev), + LS_DEVICE_NAME_MAX); header->port_num = query->port->port_num; if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && @@ -2261,7 +2262,6 @@ err1: void ib_sa_cleanup(void) { cancel_delayed_work(&ib_nl_timed_work); - flush_workqueue(ib_nl_wq); destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 6146c3c1cbe5..a3f84b50c46a 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -755,9 +755,9 @@ static void ib_port_release(struct kobject *kobj) for (i = 0; i != ARRAY_SIZE(port->groups); i++) kfree(port->groups[i].attrs); if (port->hw_stats_data) - kfree(port->hw_stats_data->stats); + rdma_free_hw_stats_struct(port->hw_stats_data->stats); kfree(port->hw_stats_data); - kfree(port); + kvfree(port); } static void ib_port_gid_attr_release(struct kobject *kobj) @@ -895,7 +895,7 @@ alloc_hw_stats_device(struct ib_device *ibdev) stats = ibdev->ops.alloc_hw_device_stats(ibdev); if (!stats) return ERR_PTR(-ENOMEM); - if (!stats->names || stats->num_counters <= 0) + if (!stats->descs || stats->num_counters <= 0) goto err_free_stats; /* @@ -911,7 +911,6 @@ alloc_hw_stats_device(struct ib_device *ibdev) if (!data->group.attrs) goto err_free_data; - mutex_init(&stats->lock); data->group.name = "hw_counters"; data->stats = stats; return data; @@ -919,14 +918,14 @@ alloc_hw_stats_device(struct ib_device *ibdev) err_free_data: kfree(data); err_free_stats: - kfree(stats); + rdma_free_hw_stats_struct(stats); return ERR_PTR(-ENOMEM); } void ib_device_release_hw_stats(struct hw_stats_device_data *data) { kfree(data->group.attrs); - kfree(data->stats); + rdma_free_hw_stats_struct(data->stats); kfree(data); } @@ -934,7 +933,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev) { struct hw_stats_device_attribute *attr; struct hw_stats_device_data *data; - int i, ret; + bool opstat_skipped = false; + int i, ret, pos = 0; data = alloc_hw_stats_device(ibdev); if (IS_ERR(data)) { @@ -955,16 +955,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev) data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { - attr = &data->attrs[i]; + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { + opstat_skipped = true; + continue; + } + + WARN_ON(opstat_skipped); + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); - attr->attr.attr.name = data->stats->names[i]; + attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_device_show; attr->show = show_hw_stats; - data->group.attrs[i] = &attr->attr.attr; + data->group.attrs[pos] = &attr->attr.attr; + pos++; } - attr = &data->attrs[i]; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; @@ -972,7 +979,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev) attr->show = show_stats_lifespan; attr->attr.store = hw_stat_device_store; attr->store = set_stats_lifespan; - data->group.attrs[i] = &attr->attr.attr; + data->group.attrs[pos] = &attr->attr.attr; for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) if (!ibdev->groups[i]) { ibdev->groups[i] = &data->group; @@ -994,7 +1001,7 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num); if (!stats) return ERR_PTR(-ENOMEM); - if (!stats->names || stats->num_counters <= 0) + if (!stats->descs || stats->num_counters <= 0) goto err_free_stats; /* @@ -1010,7 +1017,6 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) if (!group->attrs) goto err_free_data; - mutex_init(&stats->lock); group->name = "hw_counters"; data->stats = stats; return data; @@ -1018,7 +1024,7 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) err_free_data: kfree(data); err_free_stats: - kfree(stats); + rdma_free_hw_stats_struct(stats); return ERR_PTR(-ENOMEM); } @@ -1027,7 +1033,8 @@ static int setup_hw_port_stats(struct ib_port *port, { struct hw_stats_port_attribute *attr; struct hw_stats_port_data *data; - int i, ret; + bool opstat_skipped = false; + int i, ret, pos = 0; data = alloc_hw_stats_port(port, group); if (IS_ERR(data)) @@ -1045,16 +1052,23 @@ static int setup_hw_port_stats(struct ib_port *port, data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { - attr = &data->attrs[i]; + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { + opstat_skipped = true; + continue; + } + + WARN_ON(opstat_skipped); + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); - attr->attr.attr.name = data->stats->names[i]; + attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_port_show; attr->show = show_hw_stats; - group->attrs[i] = &attr->attr.attr; + group->attrs[pos] = &attr->attr.attr; + pos++; } - attr = &data->attrs[i]; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; @@ -1062,7 +1076,7 @@ static int setup_hw_port_stats(struct ib_port *port, attr->show = show_stats_lifespan; attr->attr.store = hw_stat_port_store; attr->store = set_stats_lifespan; - group->attrs[i] = &attr->attr.attr; + group->attrs[pos] = &attr->attr.attr; port->hw_stats_data = data; return 0; @@ -1189,7 +1203,7 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, struct ib_port *p; int ret; - p = kzalloc(struct_size(p, attrs_list, + p = kvzalloc(struct_size(p, attrs_list, attr->gid_tbl_len + attr->pkey_tbl_len), GFP_KERNEL); if (!p) diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index e824baf4640d..f0760741f281 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -6,9 +6,12 @@ #include <linux/dma-buf.h> #include <linux/dma-resv.h> #include <linux/dma-mapping.h> +#include <linux/module.h> #include "uverbs.h" +MODULE_IMPORT_NS(DMA_BUF); + int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { struct sg_table *sgt; @@ -163,12 +166,63 @@ out_release_dmabuf: } EXPORT_SYMBOL(ib_umem_dmabuf_get); +static void +ib_umem_dmabuf_unsupported_move_notify(struct dma_buf_attachment *attach) +{ + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; + + ibdev_warn_ratelimited(umem_dmabuf->umem.ibdev, + "Invalidate callback should not be called when memory is pinned\n"); +} + +static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = { + .allow_peer2peer = true, + .move_notify = ib_umem_dmabuf_unsupported_move_notify, +}; + +struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access) +{ + struct ib_umem_dmabuf *umem_dmabuf; + int err; + + umem_dmabuf = ib_umem_dmabuf_get(device, offset, size, fd, access, + &ib_umem_dmabuf_attach_pinned_ops); + if (IS_ERR(umem_dmabuf)) + return umem_dmabuf; + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = dma_buf_pin(umem_dmabuf->attach); + if (err) + goto err_release; + umem_dmabuf->pinned = 1; + + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) + goto err_unpin; + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + return umem_dmabuf; + +err_unpin: + dma_buf_unpin(umem_dmabuf->attach); +err_release: + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + ib_umem_release(&umem_dmabuf->umem); + return ERR_PTR(err); +} +EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned); + void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; dma_resv_lock(dmabuf->resv, NULL); ib_umem_dmabuf_unmap_pages(umem_dmabuf); + if (umem_dmabuf->pinned) + dma_buf_unpin(umem_dmabuf->attach); dma_resv_unlock(dmabuf->resv); dma_buf_detach(dmabuf, umem_dmabuf->attach); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 740e6b2efe0e..d1345d76d9b1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -837,11 +837,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) new_mr->device = new_pd->device; new_mr->pd = new_pd; new_mr->type = IB_MR_TYPE_USER; - new_mr->dm = NULL; - new_mr->sig_attrs = NULL; new_mr->uobject = uobj; atomic_inc(&new_pd->usecnt); - new_mr->iova = cmd.hca_va; new_uobj->object = new_mr; rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 89a2b21976d6..692d5ff657df 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2976,3 +2976,52 @@ bool __rdma_block_iter_next(struct ib_block_iter *biter) return true; } EXPORT_SYMBOL(__rdma_block_iter_next); + +/** + * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct + * for the drivers. + * @descs: array of static descriptors + * @num_counters: number of elements in array + * @lifespan: milliseconds between updates + */ +struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const struct rdma_stat_desc *descs, int num_counters, + unsigned long lifespan) +{ + struct rdma_hw_stats *stats; + + stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL); + if (!stats) + return NULL; + + stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters), + sizeof(*stats->is_disabled), GFP_KERNEL); + if (!stats->is_disabled) + goto err; + + stats->descs = descs; + stats->num_counters = num_counters; + stats->lifespan = msecs_to_jiffies(lifespan); + mutex_init(&stats->lock); + + return stats; + +err: + kfree(stats); + return NULL; +} +EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); + +/** + * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats + * @stats: statistics to release + */ +void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats) +{ + if (!stats) + return; + + kfree(stats->is_disabled); + kfree(stats); +} +EXPORT_SYMBOL(rdma_free_hw_stats_struct); |