Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/Makefile            |    7
-rw-r--r--  drivers/infiniband/core/addr.c              |   67
-rw-r--r--  drivers/infiniband/core/agent.c             |    4
-rw-r--r--  drivers/infiniband/core/cache.c             |  205
-rw-r--r--  drivers/infiniband/core/cgroup.c            |   62
-rw-r--r--  drivers/infiniband/core/cm.c                |  117
-rw-r--r--  drivers/infiniband/core/cma.c               |  338
-rw-r--r--  drivers/infiniband/core/cma_configfs.c      |   42
-rw-r--r--  drivers/infiniband/core/core_priv.h         |  158
-rw-r--r--  drivers/infiniband/core/cq.c                |   16
-rw-r--r--  drivers/infiniband/core/device.c            |  152
-rw-r--r--  drivers/infiniband/core/fmr_pool.c          |   49
-rw-r--r--  drivers/infiniband/core/iwpm_util.c         |    6
-rw-r--r--  drivers/infiniband/core/mad.c               |   84
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c          |   16
-rw-r--r--  drivers/infiniband/core/multicast.c         |   27
-rw-r--r--  drivers/infiniband/core/netlink.c           |    7
-rw-r--r--  drivers/infiniband/core/rdma_core.c         |  627
-rw-r--r--  drivers/infiniband/core/rdma_core.h         |   78
-rw-r--r--  drivers/infiniband/core/roce_gid_mgmt.c     |   39
-rw-r--r--  drivers/infiniband/core/sa_query.c          |  889
-rw-r--r--  drivers/infiniband/core/security.c          |  709
-rw-r--r--  drivers/infiniband/core/sysfs.c             |    8
-rw-r--r--  drivers/infiniband/core/ucm.c               |   43
-rw-r--r--  drivers/infiniband/core/ucma.c              |   24
-rw-r--r--  drivers/infiniband/core/umem.c              |   25
-rw-r--r--  drivers/infiniband/core/umem_odp.c          |  169
-rw-r--r--  drivers/infiniband/core/umem_rbtree.c       |   21
-rw-r--r--  drivers/infiniband/core/user_mad.c          |   52
-rw-r--r--  drivers/infiniband/core/uverbs.h            |   70
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c        | 1594
-rw-r--r--  drivers/infiniband/core/uverbs_main.c       |  500
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c   |   85
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c  |  275
-rw-r--r--  drivers/infiniband/core/verbs.c             |  198
35 files changed, 4572 insertions(+), 2191 deletions(-)
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index edaae9f9853c..e3cdafff8ece 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -10,9 +10,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
- multicast.o mad.o smi.o agent.o mad_rmpp.o
+ multicast.o mad.o smi.o agent.o mad_rmpp.o \
+ security.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
+ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
ib_cm-y := cm.o
@@ -28,4 +30,5 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
+ rdma_core.o uverbs_std_types.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0f58f46dbad7..01236cef7bfb 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -88,7 +88,7 @@ static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
return false;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_addr_policy);
+ nlmsg_len(nlh), ib_nl_addr_policy, NULL);
if (ret)
return false;
@@ -179,8 +179,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
}
/* Construct the family header first */
- header = (struct rdma_ls_ip_resolve_header *)
- skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
header->ifindex = dev_addr->bound_dev_if;
nla_put(skb, attrtype, size, daddr);
@@ -269,6 +268,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_addr->bound_dev_if = dev->ifindex;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
@@ -281,6 +281,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
&((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_addr->bound_dev_if = dev->ifindex;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
@@ -406,10 +407,10 @@ static int addr4_resolve(struct sockaddr_in *src_in,
fl4.saddr = src_ip;
fl4.flowi4_oif = addr->bound_dev_if;
rt = ip_route_output_key(addr->net, &fl4);
- if (IS_ERR(rt)) {
- ret = PTR_ERR(rt);
- goto out;
- }
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
+
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
@@ -424,8 +425,6 @@ static int addr4_resolve(struct sockaddr_in *src_in,
*prt = rt;
return 0;
-out:
- return ret;
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -444,17 +443,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(addr->net, NULL, &fl6);
- if ((ret = dst->error))
- goto put;
+ ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
+ if (ret < 0)
+ return ret;
rt = (struct rt6_info *)dst;
- if (ipv6_addr_any(&fl6.saddr)) {
- ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
- &fl6.daddr, 0, &fl6.saddr);
- if (ret)
- goto put;
-
+ if (ipv6_addr_any(&src_in->sin6_addr)) {
src_in->sin6_family = AF_INET6;
src_in->sin6_addr = fl6.saddr;
}
@@ -471,9 +465,6 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
*pdst = dst;
return 0;
-put:
- dst_release(dst);
- return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
@@ -518,6 +509,11 @@ static int addr_resolve(struct sockaddr *src_in,
struct dst_entry *dst;
int ret;
+ if (!addr->net) {
+ pr_warn_ratelimited("%s: missing namespace\n", __func__);
+ return -EINVAL;
+ }
+
if (src_in->sa_family == AF_INET) {
struct rtable *rt = NULL;
const struct sockaddr_in *dst_in4 =
@@ -531,8 +527,12 @@ static int addr_resolve(struct sockaddr *src_in,
if (resolve_neigh)
ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
- ndev = rt->dst.dev;
- dev_hold(ndev);
+ if (addr->bound_dev_if) {
+ ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+ } else {
+ ndev = rt->dst.dev;
+ dev_hold(ndev);
+ }
ip_rt_put(rt);
} else {
@@ -548,14 +548,27 @@ static int addr_resolve(struct sockaddr *src_in,
if (resolve_neigh)
ret = addr_resolve_neigh(dst, dst_in, addr, seq);
- ndev = dst->dev;
- dev_hold(ndev);
+ if (addr->bound_dev_if) {
+ ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+ } else {
+ ndev = dst->dev;
+ dev_hold(ndev);
+ }
dst_release(dst);
}
- addr->bound_dev_if = ndev->ifindex;
- addr->net = dev_net(ndev);
+ if (ndev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip(dst_in, addr, NULL);
+ /*
+ * Put the loopback device and get the translated
+ * device instead.
+ */
+ dev_put(ndev);
+ ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+ } else {
+ addr->bound_dev_if = ndev->ifindex;
+ }
dev_put(ndev);
return ret;
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 11dacd97a667..324ef85a13b6 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
err2:
ib_free_send_mad(send_buf);
err1:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index ae04826e82fc..efc94304dee3 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -53,6 +53,7 @@ struct ib_update_work {
struct work_struct work;
struct ib_device *device;
u8 port_num;
+ bool enforce_security;
};
union ib_gid zgid;
@@ -314,14 +315,13 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
int ret = 0;
struct net_device *idev;
int empty;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
@@ -369,11 +369,10 @@ out_unlock:
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -399,12 +398,11 @@ out_unlock:
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *ndev)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
bool deleted = false;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -428,10 +426,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (index < 0 || index >= table->sz)
return -EINVAL;
@@ -455,14 +452,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
unsigned long mask,
u8 *port, u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
u8 p;
int local_index;
unsigned long flags;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
- table = ports_table[p];
+ table = ib_dev->cache.ports[p].gid;
read_lock_irqsave(&table->rwlock, flags);
local_index = find_gid(table, gid, val, false, mask, NULL);
if (local_index >= 0) {
@@ -503,18 +499,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
u16 *index)
{
int local_index;
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
unsigned long flags;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev))
+ if (!rdma_is_port_valid(ib_dev, port))
return -ENOENT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -562,21 +556,17 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
void *context,
u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned int i;
unsigned long flags;
bool found = false;
- if (!ports_table)
- return -EOPNOTSUPP;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev) ||
+ if (!rdma_is_port_valid(ib_dev, port) ||
!rdma_protocol_roce(ib_dev, port))
return -EPROTONOSUPPORT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
@@ -668,14 +658,13 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
make_default_gid(ndev, &gid);
memset(&gid_attr, 0, sizeof(gid_attr));
@@ -766,71 +755,64 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
- struct ib_gid_table **table;
+ struct ib_gid_table *table;
int err = 0;
- table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
-
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
- table[port] =
+ table =
alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
- if (!table[port]) {
+ if (!table) {
err = -ENOMEM;
goto rollback_table_setup;
}
err = gid_table_reserve_default(ib_dev,
port + rdma_start_port(ib_dev),
- table[port]);
+ table);
if (err)
goto rollback_table_setup;
+ ib_dev->cache.ports[port].gid = table;
}
- ib_dev->cache.gid_cache = table;
return 0;
rollback_table_setup:
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
- release_gid_table(table[port]);
+ table);
+ release_gid_table(table);
}
- kfree(table);
return err;
}
static void gid_table_release_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
- release_gid_table(table[port]);
-
- kfree(table);
- ib_dev->cache.gid_cache = NULL;
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+ release_gid_table(table);
+ ib_dev->cache.ports[port].gid = NULL;
+ }
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
+ table);
+ }
}
static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -860,12 +842,12 @@ int ib_get_cached_gid(struct ib_device *device,
{
int res;
unsigned long flags;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
- struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+ struct ib_gid_table *table;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ table = device->cache.ports[port_num - rdma_start_port(device)].gid;
read_lock_irqsave(&table->rwlock, flags);
res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
read_unlock_irqrestore(&table->rwlock, flags);
@@ -912,12 +894,12 @@ int ib_get_cached_pkey(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
@@ -930,6 +912,26 @@ int ib_get_cached_pkey(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_pkey);
+int ib_get_cached_subnet_prefix(struct ib_device *device,
+ u8 port_num,
+ u64 *sn_pfx)
+{
+ unsigned long flags;
+ int p;
+
+ if (port_num < rdma_start_port(device) ||
+ port_num > rdma_end_port(device))
+ return -EINVAL;
+
+ p = port_num - rdma_start_port(device);
+ read_lock_irqsave(&device->cache.lock, flags);
+ *sn_pfx = device->cache.ports[p].subnet_prefix;
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
+
int ib_find_cached_pkey(struct ib_device *device,
u8 port_num,
u16 pkey,
@@ -941,12 +943,12 @@ int ib_find_cached_pkey(struct ib_device *device,
int ret = -ENOENT;
int partial_ix = -1;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -981,12 +983,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
int i;
int ret = -ENOENT;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -1010,19 +1012,39 @@ int ib_get_cached_lmc(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
+ *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
+int ib_get_cached_port_state(struct ib_device *device,
+ u8 port_num,
+ enum ib_port_state *port_state)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+ *port_state = device->cache.ports[port_num
+ - rdma_start_port(device)].port_state;
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_port_state);
+
static void ib_cache_update(struct ib_device *device,
- u8 port)
+ u8 port,
+ bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
@@ -1033,14 +1055,13 @@ static void ib_cache_update(struct ib_device *device,
int i;
int ret;
struct ib_gid_table *table;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
bool use_roce_gid_table =
rdma_cap_roce_gid_table(device, port);
- if (port < rdma_start_port(device) || port > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port))
return;
- table = ports_table[port - rdma_start_port(device)];
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
@@ -1092,9 +1113,10 @@ static void ib_cache_update(struct ib_device *device,
write_lock_irq(&device->cache.lock);
- old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];
+ old_pkey_cache = device->cache.ports[port -
+ rdma_start_port(device)].pkey;
- device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
+ device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
if (!use_roce_gid_table) {
write_lock(&table->rwlock);
for (i = 0; i < gid_cache->table_len; i++) {
@@ -1104,10 +1126,19 @@ static void ib_cache_update(struct ib_device *device,
write_unlock(&table->rwlock);
}
- device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].port_state =
+ tprops->state;
+ device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
+ tprops->subnet_prefix;
write_unlock_irq(&device->cache.lock);
+ if (enforce_security)
+ ib_security_cache_change(device,
+ port,
+ tprops->subnet_prefix);
+
kfree(gid_cache);
kfree(old_pkey_cache);
kfree(tprops);
@@ -1124,7 +1155,9 @@ static void ib_cache_task(struct work_struct *_work)
struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
- ib_cache_update(work->device, work->port_num);
+ ib_cache_update(work->device,
+ work->port_num,
+ work->enforce_security);
kfree(work);
}
@@ -1145,6 +1178,12 @@ static void ib_cache_event(struct ib_event_handler *handler,
INIT_WORK(&work->work, ib_cache_task);
work->device = event->device;
work->port_num = event->element.port_num;
+ if (event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_GID_CHANGE)
+ work->enforce_security = true;
+ else
+ work->enforce_security = false;
+
queue_work(ib_wq, &work->work);
}
}
@@ -1157,25 +1196,20 @@ int ib_cache_setup_one(struct ib_device *device)
rwlock_init(&device->cache.lock);
- device->cache.pkey_cache =
- kzalloc(sizeof *device->cache.pkey_cache *
+ device->cache.ports =
+ kzalloc(sizeof(*device->cache.ports) *
(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
- device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
- (rdma_end_port(device) -
- rdma_start_port(device) + 1),
- GFP_KERNEL);
- if (!device->cache.pkey_cache ||
- !device->cache.lmc_cache) {
+ if (!device->cache.ports) {
err = -ENOMEM;
- goto free;
+ goto out;
}
err = gid_table_setup_one(device);
if (err)
- goto free;
+ goto out;
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- ib_cache_update(device, p + rdma_start_port(device));
+ ib_cache_update(device, p + rdma_start_port(device), true);
INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
@@ -1187,9 +1221,7 @@ int ib_cache_setup_one(struct ib_device *device)
err:
gid_table_cleanup_one(device);
-free:
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+out:
return err;
}
@@ -1203,14 +1235,11 @@ void ib_cache_release_one(struct ib_device *device)
* all the device's resources when the cache could no
* longer be accessed.
*/
- if (device->cache.pkey_cache)
- for (p = 0;
- p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- kfree(device->cache.pkey_cache[p]);
+ for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
+ kfree(device->cache.ports[p].pkey);
gid_table_release_one(device);
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+ kfree(device->cache.ports);
}
void ib_cache_cleanup_one(struct ib_device *device)
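A minimal consumer sketch, not from the patch, for the two cache helpers added above (ib_get_cached_port_state() and ib_get_cached_subnet_prefix()); the function name is invented, and the header locations are assumptions.

#include <rdma/ib_cache.h>
#include "core_priv.h"	/* assumed home of ib_get_cached_subnet_prefix() */

static bool example_port_is_usable(struct ib_device *device, u8 port_num)
{
	enum ib_port_state state;
	u64 sn_pfx;

	if (ib_get_cached_port_state(device, port_num, &state))
		return false;
	if (ib_get_cached_subnet_prefix(device, port_num, &sn_pfx))
		return false;

	/* Both reads take cache.lock briefly; no query goes to the device. */
	return state == IB_PORT_ACTIVE;
}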
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
new file mode 100644
index 000000000000..126ac5f99db7
--- /dev/null
+++ b/drivers/infiniband/core/cgroup.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 Parav Pandit <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "core_priv.h"
+
+/**
+ * ib_device_register_rdmacg - register with rdma cgroup.
+ * @device: device to register to participate in resource
+ * accounting by rdma cgroup.
+ *
+ * Register with the rdma cgroup. Should be called before
+ * exposing rdma device to user space applications to avoid
+ * resource accounting leak.
+ * Returns 0 on success or otherwise failure code.
+ */
+int ib_device_register_rdmacg(struct ib_device *device)
+{
+ device->cg_device.name = device->name;
+ return rdmacg_register_device(&device->cg_device);
+}
+
+/**
+ * ib_device_unregister_rdmacg - unregister with rdma cgroup.
+ * @device: device to unregister.
+ *
+ * Unregister with the rdma cgroup. Should be called after
+ * all the resources are deallocated, and after a stage when any
+ * other resource allocation by user application cannot be done
+ * for this device to avoid any leak in accounting.
+ */
+void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+ rdmacg_unregister_device(&device->cg_device);
+}
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ return rdmacg_try_charge(&cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_try_charge);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ rdmacg_uncharge(cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_uncharge);
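A minimal usage sketch, not part of the patch, of the charge/uncharge pairing the comments above describe: charge before creating an object on behalf of a process, release the charge if creation fails. The function name is invented; RDMACG_RESOURCE_HCA_OBJECT is assumed to be one of the resource types defined by the rdma cgroup controller, and the helper declarations are assumed to come from core_priv.h.

#include <linux/cgroup_rdma.h>
#include "core_priv.h"

static int example_create_tracked_object(struct ib_device *device,
					 struct ib_rdmacg_object *cg_obj)
{
	int ret;

	ret = ib_rdmacg_try_charge(cg_obj, device, RDMACG_RESOURCE_HCA_OBJECT);
	if (ret)
		return ret;	/* over the configured cgroup limit */

	/*
	 * ... create the object here; if that fails, drop the charge again:
	 *
	 *	ib_rdmacg_uncharge(cg_obj, device, RDMACG_RESOURCE_HCA_OBJECT);
	 */
	return 0;
}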
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index cf1edfa1cbac..2b4d613a3474 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -228,7 +228,7 @@ struct cm_device {
struct cm_av {
struct cm_port *port;
union ib_gid dgid;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
};
@@ -241,7 +241,7 @@ struct cm_work {
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
- struct ib_sa_path_rec path[0];
+ struct sa_path_rec path[0];
};
struct cm_timewait_info {
@@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
ret = -ENODEV;
goto out;
}
- ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto out;
@@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
ret = PTR_ERR(m);
goto out;
}
@@ -390,7 +390,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
return PTR_ERR(m);
}
m->ah = ah;
@@ -400,7 +400,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
@@ -440,7 +440,7 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct cm_device *cm_dev;
@@ -453,7 +453,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- path->gid_type, ndev, &p, NULL)) {
+ sa_conv_pathrec_to_gid_type(path),
+ ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
@@ -1172,8 +1173,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
{
- struct ib_sa_path_rec *pri_path = param->primary_path;
- struct ib_sa_path_rec *alt_path = param->alternate_path;
+ struct sa_path_rec *pri_path = param->primary_path;
+ struct sa_path_rec *alt_path = param->alternate_path;
cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
@@ -1202,8 +1203,10 @@ static void cm_format_req(struct cm_req_msg *req_msg,
}
if (pri_path->hop_limit <= 1) {
- req_msg->primary_local_lid = pri_path->slid;
- req_msg->primary_remote_lid = pri_path->dlid;
+ req_msg->primary_local_lid =
+ htons(ntohl(sa_path_get_slid(pri_path)));
+ req_msg->primary_remote_lid =
+ htons(ntohl(sa_path_get_dlid(pri_path)));
} else {
/* Work-around until there's a way to obtain remote LID info */
req_msg->primary_local_lid = IB_LID_PERMISSIVE;
@@ -1223,8 +1226,10 @@ static void cm_format_req(struct cm_req_msg *req_msg,
if (alt_path) {
if (alt_path->hop_limit <= 1) {
- req_msg->alt_local_lid = alt_path->slid;
- req_msg->alt_remote_lid = alt_path->dlid;
+ req_msg->alt_local_lid =
+ htons(ntohl(sa_path_get_slid(alt_path)));
+ req_msg->alt_remote_lid =
+ htons(ntohl(sa_path_get_dlid(alt_path)));
} else {
req_msg->alt_local_lid = IB_LID_PERMISSIVE;
req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
@@ -1401,14 +1406,15 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
}
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
- struct ib_sa_path_rec *primary_path,
- struct ib_sa_path_rec *alt_path)
+ struct sa_path_rec *primary_path,
+ struct sa_path_rec *alt_path)
{
- memset(primary_path, 0, sizeof *primary_path);
primary_path->dgid = req_msg->primary_local_gid;
primary_path->sgid = req_msg->primary_remote_gid;
- primary_path->dlid = req_msg->primary_local_lid;
- primary_path->slid = req_msg->primary_remote_lid;
+ sa_path_set_dlid(primary_path,
+ htonl(ntohs(req_msg->primary_local_lid)));
+ sa_path_set_slid(primary_path,
+ htonl(ntohs(req_msg->primary_remote_lid)));
primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
primary_path->hop_limit = req_msg->primary_hop_limit;
primary_path->traffic_class = req_msg->primary_traffic_class;
@@ -1426,11 +1432,12 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
primary_path->service_id = req_msg->service_id;
if (req_msg->alt_local_lid) {
- memset(alt_path, 0, sizeof *alt_path);
alt_path->dgid = req_msg->alt_local_gid;
alt_path->sgid = req_msg->alt_remote_gid;
- alt_path->dlid = req_msg->alt_local_lid;
- alt_path->slid = req_msg->alt_remote_lid;
+ sa_path_set_dlid(alt_path,
+ htonl(ntohs(req_msg->alt_local_lid)));
+ sa_path_set_slid(alt_path,
+ htonl(ntohs(req_msg->alt_remote_lid)));
alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
alt_path->hop_limit = req_msg->alt_hop_limit;
alt_path->traffic_class = req_msg->alt_traffic_class;
@@ -1722,6 +1729,7 @@ static int cm_req_handler(struct cm_work *work)
struct cm_req_msg *req_msg;
union ib_gid gid;
struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1758,21 +1766,34 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->id.service_mask = ~cpu_to_be64(0);
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
- cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
- memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+ memset(&work->path[0], 0, sizeof(work->path[0]));
+ memset(&work->path[1], 0, sizeof(work->path[1]));
+ grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
work->port->port_num,
- cm_id_priv->av.ah_attr.grh.sgid_index,
+ grh->sgid_index,
&gid, &gid_attr);
if (!ret) {
if (gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
+ } else {
+ work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
}
- work->path[0].gid_type = gid_attr.gid_type;
+ if (req_msg->alt_local_lid)
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+ &work->path[1]);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+ work->path[0].hop_limit = grh->hop_limit;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
cm_id_priv);
}
@@ -1782,11 +1803,18 @@ static int cm_req_handler(struct cm_work *work)
&work->path[0].sgid,
&gid_attr);
if (!err && gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
+ } else {
+ work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
}
- work->path[0].gid_type = gid_attr.gid_type;
+ if (req_msg->alt_local_lid)
+ work->path[1].rec_type = work->path[0].rec_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -2811,7 +2839,7 @@ out:
static void cm_format_lap(struct cm_lap_msg *lap_msg,
struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len)
{
@@ -2822,8 +2850,10 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
/* todo: need remote CM response timeout */
cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
- lap_msg->alt_local_lid = alternate_path->slid;
- lap_msg->alt_remote_lid = alternate_path->dlid;
+ lap_msg->alt_local_lid =
+ htons(ntohl(sa_path_get_slid(alternate_path)));
+ lap_msg->alt_remote_lid =
+ htons(ntohl(sa_path_get_dlid(alternate_path)));
lap_msg->alt_local_gid = alternate_path->sgid;
lap_msg->alt_remote_gid = alternate_path->dgid;
cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
@@ -2841,7 +2871,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
}
int ib_send_cm_lap(struct ib_cm_id *cm_id,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len)
{
@@ -2895,14 +2925,15 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
EXPORT_SYMBOL(ib_send_cm_lap);
static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *path,
+ struct sa_path_rec *path,
struct cm_lap_msg *lap_msg)
{
memset(path, 0, sizeof *path);
+ path->rec_type = SA_PATH_REC_TYPE_IB;
path->dgid = lap_msg->alt_local_gid;
path->sgid = lap_msg->alt_remote_gid;
- path->dlid = lap_msg->alt_local_lid;
- path->slid = lap_msg->alt_remote_lid;
+ sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid)));
+ sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid)));
path->flow_label = cm_lap_get_flow_label(lap_msg);
path->hop_limit = lap_msg->alt_hop_limit;
path->traffic_class = cm_lap_get_traffic_class(lap_msg);
@@ -3409,6 +3440,8 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
+ pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
+ state, ib_wc_status_msg(wc_status));
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@@ -3706,7 +3739,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
atomic_long_inc(&port->counter_group[CM_RECV].
counter[attr_id - CM_ATTR_ID_OFFSET]);
- work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
+ work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths,
GFP_KERNEL);
if (!work) {
ib_free_recv_mad(mad_recv_wc);
@@ -3798,7 +3831,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
cm_id_priv->responder_resources;
qp_attr->min_rnr_timer = 0;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
@@ -3852,7 +3885,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
default:
break;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
qp_attr->path_mig_state = IB_MIG_REARM;
}
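The cm.c hunks above repeatedly use htons(ntohl(...)) when moving LIDs between the new sa_path_rec accessors and CM MAD fields. A minimal sketch of that conversion, under the assumption that sa_path_get_slid()/sa_path_get_dlid() hand back a 32-bit big-endian LID while the MAD fields stay 16 bits wide:

#include <linux/types.h>
#include <asm/byteorder.h>

static __be16 example_pathrec_lid_to_mad_lid(__be32 lid32)
{
	/* to host order, truncate to the low 16 bits, back to wire order */
	return htons(ntohl(lid32));
}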
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 3e70a9c5d79d..0eb393237ba2 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -198,6 +198,7 @@ struct cma_device {
atomic_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
+ u8 *default_roce_tos;
};
struct rdma_bind_list {
@@ -269,8 +270,7 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
int cma_get_default_gid_type(struct cma_device *cma_dev,
unsigned int port)
{
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
@@ -282,8 +282,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
{
unsigned long supported_gids;
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
@@ -297,6 +296,25 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+ u8 default_roce_tos)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
+ default_roce_tos;
+
+ return 0;
+}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
return cma_dev->device;
@@ -343,6 +361,7 @@ struct rdma_id_private {
u32 options;
u8 srq;
u8 tos;
+ bool tos_set;
u8 reuseaddr;
u8 afonly;
enum ib_gid_type gid_type;
@@ -604,22 +623,11 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port))
ndev = dev_get_by_index(&init_net, bound_if_index);
- if (ndev && ndev->flags & IFF_LOOPBACK) {
- pr_info("detected loopback device\n");
- dev_put(ndev);
-
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- ndev = device->get_netdev(device, port);
- if (!ndev)
- return -ENODEV;
- }
- } else {
+ else
gid_type = IB_GID_TYPE_IB;
- }
+
ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, NULL);
@@ -709,6 +717,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
union ib_gid gid, sgid, *dgid;
u16 pkey, index;
u8 p;
+ enum ib_port_state port_state;
int i;
cma_dev = NULL;
@@ -724,6 +733,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
continue;
+ if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
+ continue;
for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
&gid, NULL);
i++) {
@@ -735,7 +746,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
}
if (!cma_dev && (gid.global.subnet_prefix ==
- dgid->global.subnet_prefix)) {
+ dgid->global.subnet_prefix) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
sgid = gid;
id_priv->id.port_num = p;
@@ -778,6 +790,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
+ id_priv->tos_set = false;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
@@ -905,7 +918,8 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
goto out;
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
+ rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index,
+ &sgid, NULL);
if (ret)
goto out;
@@ -1019,6 +1033,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
} else
ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
qp_attr_mask);
+ qp_attr->port_num = id_priv->id.port_num;
+ *qp_attr_mask |= IB_QP_PORT;
} else
ret = -ENOSYS;
@@ -1103,7 +1119,7 @@ static inline int cma_any_port(struct sockaddr *addr)
static void cma_save_ib_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct rdma_cm_id *listen_id,
- struct ib_sa_path_rec *path)
+ struct sa_path_rec *path)
{
struct sockaddr_ib *listen_ib, *ib;
@@ -1249,7 +1265,7 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
memcpy(&req->local_gid, &req_param->primary_path->sgid,
sizeof(req->local_gid));
req->has_gid = true;
- req->service_id = req_param->primary_path->service_id;
+ req->service_id = req_param->primary_path->service_id;
req->pkey = be16_to_cpu(req_param->primary_path->pkey);
if (req->pkey != req_param->bth_pkey)
pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
@@ -1689,6 +1705,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
return 0;
reject:
+ pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
@@ -1730,6 +1747,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
+ if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ (id_priv->id.qp_type != IB_QPT_UD))
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1760,6 +1780,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
+ pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
+ ib_event->param.rej_rcvd.reason));
cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
@@ -1794,8 +1816,9 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct rdma_cm_id *id;
struct rdma_route *rt;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
+ struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
const __be64 service_id =
- ib_event->param.req_rcvd.primary_path->service_id;
+ ib_event->param.req_rcvd.primary_path->service_id;
int ret;
id = rdma_create_id(listen_id->route.addr.dev_addr.net,
@@ -1817,7 +1840,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (!rt->path_rec)
goto err;
- rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
+ rt->path_rec[0] = *path;
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
@@ -2266,10 +2289,11 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->tos = (u8) tos;
+ id_priv->tos_set = true;
}
EXPORT_SYMBOL(rdma_set_service_type);
-static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
+static void cma_query_handler(int status, struct sa_path_rec *path_rec,
void *context)
{
struct cma_work *work = context;
@@ -2285,6 +2309,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
}
queue_work(cma_wq, &work->work);
@@ -2294,18 +2320,24 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct cma_work *work)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct ib_sa_path_rec path_rec;
+ struct sa_path_rec path_rec;
ib_sa_comp_mask comp_mask;
struct sockaddr_in6 *sin6;
struct sockaddr_ib *sib;
memset(&path_rec, 0, sizeof path_rec);
+
+ if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num))
+ path_rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path_rec.rec_type = SA_PATH_REC_TYPE_IB;
rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
- path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
+ path_rec.service_id = rdma_get_service_id(&id_priv->id,
+ cma_dst_addr(id_priv));
comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
@@ -2419,7 +2451,7 @@ err1:
}
int rdma_set_ib_paths(struct rdma_cm_id *id,
- struct ib_sa_path_rec *path_rec, int num_paths)
+ struct sa_path_rec *path_rec, int num_paths)
{
struct rdma_id_private *id_priv;
int ret;
@@ -2467,14 +2499,12 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
struct net_device *dev;
prio = rt_tos2priority(tos);
- dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_real_dev(ndev) : ndev;
-
+ dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
#if IS_ENABLED(CONFIG_VLAN_8021Q)
- if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(ndev))
return (vlan_dev_get_egress_qos_mask(ndev, prio) &
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
#endif
@@ -2500,6 +2530,10 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -2526,36 +2560,22 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
- if (ndev->flags & IFF_LOOPBACK) {
- dev_put(ndev);
- if (!id_priv->id.device->get_netdev) {
- ret = -EOPNOTSUPP;
- goto err2;
- }
-
- ndev = id_priv->id.device->get_netdev(id_priv->id.device,
- id_priv->id.port_num);
- if (!ndev) {
- ret = -ENODEV;
- goto err2;
- }
- }
-
- route->path_rec->net = &init_net;
- route->path_rec->ifindex = ndev->ifindex;
supported_gids = roce_gid_type_mask_support(id_priv->id.device,
id_priv->id.port_num);
- route->path_rec->gid_type =
- cma_route_gid_type(addr->dev_addr.network,
- supported_gids,
- id_priv->gid_type);
+ gid_type = cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
+ route->path_rec->rec_type =
+ sa_conv_gid_to_pathrec_type(gid_type);
+ sa_path_set_ndev(route->path_rec, &init_net);
+ sa_path_set_ifindex(route->path_rec, ndev->ifindex);
}
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
+ sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
@@ -2563,8 +2583,10 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
&route->path_rec->dgid);
/* Use the hint from IP Stack to select GID Type */
- if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
- route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+
if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
/* TODO: get the hoplimit from the inet/inet6 device */
route->path_rec->hop_limit = addr->dev_addr.hoplimit;
@@ -2573,7 +2595,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
- route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
+ route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
+ route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
@@ -2652,8 +2675,8 @@ static void cma_set_loopback(struct sockaddr *addr)
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev, *cur_dev;
- struct ib_port_attr port_attr;
union ib_gid gid;
+ enum ib_port_state port_state;
u16 pkey;
int ret;
u8 p;
@@ -2669,8 +2692,8 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
cma_dev = cur_dev;
for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
- if (!ib_query_port(cur_dev->device, p, &port_attr) &&
- port_attr.state == IB_PORT_ACTIVE) {
+ if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
goto port_found;
}
@@ -2720,8 +2743,14 @@ static void addr_handler(int status, struct sockaddr *src_addr,
goto out;
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
- if (!status && !id_priv->cma_dev)
+ if (!status && !id_priv->cma_dev) {
status = cma_acquire_dev(id_priv, NULL);
+ if (status)
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
+ status);
+ } else {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
+ }
if (status) {
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2833,20 +2862,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
+ if (ret) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return ret;
+ }
}
- if (cma_family(id_priv) != dst_addr->sa_family)
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
atomic_inc(&id_priv->refcount);
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
@@ -2962,6 +2997,43 @@ err:
return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
}
+static int cma_port_is_unique(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *cur_id;
+ struct sockaddr *daddr = cma_dst_addr(id_priv);
+ struct sockaddr *saddr = cma_src_addr(id_priv);
+ __be16 dport = cma_port(daddr);
+
+ hlist_for_each_entry(cur_id, &bind_list->owners, node) {
+ struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
+ struct sockaddr *cur_saddr = cma_src_addr(cur_id);
+ __be16 cur_dport = cma_port(cur_daddr);
+
+ if (id_priv == cur_id)
+ continue;
+
+ /* different dest port -> unique */
+ if (!cma_any_port(cur_daddr) &&
+ (dport != cur_dport))
+ continue;
+
+ /* different src address -> unique */
+ if (!cma_any_addr(saddr) &&
+ !cma_any_addr(cur_saddr) &&
+ cma_addr_cmp(saddr, cur_saddr))
+ continue;
+
+ /* different dst address -> unique */
+ if (!cma_any_addr(cur_daddr) &&
+ cma_addr_cmp(daddr, cur_daddr))
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ return 0;
+}
+
static int cma_alloc_any_port(enum rdma_port_space ps,
struct rdma_id_private *id_priv)
{
@@ -2974,9 +3046,19 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
retry:
- if (last_used_port != rover &&
- !cma_ps_find(net, ps, (unsigned short)rover)) {
- int ret = cma_alloc_port(ps, id_priv, rover);
+ if (last_used_port != rover) {
+ struct rdma_bind_list *bind_list;
+ int ret;
+
+ bind_list = cma_ps_find(net, ps, (unsigned short)rover);
+
+ if (!bind_list) {
+ ret = cma_alloc_port(ps, id_priv, rover);
+ } else {
+ ret = cma_port_is_unique(bind_list, id_priv);
+ if (!ret)
+ cma_bind_port(bind_list, id_priv);
+ }
/*
* Remember previously used port number in order to avoid
* re-using same port immediately after it is closed.
@@ -3205,6 +3287,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
int ret;
+ struct sockaddr *daddr;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
@@ -3244,6 +3327,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err2;
+ daddr = cma_dst_addr(id_priv);
+ daddr->sa_family = addr->sa_family;
+
return 0;
err2:
if (id_priv->cma_dev)
@@ -3308,10 +3394,13 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (rep->status != IB_SIDR_SUCCESS) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = ib_event->param.sidr_rep_rcvd.status;
+ pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
+ event.status);
break;
}
ret = cma_set_qkey(id_priv, rep->qkey);
if (ret) {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret);
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = ret;
break;
@@ -3583,6 +3672,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
+ if (!conn_param)
+ return -EINVAL;
+
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
@@ -3760,10 +3852,17 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ else
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
+ status);
mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp)
+ if (!status && id_priv->id.qp) {
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
be16_to_cpu(multicast->rec.mlid));
+ if (status)
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
+ status);
+ }
mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
@@ -3833,63 +3932,10 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
}
}
-static void cma_query_sa_classport_info_cb(int status,
- struct ib_class_port_info *rec,
- void *context)
-{
- struct class_port_info_context *cb_ctx = context;
-
- WARN_ON(!context);
-
- if (status || !rec) {
- pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
- cb_ctx->device->name, cb_ctx->port_num, status);
- goto out;
- }
-
- memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
-
-out:
- complete(&cb_ctx->done);
-}
-
-static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
- struct ib_class_port_info *class_port_info)
-{
- struct class_port_info_context *cb_ctx;
- int ret;
-
- cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
- if (!cb_ctx)
- return -ENOMEM;
-
- cb_ctx->device = device;
- cb_ctx->class_port_info = class_port_info;
- cb_ctx->port_num = port_num;
- init_completion(&cb_ctx->done);
-
- ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
- CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
- GFP_KERNEL, cma_query_sa_classport_info_cb,
- cb_ctx, &cb_ctx->sa_query);
- if (ret < 0) {
- pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
- device->name, port_num, ret);
- goto out;
- }
-
- wait_for_completion(&cb_ctx->done);
-
-out:
- kfree(cb_ctx);
- return ret;
-}
-
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
- struct ib_class_port_info class_port_info;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret;
@@ -3910,21 +3956,14 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
- ret = cma_query_sa_classport_info(id_priv->id.device,
- id_priv->id.port_num,
- &class_port_info);
-
- if (ret)
- return ret;
-
- if (!(ib_get_cpi_capmask2(&class_port_info) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
- id_priv->id.device->name, id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
+ if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
+ (!ib_sa_sendonly_fullmem_support(&sa_client,
+ id_priv->id.device,
+ id_priv->id.port_num))) {
+ pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
+ "RDMA CM: SM doesn't support Send Only Full Member option\n",
+ id_priv->id.device->name, id_priv->id.port_num);
+ return -EOPNOTSUPP;
}
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
@@ -4229,15 +4268,21 @@ static void cma_add_one(struct ib_device *device)
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type) {
- kfree(cma_dev);
- return;
- }
+ if (!cma_dev->default_gid_type)
+ goto free_cma_dev;
+
+ cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_roce_tos),
+ GFP_KERNEL);
+ if (!cma_dev->default_roce_tos)
+ goto free_gid_type;
+
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
+ cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
}
init_completion(&cma_dev->comp);
@@ -4250,6 +4295,16 @@ static void cma_add_one(struct ib_device *device)
list_for_each_entry(id_priv, &listen_any_list, list)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
+
+ return;
+
+free_gid_type:
+ kfree(cma_dev->default_gid_type);
+
+free_cma_dev:
+ kfree(cma_dev);
+
+ return;
}
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
@@ -4318,6 +4373,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 41573df1d9fc..54076a3e8007 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -139,8 +139,50 @@ static ssize_t default_roce_mode_store(struct config_item *item,
CONFIGFS_ATTR(, default_roce_mode);
+static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ tos = cma_get_default_roce_tos(cma_dev, group->port_num);
+ cma_configfs_params_put(cma_dev);
+
+ return sprintf(buf, "%u\n", tos);
+}
+
+static ssize_t default_roce_tos_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = kstrtou8(buf, 0, &tos);
+ if (ret)
+ return ret;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ ret = cma_set_default_roce_tos(cma_dev, group->port_num, tos);
+ cma_configfs_params_put(cma_dev);
+
+ return ret ? ret : strnlen(buf, count);
+}
+
+CONFIGFS_ATTR(, default_roce_tos);
+
static struct configfs_attribute *cma_configfs_attributes[] = {
&attr_default_roce_mode,
+ &attr_default_roce_tos,
NULL,
};
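
To see how the new per-port attribute is exercised from user space, here is a minimal sketch that writes a ToS value through configfs and reads it back. The mount point, device name and port number in the path are assumptions made for illustration, not something this patch defines.

/*
 * Hedged sketch: set and read back default_roce_tos through configfs.
 * The path below (mount point, device and port) is an assumed example.
 */
#include <stdio.h>
#include <stdlib.h>

static const char *tos_path =
	"/sys/kernel/config/rdma_cm/mlx5_0/ports/1/default_roce_tos";

int main(void)
{
	char buf[16];
	FILE *f;

	f = fopen(tos_path, "w");
	if (!f) {
		perror("open default_roce_tos for writing");
		return EXIT_FAILURE;
	}
	/* The store handler parses with kstrtou8(), so 0-255 decimal works. */
	fprintf(f, "106\n");
	fclose(f);

	f = fopen(tos_path, "r");
	if (!f) {
		perror("open default_roce_tos for reading");
		return EXIT_FAILURE;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("default_roce_tos = %s", buf);
	fclose(f);
	return EXIT_SUCCESS;
}
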
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index d29372624f3a..11ae67514e13 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -35,8 +35,19 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/cgroup_rdma.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include "mad_priv.h"
+
+struct pkey_index_qp_list {
+ struct list_head pkey_index_list;
+ u16 pkey_index;
+ /* Lock to hold while iterating the qp_list. */
+ spinlock_t qp_list_lock;
+ struct list_head qp_list;
+};
#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
int cma_configfs_init(void);
@@ -62,6 +73,9 @@ int cma_get_default_gid_type(struct cma_device *cma_dev,
int cma_set_default_gid_type(struct cma_device *cma_dev,
unsigned int port,
enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+ u8 default_roce_tos);
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
int ib_device_register_sysfs(struct ib_device *device,
@@ -121,6 +135,35 @@ int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+#ifdef CONFIG_CGROUP_RDMA
+int ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_unregister_rdmacg(struct ib_device *device);
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+#else
+static inline int ib_device_register_rdmacg(struct ib_device *device)
+{ return 0; }
+
+static inline void ib_device_unregister_rdmacg(struct ib_device *device)
+{ }
+
+static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ return 0; }
+
+static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ }
+#endif
+
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
struct net_device *upper)
{
@@ -136,6 +179,16 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
+int ibnl_init(void);
+void ibnl_cleanup(void);
+
+/**
+ * Check if there are any listeners to the netlink group
+ * @group: the netlink group ID
+ * Returns 0 if there are listeners to the group, or -1 if there are none.
+ */
+int ibnl_chk_listeners(unsigned int group);
+
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb);
int ib_nl_handle_set_timeout(struct sk_buff *skb,
@@ -143,4 +196,109 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct netlink_callback *cb);
+int ib_get_cached_subnet_prefix(struct ib_device *device,
+ u8 port_num,
+ u64 *sn_pfx);
+
+#ifdef CONFIG_SECURITY_INFINIBAND
+int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec);
+
+void ib_security_destroy_port_pkey_list(struct ib_device *device);
+
+void ib_security_cache_change(struct ib_device *device,
+ u8 port_num,
+ u64 subnet_prefix);
+
+int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata);
+
+int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev);
+void ib_destroy_qp_security_begin(struct ib_qp_security *sec);
+void ib_destroy_qp_security_abort(struct ib_qp_security *sec);
+void ib_destroy_qp_security_end(struct ib_qp_security *sec);
+int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev);
+void ib_close_shared_qp_security(struct ib_qp_security *sec);
+int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type);
+void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
+int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
+#else
+static inline int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec)
+{
+ return 0;
+}
+
+static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
+{
+}
+
+static inline void ib_security_cache_change(struct ib_device *device,
+ u8 port_num,
+ u64 subnet_prefix)
+{
+}
+
+static inline int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata)
+{
+ return qp->device->modify_qp(qp->real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
+}
+
+static inline int ib_create_qp_security(struct ib_qp *qp,
+ struct ib_device *dev)
+{
+ return 0;
+}
+
+static inline void ib_destroy_qp_security_begin(struct ib_qp_security *sec)
+{
+}
+
+static inline void ib_destroy_qp_security_abort(struct ib_qp_security *sec)
+{
+}
+
+static inline void ib_destroy_qp_security_end(struct ib_qp_security *sec)
+{
+}
+
+static inline int ib_open_shared_qp_security(struct ib_qp *qp,
+ struct ib_device *dev)
+{
+ return 0;
+}
+
+static inline void ib_close_shared_qp_security(struct ib_qp_security *sec)
+{
+}
+
+static inline int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type)
+{
+ return 0;
+}
+
+static inline void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
+{
+}
+
+static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
+ u16 pkey_index)
+{
+ return 0;
+}
+#endif
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a754fc727de5..f2ae75fa3128 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -29,7 +29,13 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
{
int i, n, completed = 0;
- while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+ /*
+ * budget might be (-1) if the caller does not
+ * want to bound this call, thus we need an unsigned
+ * minimum here.
+ */
+ while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
+ budget - completed), cq->wc)) > 0) {
for (i = 0; i < n; i++) {
struct ib_wc *wc = &cq->wc[i];
@@ -58,8 +64,8 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
* context and does not ask for completion interrupts from the HCA.
*
- * Note: for compatibility reasons -1 can be passed in %budget for unlimited
- * polling. Do not use this feature in new code, it will be removed soon.
+ * Note: do not pass -1 as %budget unless it is guaranteed that the number
+ * of completions that will be processed is small.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
@@ -120,7 +126,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
- * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
@@ -196,7 +202,7 @@ void ib_free_cq(struct ib_cq *cq)
irq_poll_disable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
- flush_work(&cq->work);
+ cancel_work_sync(&cq->work);
break;
default:
WARN_ON_ONCE(1);
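
The "unsigned minimum" remark above is easier to follow with concrete numbers: once the operands are compared as u32, a budget of -1 wraps to UINT32_MAX, so the batch size always wins and polling is effectively unbounded. A small stand-alone sketch (with a plain macro standing in for the kernel's min_t) demonstrates this:

/*
 * Stand-alone illustration of the "unsigned minimum" comment in
 * __ib_process_cq().  MIN_U32 mimics the kernel's min_t(u32, ...)
 * for the purpose of this demo only.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define IB_POLL_BATCH 16
#define MIN_U32(a, b) \
	((uint32_t)(a) < (uint32_t)(b) ? (uint32_t)(a) : (uint32_t)(b))

int main(void)
{
	int completed = 0;

	/* Bounded caller: poll at most the remaining budget. */
	printf("budget=8:  poll %" PRIu32 " entries\n",
	       MIN_U32(IB_POLL_BATCH, 8 - completed));

	/* Unbounded caller: -1 wraps to UINT32_MAX, so the batch size wins. */
	printf("budget=-1: poll %" PRIu32 " entries\n",
	       MIN_U32(IB_POLL_BATCH, -1 - completed));

	return 0;
}
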
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 571974cd3919..a5dfab6adf49 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -39,6 +39,8 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
+#include <linux/security.h>
+#include <linux/notifier.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
@@ -82,6 +84,14 @@ static LIST_HEAD(client_list);
static DEFINE_MUTEX(device_mutex);
static DECLARE_RWSEM(lists_rwsem);
+static int ib_security_change(struct notifier_block *nb, unsigned long event,
+ void *lsm_data);
+static void ib_policy_change_task(struct work_struct *work);
+static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task);
+
+static struct notifier_block ibdev_lsm_nb = {
+ .notifier_call = ib_security_change,
+};
static int ib_device_check_mandatory(struct ib_device *device)
{
@@ -172,8 +182,16 @@ static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- ib_cache_release_one(dev);
- kfree(dev->port_immutable);
+ WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
+ if (dev->reg_state == IB_DEV_UNREGISTERED) {
+ /*
+ * In IB_DEV_UNINITIALIZED state, cache or port table
+ * is not even created. Free cache and port table only when
+ * device reaches UNREGISTERED state.
+ */
+ ib_cache_release_one(dev);
+ kfree(dev->port_immutable);
+ }
kfree(dev);
}
@@ -317,6 +335,65 @@ void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
}
EXPORT_SYMBOL(ib_get_device_fw_str);
+static int setup_port_pkey_list(struct ib_device *device)
+{
+ int i;
+
+ /**
+ * device->port_pkey_list is indexed directly by the port number,
+ * so it is declared as a 1-based array with potentially empty
+ * slots at the beginning.
+ */
+ device->port_pkey_list = kcalloc(rdma_end_port(device) + 1,
+ sizeof(*device->port_pkey_list),
+ GFP_KERNEL);
+
+ if (!device->port_pkey_list)
+ return -ENOMEM;
+
+ for (i = 0; i < (rdma_end_port(device) + 1); i++) {
+ spin_lock_init(&device->port_pkey_list[i].list_lock);
+ INIT_LIST_HEAD(&device->port_pkey_list[i].pkey_list);
+ }
+
+ return 0;
+}
+
+static void ib_policy_change_task(struct work_struct *work)
+{
+ struct ib_device *dev;
+
+ down_read(&lists_rwsem);
+ list_for_each_entry(dev, &device_list, core_list) {
+ int i;
+
+ for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) {
+ u64 sp;
+ int ret = ib_get_cached_subnet_prefix(dev,
+ i,
+ &sp);
+
+ WARN_ONCE(ret,
+ "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
+ ret);
+ if (!ret)
+ ib_security_cache_change(dev, i, sp);
+ }
+ }
+ up_read(&lists_rwsem);
+}
+
+static int ib_security_change(struct notifier_block *nb, unsigned long event,
+ void *lsm_data)
+{
+ if (event != LSM_POLICY_CHANGE)
+ return NOTIFY_DONE;
+
+ schedule_work(&ib_policy_change_work);
+
+ return NOTIFY_OK;
+}
+
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
@@ -333,6 +410,29 @@ int ib_register_device(struct ib_device *device,
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+ struct device *parent = device->dev.parent;
+
+ WARN_ON_ONCE(!parent);
+ WARN_ON_ONCE(device->dma_device);
+ if (device->dev.dma_ops) {
+ /*
+ * The caller provided custom DMA operations. Copy the
+ * DMA-related fields that are used by e.g. dma_alloc_coherent()
+ * into device->dev.
+ */
+ device->dma_device = &device->dev;
+ if (!device->dev.dma_mask)
+ device->dev.dma_mask = parent->dma_mask;
+ if (!device->dev.coherent_dma_mask)
+ device->dev.coherent_dma_mask =
+ parent->coherent_dma_mask;
+ } else {
+ /*
+ * The caller did not provide custom DMA operations. Use the
+ * DMA mapping operations of the parent device.
+ */
+ device->dma_device = parent;
+ }
mutex_lock(&device_mutex);
@@ -354,26 +454,36 @@ int ib_register_device(struct ib_device *device,
goto out;
}
+ ret = setup_port_pkey_list(device);
+ if (ret) {
+ pr_warn("Couldn't create per port_pkey_list\n");
+ goto out;
+ }
+
ret = ib_cache_setup_one(device);
if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
- goto out;
+ goto port_cleanup;
+ }
+
+ ret = ib_device_register_rdmacg(device);
+ if (ret) {
+ pr_warn("Couldn't register device with rdma cgroup\n");
+ goto cache_cleanup;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
device->reg_state = IB_DEV_REGISTERED;
@@ -385,6 +495,14 @@ int ib_register_device(struct ib_device *device,
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
+ mutex_unlock(&device_mutex);
+ return 0;
+
+cache_cleanup:
+ ib_cache_cleanup_one(device);
+ ib_cache_release_one(device);
+port_cleanup:
+ kfree(device->port_immutable);
out:
mutex_unlock(&device_mutex);
return ret;
@@ -421,9 +539,13 @@ void ib_unregister_device(struct ib_device *device)
mutex_unlock(&device_mutex);
+ ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
ib_cache_cleanup_one(device);
+ ib_security_destroy_port_pkey_list(device);
+ kfree(device->port_pkey_list);
+
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
@@ -659,7 +781,7 @@ int ib_query_port(struct ib_device *device,
union ib_gid gid;
int err;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
memset(port_attr, 0, sizeof(*port_attr));
@@ -825,7 +947,7 @@ int ib_modify_port(struct ib_device *device,
if (!device->modify_port)
return -ENOSYS;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
@@ -996,8 +1118,7 @@ static int __init ib_core_init(void)
return -ENOMEM;
ib_comp_wq = alloc_workqueue("ib-comp-wq",
- WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
- WQ_UNBOUND_MAX_ACTIVE);
+ WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
if (!ib_comp_wq) {
ret = -ENOMEM;
goto err;
@@ -1039,10 +1160,18 @@ static int __init ib_core_init(void)
goto err_sa;
}
+ ret = register_lsm_notifier(&ibdev_lsm_nb);
+ if (ret) {
+ pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
+ goto err_ibnl_clients;
+ }
+
ib_cache_setup();
return 0;
+err_ibnl_clients:
+ ib_remove_ibnl_clients();
err_sa:
ib_sa_cleanup();
err_mad:
@@ -1062,6 +1191,7 @@ err:
static void __exit ib_core_cleanup(void)
{
+ unregister_lsm_notifier(&ibdev_lsm_nb);
ib_cache_cleanup();
ib_remove_ibnl_clients();
ib_sa_cleanup();
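
The registration path above now unwinds partially initialized state through dedicated labels (cache_cleanup, port_cleanup) rather than repeating cleanup calls at every failure point. A small self-contained sketch of that goto-unwind idiom is shown below; setup_a()/setup_b()/setup_c() are placeholders, not the real ib_* helpers.

/*
 * Hedged sketch of the goto-unwind error handling used in the reworked
 * ib_register_device() path.  The setup/teardown steps are placeholders.
 */
#include <stdio.h>

static int setup_a(void)     { puts("setup a"); return 0; }
static void teardown_a(void) { puts("teardown a"); }
static int setup_b(void)     { puts("setup b"); return -1; /* simulate failure */ }
static void teardown_b(void) { puts("teardown b"); }
static int setup_c(void)     { puts("setup c"); return 0; }

static int register_thing(void)
{
	int ret;

	ret = setup_a();
	if (ret)
		goto out;

	ret = setup_b();
	if (ret)
		goto undo_a;

	ret = setup_c();
	if (ret)
		goto undo_b;

	return 0;

undo_b:
	teardown_b();
undo_a:
	teardown_a();
out:
	return ret;
}

int main(void)
{
	printf("register_thing() = %d\n", register_thing());
	return 0;
}
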
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdfad5f26212..84d2615b5d4b 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -96,7 +96,8 @@ struct ib_fmr_pool {
void * arg);
void *flush_arg;
- struct task_struct *thread;
+ struct kthread_worker *worker;
+ struct kthread_work work;
atomic_t req_ser;
atomic_t flush_ser;
@@ -174,29 +175,19 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
}
-static int ib_fmr_cleanup_thread(void *pool_ptr)
+static void ib_fmr_cleanup_func(struct kthread_work *work)
{
- struct ib_fmr_pool *pool = pool_ptr;
+ struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
- do {
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
- ib_fmr_batch_release(pool);
-
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
- }
+ ib_fmr_batch_release(pool);
+ atomic_inc(&pool->flush_ser);
+ wake_up_interruptible(&pool->force_wait);
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (!kthread_should_stop());
+ if (pool->flush_function)
+ pool->flush_function(pool, pool->flush_arg);
- return 0;
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
+ kthread_queue_work(pool->worker, &pool->work);
}
/**
@@ -265,15 +256,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
- pool->thread = kthread_run(ib_fmr_cleanup_thread,
- pool,
- "ib_fmr(%s)",
- device->name);
- if (IS_ERR(pool->thread)) {
- pr_warn(PFX "couldn't start cleanup thread\n");
- ret = PTR_ERR(pool->thread);
+ pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
+ if (IS_ERR(pool->worker)) {
+ pr_warn(PFX "couldn't start cleanup kthread worker\n");
+ ret = PTR_ERR(pool->worker);
goto out_free_pool;
}
+ kthread_init_work(&pool->work, ib_fmr_cleanup_func);
{
struct ib_pool_fmr *fmr;
@@ -338,7 +327,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
LIST_HEAD(fmr_list);
int i;
- kthread_stop(pool->thread);
+ kthread_destroy_worker(pool->worker);
ib_fmr_batch_release(pool);
i = 0;
@@ -388,7 +377,7 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
serial = atomic_inc_return(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
if (wait_event_interruptible(pool->force_wait,
atomic_read(&pool->flush_ser) - serial >= 0))
@@ -502,7 +491,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
list_add_tail(&fmr->list, &pool->dirty_list);
if (++pool->dirty_len >= pool->dirty_watermark) {
atomic_inc(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
}
}
}
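
For readers who have not used the kthread_worker API that the pool now relies on, the kernel-style sketch below shows the create/queue/destroy lifecycle in isolation. The demo_* names are invented for illustration and error handling is deliberately minimal.

/*
 * Hedged sketch of the kthread_worker lifecycle that fmr_pool.c now uses.
 * The demo_* identifiers are placeholders, not real kernel APIs.
 */
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/kthread.h>

struct demo_ctx {
	struct kthread_worker *worker;
	struct kthread_work work;
};

static void demo_work_fn(struct kthread_work *work)
{
	struct demo_ctx *ctx = container_of(work, struct demo_ctx, work);

	/* ... batched work, analogous to ib_fmr_batch_release() ... */
	(void)ctx;
}

static int demo_start(struct demo_ctx *ctx)
{
	ctx->worker = kthread_create_worker(0, "demo_worker");
	if (IS_ERR(ctx->worker))
		return PTR_ERR(ctx->worker);

	kthread_init_work(&ctx->work, demo_work_fn);
	/* Replaces the old wake_up_process() on a hand-rolled kthread. */
	kthread_queue_work(ctx->worker, &ctx->work);
	return 0;
}

static void demo_stop(struct demo_ctx *ctx)
{
	/* Flushes any queued work, then stops the backing kthread. */
	kthread_destroy_worker(ctx->worker);
}
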
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 3ef51a96bbf1..f13870e69ccd 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -472,12 +472,14 @@ int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
int ret;
const char *err_str = "";
- ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy);
+ ret = nlmsg_validate(cb->nlh, nlh_len, policy_max - 1, nlmsg_policy,
+ NULL);
if (ret) {
err_str = "Invalid attribute";
goto parse_nlmsg_error;
}
- ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy);
+ ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max - 1,
+ nlmsg_policy, NULL);
if (ret) {
err_str = "Unable to parse the nlmsg";
goto parse_nlmsg_error;
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a009f7132c73..f8f53bb90837 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -40,9 +40,11 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/security.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
+#include "core_priv.h"
#include "mad_rmpp.h"
#include "smi.h"
#include "opa_smi.h"
@@ -316,7 +318,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid port %d\n",
+ port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
}
@@ -367,6 +371,12 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
atomic_set(&mad_agent_priv->refcount, 1);
init_completion(&mad_agent_priv->comp);
+ ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
+ if (ret2) {
+ ret = ERR_PTR(ret2);
+ goto error4;
+ }
+
spin_lock_irqsave(&port_priv->reg_lock, flags);
mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
@@ -384,7 +394,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (method) {
if (method_in_use(&method,
mad_reg_req))
- goto error4;
+ goto error5;
}
}
ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
@@ -400,14 +410,14 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (is_vendor_method_in_use(
vendor_class,
mad_reg_req))
- goto error4;
+ goto error5;
}
}
ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
}
if (ret2) {
ret = ERR_PTR(ret2);
- goto error4;
+ goto error5;
}
}
@@ -416,9 +426,10 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
return &mad_agent_priv->agent;
-
-error4:
+error5:
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
+error4:
kfree(reg_req);
error3:
kfree(mad_agent_priv);
@@ -489,6 +500,7 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
struct ib_mad_agent *ret;
struct ib_mad_snoop_private *mad_snoop_priv;
int qpn;
+ int err;
/* Validate parameters */
if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
@@ -523,17 +535,25 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
mad_snoop_priv->agent.port_num = port_num;
mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
init_completion(&mad_snoop_priv->comp);
+
+ err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto error2;
+ }
+
mad_snoop_priv->snoop_index = register_snoop_agent(
&port_priv->qp_info[qpn],
mad_snoop_priv);
if (mad_snoop_priv->snoop_index < 0) {
ret = ERR_PTR(mad_snoop_priv->snoop_index);
- goto error2;
+ goto error3;
}
atomic_set(&mad_snoop_priv->refcount, 1);
return &mad_snoop_priv->agent;
-
+error3:
+ ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
error2:
kfree(mad_snoop_priv);
error1:
@@ -579,6 +599,8 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
deref_mad_agent(mad_agent_priv);
wait_for_completion(&mad_agent_priv->comp);
+ ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
+
kfree(mad_agent_priv->reg_req);
kfree(mad_agent_priv);
}
@@ -597,13 +619,15 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
deref_snoop_agent(mad_snoop_priv);
wait_for_completion(&mad_snoop_priv->comp);
+ ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
+
kfree(mad_snoop_priv);
}
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*/
-int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_snoop_private *mad_snoop_priv;
@@ -620,7 +644,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
agent);
unregister_mad_snoop(mad_snoop_priv);
}
- return 0;
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -1214,12 +1237,16 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
/* Walk list of send WRs and post each on send list */
for (; send_buf; send_buf = next_send_buf) {
-
mad_send_wr = container_of(send_buf,
struct ib_mad_send_wr_private,
send_buf);
mad_agent_priv = mad_send_wr->mad_agent_priv;
+ ret = ib_mad_enforce_security(mad_agent_priv,
+ mad_send_wr->send_wr.pkey_index);
+ if (ret)
+ goto error;
+
if (!send_buf->mad_agent->send_handler ||
(send_buf->timeout_ms &&
!send_buf->mad_agent->recv_handler)) {
@@ -1832,12 +1859,13 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
const struct ib_mad_send_wr_private *wr,
const struct ib_mad_recv_wc *rwc )
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
+ bool has_grh;
send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
@@ -1846,36 +1874,40 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
/* both requests, or both responses. GIDs different */
return 0;
- if (ib_query_ah(wr->send_buf.ah, &attr))
+ if (rdma_query_ah(wr->send_buf.ah, &attr))
/* Assume not equal, to avoid false positives. */
return 0;
- if (!!(attr.ah_flags & IB_AH_GRH) !=
- !!(rwc->wc->wc_flags & IB_WC_GRH))
+ has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
+ if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
/* one has GID, other does not. Assume different */
return 0;
if (!send_resp && rcv_resp) {
/* is request/response. */
- if (!(attr.ah_flags & IB_AH_GRH)) {
+ if (!has_grh) {
if (ib_get_cached_lmc(device, port_num, &lmc))
return 0;
- return (!lmc || !((attr.src_path_bits ^
+ return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
rwc->wc->dlid_path_bits) &
((1 << lmc) - 1)));
} else {
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(&attr);
+
if (ib_get_cached_gid(device, port_num,
- attr.grh.sgid_index, &sgid, NULL))
+ grh->sgid_index, &sgid, NULL))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
}
}
- if (!(attr.ah_flags & IB_AH_GRH))
- return attr.dlid == rwc->wc->slid;
+ if (!has_grh)
+ return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
else
- return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
+ return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
+ rwc->recv_buf.grh->sgid.raw,
16);
}
@@ -1940,6 +1972,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
+ int ret;
+
+ ret = ib_mad_enforce_security(mad_agent_priv,
+ mad_recv_wc->wc->pkey_index);
+ if (ret) {
+ ib_free_recv_mad(mad_recv_wc);
+ deref_mad_agent(mad_agent_priv);
+ }
INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
@@ -1997,6 +2037,8 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
+
+ return;
}
static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 382941b46e43..0d3cca0a8890 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
- ib_destroy_ah(rmpp_recv->ah);
+ rdma_destroy_ah(rmpp_recv->ah);
kfree(rmpp_recv);
}
@@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
hdr_len, 0, GFP_KERNEL,
IB_MGMT_BASE_VERSION);
if (IS_ERR(msg))
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
else {
msg->ah = ah;
msg->context[0] = ah;
@@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
@@ -209,7 +209,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -237,7 +237,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
@@ -852,7 +852,7 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
struct mad_rmpp_recv *rmpp_recv;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
int newwin = 1;
@@ -867,10 +867,10 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
(rmpp_recv->method & IB_MGMT_METHOD_RESP))
continue;
- if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+ if (rdma_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
continue;
- if (rmpp_recv->slid == ah_attr.dlid) {
+ if (rmpp_recv->slid == rdma_ah_get_dlid(&ah_attr)) {
newwin = rmpp_recv->repwin;
break;
}
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 322cb67b07a9..45f2f095f793 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -720,7 +720,7 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -743,19 +743,18 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
return ret;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->mlid);
- ah_attr->sl = rec->sl;
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->mgid;
-
- ah_attr->grh.sgid_index = (u8) gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
-
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
+
+ rdma_ah_set_grh(ah_attr, &rec->mgid,
+ be32_to_cpu(rec->flow_label),
+ (u8)gid_index,
+ rec->hop_limit,
+ rec->traffic_class);
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 10469b0088b5..94931c474d41 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -37,6 +37,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <rdma/rdma_netlink.h>
+#include "core_priv.h"
struct ibnl_client {
struct list_head list;
@@ -55,7 +56,6 @@ int ibnl_chk_listeners(unsigned int group)
return -1;
return 0;
}
-EXPORT_SYMBOL(ibnl_chk_listeners);
int ibnl_add_client(int index, int nops,
const struct ibnl_client_cbs cb_table[])
@@ -146,7 +146,8 @@ nla_put_failure:
}
EXPORT_SYMBOL(ibnl_put_attr);
-static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct ibnl_client *client;
int type = nlh->nlmsg_type;
@@ -209,7 +210,7 @@ static void ibnl_rcv_reply_skb(struct sk_buff *skb)
if (nlh->nlmsg_flags & NLM_F_REQUEST)
return;
- ibnl_rcv_msg(skb, nlh);
+ ibnl_rcv_msg(skb, nlh, NULL);
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
new file mode 100644
index 000000000000..41c31a2bf093
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.c
@@ -0,0 +1,627 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <linux/rcupdate.h>
+#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
+
+void uverbs_uobject_get(struct ib_uobject *uobject)
+{
+ kref_get(&uobject->ref);
+}
+
+static void uverbs_uobject_free(struct kref *ref)
+{
+ struct ib_uobject *uobj =
+ container_of(ref, struct ib_uobject, ref);
+
+ if (uobj->type->type_class->needs_kfree_rcu)
+ kfree_rcu(uobj, rcu);
+ else
+ kfree(uobj);
+}
+
+void uverbs_uobject_put(struct ib_uobject *uobject)
+{
+ kref_put(&uobject->ref, uverbs_uobject_free);
+}
+
+static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
+{
+ /*
+ * When a shared access is required, we use a positive counter. Each
+ * shared access request checks that the value != -1 and increment it.
+ * Exclusive access is required for operations like write or destroy.
+ * In exclusive access mode, we check that the counter is zero (nobody
+ * claimed this object) and we set it to -1. Releasing a shared access
+ * lock is done simply by decreasing the counter. As for exclusive
+ * access locks, since only a single one of them is allowed
+ * concurrently, setting the counter to zero is enough for releasing
+ * this lock.
+ */
+ if (!exclusive)
+ return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+ -EBUSY : 0;
+
+ /* lock is either WRITE or DESTROY - should be exclusive */
+ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+}
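
The usecnt scheme described in the comment above can be modelled with stand-alone C11 atomics: shared users count upwards as long as the counter is not -1, while an exclusive user swaps 0 for -1. The sketch below is a simplified user-space rendition of that idea, not the kernel code itself.

/*
 * Stand-alone C11 model of the usecnt locking scheme: shared users count
 * upwards, an exclusive user parks the counter at -1.  Illustration only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int usecnt = 0;

static bool try_lock(bool exclusive)
{
	if (exclusive) {
		int expected = 0;
		/* Succeeds only if nobody holds the object at all. */
		return atomic_compare_exchange_strong(&usecnt, &expected, -1);
	}

	/* Shared access: increment unless an exclusive holder set -1. */
	for (int cur = atomic_load(&usecnt); cur != -1; ) {
		if (atomic_compare_exchange_weak(&usecnt, &cur, cur + 1))
			return true;
	}
	return false;
}

static void unlock(bool exclusive)
{
	if (exclusive)
		atomic_store(&usecnt, 0);
	else
		atomic_fetch_sub(&usecnt, 1);
}

int main(void)
{
	printf("shared    #1: %s\n", try_lock(false) ? "ok" : "busy");
	printf("exclusive   : %s\n", try_lock(true)  ? "ok" : "busy"); /* busy */
	unlock(false);
	printf("exclusive   : %s\n", try_lock(true)  ? "ok" : "busy"); /* ok */
	printf("shared    #2: %s\n", try_lock(false) ? "ok" : "busy"); /* busy */
	unlock(true);
	return 0;
}
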
+
+static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
+ const struct uverbs_obj_type *type)
+{
+ struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
+
+ if (!uobj)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * user_handle should be filled by the handler;
+ * the object is added to the list in the commit stage.
+ */
+ uobj->context = context;
+ uobj->type = type;
+ atomic_set(&uobj->usecnt, 0);
+ kref_init(&uobj->ref);
+
+ return uobj;
+}
+
+static int idr_add_uobj(struct ib_uobject *uobj)
+{
+ int ret;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&uobj->context->ufile->idr_lock);
+
+ /*
+ * We start by allocating an IDR entry pointing to NULL. This represents an
+ * object which isn't initialized yet. We'll replace it later on with
+ * the real object once we commit.
+ */
+ ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
+ min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
+ if (ret >= 0)
+ uobj->id = ret;
+
+ spin_unlock(&uobj->context->ufile->idr_lock);
+ idr_preload_end();
+
+ return ret < 0 ? ret : 0;
+}
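
idr_add_uobj() reserves an id that temporarily maps to NULL so that concurrent lookups never see a half-built object; alloc_commit_idr_uobject() later publishes the real pointer with idr_replace(). The stand-alone sketch below models that reserve-then-publish pattern with a plain array standing in for the IDR.

/*
 * Stand-alone model of "reserve an id pointing to nothing, publish the
 * object at commit time".  A fixed array stands in for the kernel IDR.
 */
#include <stdio.h>
#include <stddef.h>

#define MAX_IDS 8

static char reserved_slot;			/* sentinel address */
#define RESERVED ((void *)&reserved_slot)

static void *table[MAX_IDS];

/* Phase 1: grab an id, but leave it invisible to lookups. */
static int reserve_id(void)
{
	for (int id = 0; id < MAX_IDS; id++) {
		if (!table[id]) {
			table[id] = RESERVED;
			return id;
		}
	}
	return -1;
}

/* Phase 2 ("commit"): publish the fully initialised object. */
static void publish(int id, void *obj)
{
	table[id] = obj;
}

/* Lookup never returns a half-built object. */
static void *lookup(int id)
{
	void *obj = table[id];

	return obj == RESERVED ? NULL : obj;
}

int main(void)
{
	int value = 42;
	int id = reserve_id();

	if (id < 0)
		return 1;
	printf("after reserve: %p\n", lookup(id));	/* (nil) */
	publish(id, &value);
	printf("after publish: %p\n", lookup(id));	/* &value */
	return 0;
}
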
+
+/*
+ * This only removes the uobject from the IDR; uverbs_uobject_put() is still
+ * required.
+ */
+static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
+{
+ spin_lock(&uobj->context->ufile->idr_lock);
+ idr_remove(&uobj->context->ufile->idr, uobj->id);
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
+static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+
+ rcu_read_lock();
+ /* object won't be released as we're protected in rcu */
+ uobj = idr_find(&ucontext->ufile->idr, id);
+ if (!uobj) {
+ uobj = ERR_PTR(-ENOENT);
+ goto free;
+ }
+
+ uverbs_uobject_get(uobj);
+free:
+ rcu_read_unlock();
+ return uobj;
+}
+
+static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct file *f;
+ struct ib_uobject *uobject;
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+
+ if (exclusive)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ f = fget(id);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ uobject = f->private_data;
+ /*
+ * fget(id) ensures we are not currently running uverbs_close_fd,
+ * and the caller is expected to ensure that uverbs_close_fd is never
+ * done while a call to lookup is possible.
+ */
+ if (f->f_op != fd_type->fops) {
+ fput(f);
+ return ERR_PTR(-EBADF);
+ }
+
+ uverbs_uobject_get(uobject);
+ return uobject;
+}
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ if (uobj->type != type) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ ret = uverbs_try_lock_object(uobj, exclusive);
+ if (ret) {
+ WARN(ucontext->cleanup_reason,
+ "ib_uverbs: Trying to lookup_get while cleanup context\n");
+ goto free;
+ }
+
+ return uobj;
+free:
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ int ret;
+ struct ib_uobject *uobj;
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = idr_add_uobj(uobj);
+ if (ret)
+ goto uobj_put;
+
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto idr_remove;
+
+ return uobj;
+
+idr_remove:
+ uverbs_idr_remove_uobj(uobj);
+uobj_put:
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+ int new_fd;
+ struct ib_uobject *uobj;
+ struct ib_uobject_file *uobj_file;
+ struct file *filp;
+
+ new_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (new_fd < 0)
+ return ERR_PTR(new_fd);
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj)) {
+ put_unused_fd(new_fd);
+ return uobj;
+ }
+
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ filp = anon_inode_getfile(fd_type->name,
+ fd_type->fops,
+ uobj_file,
+ fd_type->flags);
+ if (IS_ERR(filp)) {
+ put_unused_fd(new_fd);
+ uverbs_uobject_put(uobj);
+ return (void *)filp;
+ }
+
+ uobj_file->uobj.id = new_fd;
+ uobj_file->uobj.object = filp;
+ uobj_file->ufile = ucontext->ufile;
+ INIT_LIST_HEAD(&uobj->list);
+ kref_get(&uobj_file->ufile->ref);
+
+ return uobj;
+}
+
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ return type->type_class->alloc_begin(type, ucontext);
+}
+
+static void uverbs_uobject_add(struct ib_uobject *uobject)
+{
+ mutex_lock(&uobject->context->uobjects_lock);
+ list_add(&uobject->list, &uobject->context->uobjects);
+ mutex_unlock(&uobject->context->uobjects_lock);
+}
+
+static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_idr_type *idr_type =
+ container_of(uobj->type, struct uverbs_obj_idr_type,
+ type);
+ int ret = idr_type->destroy_object(uobj, why);
+
+ /*
+ * We can only fail gracefully if the user requested to destroy the
+ * object. In the rest of the cases, just remove whatever you can.
+ */
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_idr_remove_uobj(uobj);
+
+ return ret;
+}
+
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ struct file *filp = uobj->object;
+ int id = uobj_file->uobj.id;
+
+ /* Unsuccessful NEW */
+ fput(filp);
+ put_unused_fd(id);
+}
+
+static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(uobj->type, struct uverbs_obj_fd_type, type);
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ int ret = fd_type->context_closed(uobj_file, why);
+
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ if (why == RDMA_REMOVE_DURING_CLEANUP) {
+ alloc_abort_fd_uobject(uobj);
+ return ret;
+ }
+
+ uobj_file->uobj.context = NULL;
+ return ret;
+}
+
+static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
+{
+#ifdef CONFIG_LOCKDEP
+ if (exclusive)
+ WARN_ON(atomic_read(&uobj->usecnt) > 0);
+ else
+ WARN_ON(atomic_read(&uobj->usecnt) == -1);
+#endif
+}
+
+static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ ret = uobj->type->type_class->remove_commit(uobj, why);
+ if (ret && why == RDMA_REMOVE_DESTROY) {
+ /* We couldn't remove the object, so just unlock the uobject */
+ atomic_set(&uobj->usecnt, 0);
+ uobj->type->type_class->lookup_put(uobj, true);
+ } else {
+ mutex_lock(&ucontext->uobjects_lock);
+ list_del(&uobj->list);
+ mutex_unlock(&ucontext->uobjects_lock);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(uobj);
+ }
+
+ return ret;
+}
+
+/* This is called only for user requested DESTROY reasons */
+int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ /* put the ref count we took at lookup_get */
+ uverbs_uobject_put(uobj);
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+ WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
+ return 0;
+ }
+ lockdep_check(uobj, true);
+ ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
+
+ up_read(&ucontext->cleanup_rwsem);
+ return ret;
+}
+
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_uobject_add(uobj);
+ spin_lock(&uobj->context->ufile->idr_lock);
+ /*
+ * We already allocated this IDR with a NULL object, so
+ * this shouldn't fail.
+ */
+ WARN_ON(idr_replace(&uobj->context->ufile->idr,
+ uobj, uobj->id));
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+
+ uverbs_uobject_add(&uobj_file->uobj);
+ fd_install(uobj_file->uobj.id, uobj->object);
+ /* This shouldn't be used anymore. Use the file object instead */
+ uobj_file->uobj.id = 0;
+ /* Get another reference as we export this to the fops */
+ uverbs_uobject_get(&uobj_file->uobj);
+}
+
+int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
+{
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
+ int ret;
+
+ WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
+ ret = uobj->type->type_class->remove_commit(uobj,
+ RDMA_REMOVE_DURING_CLEANUP);
+ if (ret)
+ pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
+ uobj->id);
+ return ret;
+ }
+
+ uobj->type->type_class->alloc_commit(uobj);
+ up_read(&uobj->context->cleanup_rwsem);
+
+ return 0;
+}
+
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_idr_remove_uobj(uobj);
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_uobject_put(uobj);
+}
+
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
+{
+ uobj->type->type_class->alloc_abort(uobj);
+}
+
+static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+}
+
+static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ struct file *filp = uobj->object;
+
+ WARN_ON(exclusive);
+ /* This indirectly calls uverbs_close_fd and frees the object */
+ fput(filp);
+}
+
+void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ lockdep_check(uobj, exclusive);
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ /*
+ * In order to unlock an object, either decrease its usecnt for
+ * read access or zero it in case of exclusive access. See
+ * uverbs_try_lock_object for locking schema information.
+ */
+ if (!exclusive)
+ atomic_dec(&uobj->usecnt);
+ else
+ atomic_set(&uobj->usecnt, 0);
+
+ uverbs_uobject_put(uobj);
+}
+
+const struct uverbs_obj_type_class uverbs_idr_class = {
+ .alloc_begin = alloc_begin_idr_uobject,
+ .lookup_get = lookup_get_idr_uobject,
+ .alloc_commit = alloc_commit_idr_uobject,
+ .alloc_abort = alloc_abort_idr_uobject,
+ .lookup_put = lookup_put_idr_uobject,
+ .remove_commit = remove_commit_idr_uobject,
+ /*
+ * When we destroy an object, we first just lock it for WRITE and
+ * actually DESTROY it in the finalize stage. So, the problematic
+ * scenario is when we just started the finalize stage of the
+ * destruction (nothing was executed yet). Now, the other thread
+ * fetched the object for READ access, but it didn't lock it yet.
+ * The DESTROY thread continues and starts destroying the object.
+ * When the other thread continues, without RCU it would access
+ * freed memory. However, rcu_read_lock delays the free until the
+ * READ side leaves its RCU read-side critical section. Since the
+ * exclusive lock of the object is still taken by the DESTROY flow, the
+ * READ operation will get -EBUSY and it'll just bail out.
+ */
+ .needs_kfree_rcu = true,
+};
+
+static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
+{
+ struct ib_ucontext *ucontext;
+ struct ib_uverbs_file *ufile = uobj_file->ufile;
+ int ret;
+
+ mutex_lock(&uobj_file->ufile->cleanup_mutex);
+
+ /* uobject was either already cleaned up or is being cleaned up right now */
+ if (!uobj_file->uobj.context ||
+ !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
+ goto unlock;
+
+ ucontext = uobj_file->uobj.context;
+ ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
+ up_read(&ucontext->cleanup_rwsem);
+ if (ret)
+ pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
+unlock:
+ mutex_unlock(&ufile->cleanup_mutex);
+}
+
+void uverbs_close_fd(struct file *f)
+{
+ struct ib_uobject_file *uobj_file = f->private_data;
+ struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+
+ _uverbs_close_fd(uobj_file);
+ uverbs_uobject_put(&uobj_file->uobj);
+ kref_put(uverbs_file_ref, ib_uverbs_release_file);
+}
+
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+{
+ enum rdma_remove_reason reason = device_removed ?
+ RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
+ unsigned int cur_order = 0;
+
+ ucontext->cleanup_reason = reason;
+ /*
+ * Waits for all remove_commit and alloc_commit to finish. Logically, we
+ * want to hold this forever as the context is going to be destroyed,
+ * but we'll release it since it causes a "held lock freed" BUG message.
+ */
+ down_write(&ucontext->cleanup_rwsem);
+
+ while (!list_empty(&ucontext->uobjects)) {
+ struct ib_uobject *obj, *next_obj;
+ unsigned int next_order = UINT_MAX;
+
+ /*
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing a FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock.
+ * We take and release the lock per order traversal in order
+ * to give other threads (which might still use the FDs) a chance
+ * to run.
+ */
+ mutex_lock(&ucontext->uobjects_lock);
+ list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
+ list) {
+ if (obj->type->destroy_order == cur_order) {
+ int ret;
+
+ /*
+ * if we hit this WARN_ON, that means we are
+ * racing with a lookup_get.
+ */
+ WARN_ON(uverbs_try_lock_object(obj, true));
+ ret = obj->type->type_class->remove_commit(obj,
+ reason);
+ list_del(&obj->list);
+ if (ret)
+ pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
+ obj->id, cur_order);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(obj);
+ } else {
+ next_order = min(next_order,
+ obj->type->destroy_order);
+ }
+ }
+ mutex_unlock(&ucontext->uobjects_lock);
+ cur_order = next_order;
+ }
+ up_write(&ucontext->cleanup_rwsem);
+}
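
uverbs_cleanup_ucontext() releases objects in waves: everything at the current destroy_order first, then the smallest order that remains among the survivors. The stand-alone sketch below reproduces that two-level sweep over a plain array; the object names and orders are invented for illustration.

/*
 * Stand-alone model of the destroy_order sweep in uverbs_cleanup_ucontext():
 * release all objects of the lowest remaining order, then move on.
 * Names and orders are invented for illustration.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	const char *name;
	unsigned int destroy_order;
	bool alive;
};

int main(void)
{
	struct obj objs[] = {
		{ "mr",           0, true },
		{ "qp",           1, true },
		{ "cq",           2, true },
		{ "pd",           3, true },
		{ "comp_channel", 2, true },
	};
	const unsigned int n = sizeof(objs) / sizeof(objs[0]);
	unsigned int cur_order = 0;
	unsigned int remaining = n;

	while (remaining) {
		unsigned int next_order = UINT_MAX;

		for (unsigned int i = 0; i < n; i++) {
			if (!objs[i].alive)
				continue;
			if (objs[i].destroy_order == cur_order) {
				printf("destroy %s (order %u)\n",
				       objs[i].name, cur_order);
				objs[i].alive = false;
				remaining--;
			} else if (objs[i].destroy_order < next_order) {
				next_order = objs[i].destroy_order;
			}
		}
		cur_order = next_order;
	}
	return 0;
}
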
+
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
+{
+ ucontext->cleanup_reason = 0;
+ mutex_init(&ucontext->uobjects_lock);
+ INIT_LIST_HEAD(&ucontext->uobjects);
+ init_rwsem(&ucontext->cleanup_rwsem);
+}
+
+const struct uverbs_obj_type_class uverbs_fd_class = {
+ .alloc_begin = alloc_begin_fd_uobject,
+ .lookup_get = lookup_get_fd_uobject,
+ .alloc_commit = alloc_commit_fd_uobject,
+ .alloc_abort = alloc_abort_fd_uobject,
+ .lookup_put = lookup_put_fd_uobject,
+ .remove_commit = remove_commit_fd_uobject,
+ .needs_kfree_rcu = false,
+};
+
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
new file mode 100644
index 000000000000..1b82e7ff7fe8
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_CORE_H
+#define RDMA_CORE_H
+
+#include <linux/idr.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mutex.h>
+
+/*
+ * These functions initialize the context and clean up its uobjects.
+ * The context has a list of objects which is protected by a mutex
+ * on the context. initialize_ucontext should be called when we create
+ * a context.
+ * cleanup_ucontext removes all uobjects from the context and puts them.
+ */
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
+
+/*
+ * uverbs_uobject_get is called in order to increase the reference count on
+ * an uobject. This is useful when a handler wants to keep the uobject's memory
+ * alive, regardless of whether this uobject is still alive in the context's
+ * objects repository. Objects are put via uverbs_uobject_put.
+ */
+void uverbs_uobject_get(struct ib_uobject *uobject);
+
+/*
+ * In order to indicate we no longer need this uobject, uverbs_uobject_put
+ * is called. When the reference count drops to zero, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
+void uverbs_uobject_put(struct ib_uobject *uobject);
+
+/* Indicate this fd is no longer used by this consumer, but its memory isn't
+ * necessarily released yet. When the last reference is put, we release the
+ * memory. After this call is executed, calling uverbs_uobject_get isn't
+ * allowed.
+ * This must be called from the release file_operations of the file!
+ */
+void uverbs_close_fd(struct file *f);
+
+#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 0621f4455732..94a9eefb3cfc 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -42,6 +42,8 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
+static struct workqueue_struct *gid_cache_wq;
+
enum gid_op_type {
GID_DEL = 0,
GID_ADD
@@ -144,7 +146,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
struct net_device *real_dev;
int res;
@@ -152,11 +153,11 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
return 0;
rcu_read_lock();
- real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ real_dev = rdma_vlan_dev_real_dev(cookie);
if (!real_dev)
- real_dev = event_ndev;
+ real_dev = cookie;
- res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
@@ -192,17 +193,16 @@ static int pass_all_filter(struct ib_device *ib_dev, u8 port,
static int upper_device_filter(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
int res;
if (!rdma_ndev)
return 0;
- if (rdma_ndev == event_ndev)
+ if (rdma_ndev == cookie)
return 1;
rcu_read_lock();
- res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
rcu_read_unlock();
return res;
@@ -379,18 +379,14 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
- _add_netdev_ips(ib_dev, port, event_ndev);
+ enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
+ _add_netdev_ips(ib_dev, port, cookie);
}
static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
+ ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
@@ -460,7 +456,7 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
u8 port,
struct net_device *ndev))
{
- struct net_device *ndev = (struct net_device *)cookie;
+ struct net_device *ndev = cookie;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
@@ -519,9 +515,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
+ bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
}
/* The following functions operate on all IB devices. netdevice_event and
@@ -568,7 +562,7 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
}
INIT_WORK(&ndev_work->work, netdevice_event_work_handler);
- queue_work(ib_wq, &ndev_work->work);
+ queue_work(gid_cache_wq, &ndev_work->work);
return NOTIFY_DONE;
}
@@ -701,7 +695,7 @@ static int addr_event(struct notifier_block *this, unsigned long event,
dev_hold(ndev);
work->gid_attr.ndev = ndev;
- queue_work(ib_wq, &work->work);
+ queue_work(gid_cache_wq, &work->work);
return NOTIFY_DONE;
}
@@ -748,6 +742,10 @@ static struct notifier_block nb_inet6addr = {
int __init roce_gid_mgmt_init(void)
{
+ gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
+ if (!gid_cache_wq)
+ return -ENOMEM;
+
register_inetaddr_notifier(&nb_inetaddr);
if (IS_ENABLED(CONFIG_IPV6))
register_inet6addr_notifier(&nb_inet6addr);
@@ -772,4 +770,5 @@ void __exit roce_gid_mgmt_cleanup(void)
* ib-core is removed, all physical devices have been removed,
* so no issue with remaining hardware contexts.
*/
+ destroy_workqueue(gid_cache_wq);
}
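
The roce_gid_mgmt.c hunks above move GID cache updates off the shared ib_wq onto a dedicated ordered workqueue, so netdev and address events are processed one at a time and in the order they were queued. A minimal sketch of the same lifecycle, with illustrative names that are not taken from this patch:

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int __init example_init(void)
{
	/* Ordered workqueue: one work item at a time, strict FIFO order. */
	example_wq = alloc_ordered_workqueue("example-wq", 0);
	if (!example_wq)
		return -ENOMEM;
	return 0;
}

static void example_handle_event(struct work_struct *work)
{
	/* Event path: defer the cache update to the ordered queue. */
	queue_work(example_wq, work);
}

static void __exit example_exit(void)
{
	/* Drains all queued work items before freeing the workqueue. */
	destroy_workqueue(example_wq);
}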
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 81b742ca1639..70fa4cabe48e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -56,6 +56,8 @@
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
+#define IB_SA_CPI_MAX_RETRY_CNT 3
+#define IB_SA_CPI_RETRY_WAIT		1000 /* msecs */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
struct ib_sa_sm_ah {
@@ -65,9 +67,23 @@ struct ib_sa_sm_ah {
u8 src_path_mask;
};
+enum rdma_class_port_info_type {
+ RDMA_CLASS_PORT_INFO_IB,
+ RDMA_CLASS_PORT_INFO_OPA
+};
+
+struct rdma_class_port_info {
+ enum rdma_class_port_info_type type;
+ union {
+ struct ib_class_port_info ib;
+ struct opa_class_port_info opa;
+ };
+};
+
struct ib_sa_classport_cache {
bool valid;
- struct ib_class_port_info data;
+ int retry_cnt;
+ struct rdma_class_port_info data;
};
struct ib_sa_port {
@@ -75,6 +91,7 @@ struct ib_sa_port {
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
struct ib_sa_classport_cache classport_info;
+ struct delayed_work ib_cpi_work;
spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
u8 port_num;
@@ -103,6 +120,7 @@ struct ib_sa_query {
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
#define IB_SA_CANCEL 0x00000002
+#define IB_SA_QUERY_OPA 0x00000004
struct ib_sa_service_query {
void (*callback)(int, struct ib_sa_service_rec *, void *);
@@ -111,9 +129,10 @@ struct ib_sa_service_query {
};
struct ib_sa_path_query {
- void (*callback)(int, struct ib_sa_path_rec *, void *);
+ void (*callback)(int, struct sa_path_rec *, void *);
void *context;
struct ib_sa_query sa_query;
+ struct sa_path_rec *conv_pr;
};
struct ib_sa_guidinfo_query {
@@ -123,7 +142,7 @@ struct ib_sa_guidinfo_query {
};
struct ib_sa_classport_info_query {
- void (*callback)(int, struct ib_class_port_info *, void *);
+ void (*callback)(void *);
void *context;
struct ib_sa_query sa_query;
};
@@ -170,8 +189,8 @@ static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
+ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
@@ -187,15 +206,15 @@ static const struct ib_field path_rec_table[] = {
.offset_words = 6,
.offset_bits = 0,
.size_bits = 128 },
- { PATH_REC_FIELD(dlid),
+ { PATH_REC_FIELD(ib.dlid),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 16 },
- { PATH_REC_FIELD(slid),
+ { PATH_REC_FIELD(ib.slid),
.offset_words = 10,
.offset_bits = 16,
.size_bits = 16 },
- { PATH_REC_FIELD(raw_traffic),
+ { PATH_REC_FIELD(ib.raw_traffic),
.offset_words = 11,
.offset_bits = 0,
.size_bits = 1 },
@@ -269,6 +288,136 @@ static const struct ib_field path_rec_table[] = {
.size_bits = 48 },
};
+#define OPA_PATH_REC_FIELD(field) \
+ .struct_offset_bytes = \
+ offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = \
+ sizeof((struct sa_path_rec *)0)->field, \
+ .field_name = "sa_path_rec:" #field
+
+static const struct ib_field opa_path_rec_table[] = {
+ { OPA_PATH_REC_FIELD(service_id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { OPA_PATH_REC_FIELD(dgid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(sgid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(opa.dlid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.slid),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.raw_traffic),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 12,
+ .offset_bits = 1,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(flow_label),
+ .offset_words = 12,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { OPA_PATH_REC_FIELD(hop_limit),
+ .offset_words = 12,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(traffic_class),
+ .offset_words = 13,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(reversible),
+ .offset_words = 13,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(numb_path),
+ .offset_words = 13,
+ .offset_bits = 9,
+ .size_bits = 7 },
+ { OPA_PATH_REC_FIELD(pkey),
+ .offset_words = 13,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_PATH_REC_FIELD(opa.l2_8B),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_10B),
+ .offset_words = 14,
+ .offset_bits = 1,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_9B),
+ .offset_words = 14,
+ .offset_bits = 2,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_16B),
+ .offset_words = 14,
+ .offset_bits = 3,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 4,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_type),
+ .offset_words = 14,
+ .offset_bits = 6,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_priority),
+ .offset_words = 14,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 16,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(sl),
+ .offset_words = 14,
+ .offset_bits = 19,
+ .size_bits = 5 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(mtu_selector),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(mtu),
+ .offset_words = 15,
+ .offset_bits = 2,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(rate_selector),
+ .offset_words = 15,
+ .offset_bits = 8,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(rate),
+ .offset_words = 15,
+ .offset_bits = 10,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(packet_life_time_selector),
+ .offset_words = 15,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(packet_life_time),
+ .offset_words = 15,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(preference),
+ .offset_words = 15,
+ .offset_bits = 24,
+ .size_bits = 8 },
+};
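+
+These ib_field tables describe where each struct sa_path_rec member sits in the on-the-wire attribute (word offset, bit offset and width); ib_pack()/ib_unpack() walk them to convert between host structures and MAD payloads. A brief sketch, mirroring the calls this patch adds later in ib_sa_path_rec_get() and ib_sa_path_rec_callback() (the wrapper names here are illustrative):
+
+#include <rdma/ib_pack.h>
+#include <rdma/ib_sa.h>
+
+static void example_pack_opa_pr(struct sa_path_rec *rec, struct ib_sa_mad *mad)
+{
+	/* Host struct -> big-endian wire format in the MAD data area. */
+	ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+		rec, mad->data);
+}
+
+static void example_unpack_opa_pr(struct ib_sa_mad *mad, struct sa_path_rec *rec)
+{
+	/* Wire format -> host struct; the record type is not on the wire. */
+	ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+		  mad->data, rec);
+	rec->rec_type = SA_PATH_REC_TYPE_OPA;
+}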
+
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
.struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
@@ -406,7 +555,7 @@ static const struct ib_field service_rec_table[] = {
.struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
.field_name = "ib_class_port_info:" #field
-static const struct ib_field classport_info_rec_table[] = {
+static const struct ib_field ib_classport_info_rec_table[] = {
{ CLASSPORTINFO_REC_FIELD(base_version),
.offset_words = 0,
.offset_bits = 0,
@@ -477,6 +626,88 @@ static const struct ib_field classport_info_rec_table[] = {
.size_bits = 32 },
};
+#define OPA_CLASSPORTINFO_REC_FIELD(field) \
+ .struct_offset_bytes =\
+ offsetof(struct opa_class_port_info, field), \
+ .struct_size_bytes = \
+ sizeof((struct opa_class_port_info *)0)->field, \
+ .field_name = "opa_class_port_info:" #field
+
+static const struct ib_field opa_classport_info_rec_table[] = {
+ { OPA_CLASSPORTINFO_REC_FIELD(base_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(class_version),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
+ .offset_words = 17,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
+ .offset_words = 18,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
+ .offset_words = 18,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
+ .offset_words = 19,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 19,
+ .offset_bits = 8,
+ .size_bits = 24 },
+};
+
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
@@ -518,7 +749,7 @@ static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
struct ib_sa_query *query)
{
- struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1];
+ struct sa_path_rec *sa_rec = query->mad_buf->context[1];
struct ib_sa_mad *mad = query->mad_buf->mad;
ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
u16 val16;
@@ -528,8 +759,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
query->mad_buf->context[1] = NULL;
/* Construct the family header first */
- header = (struct rdma_ls_resolve_header *)
- skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
memcpy(header->device_name, query->port->agent->device->name,
LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num;
@@ -808,7 +1038,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
return -EPERM;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_policy);
+ nlmsg_len(nlh), ib_nl_policy, NULL);
attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
if (ret || !attr)
goto settimeout_out;
@@ -860,7 +1090,7 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
return 0;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_policy);
+ nlmsg_len(nlh), ib_nl_policy, NULL);
if (ret)
return 0;
@@ -927,96 +1157,10 @@ static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
- ib_destroy_ah(sm_ah->ah);
+ rdma_destroy_ah(sm_ah->ah);
kfree(sm_ah);
}
-static void update_sm_ah(struct work_struct *work)
-{
- struct ib_sa_port *port =
- container_of(work, struct ib_sa_port, update_task);
- struct ib_sa_sm_ah *new_ah;
- struct ib_port_attr port_attr;
- struct ib_ah_attr ah_attr;
-
- if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
- pr_warn("Couldn't query port\n");
- return;
- }
-
- new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
- if (!new_ah) {
- return;
- }
-
- kref_init(&new_ah->ref);
- new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
-
- new_ah->pkey_index = 0;
- if (ib_find_pkey(port->agent->device, port->port_num,
- IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
- pr_err("Couldn't find index for default PKey\n");
-
- memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = port_attr.sm_lid;
- ah_attr.sl = port_attr.sm_sl;
- ah_attr.port_num = port->port_num;
- if (port_attr.grh_required) {
- ah_attr.ah_flags = IB_AH_GRH;
- ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
- ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
- }
-
- new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
- if (IS_ERR(new_ah->ah)) {
- pr_warn("Couldn't create new SM AH\n");
- kfree(new_ah);
- return;
- }
-
- spin_lock_irq(&port->ah_lock);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = new_ah;
- spin_unlock_irq(&port->ah_lock);
-
-}
-
-static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
-{
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER) {
- unsigned long flags;
- struct ib_sa_device *sa_dev =
- container_of(handler, typeof(*sa_dev), event_handler);
- struct ib_sa_port *port =
- &sa_dev->port[event->element.port_num - sa_dev->start_port];
-
- if (!rdma_cap_ib_sa(handler->device, port->port_num))
- return;
-
- spin_lock_irqsave(&port->ah_lock, flags);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = NULL;
- spin_unlock_irqrestore(&port->ah_lock, flags);
-
- if (event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_LID_CHANGE) {
- spin_lock_irqsave(&port->classport_lock, flags);
- port->classport_info.valid = false;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- }
- queue_work(ib_wq, &sa_dev->port[event->element.port_num -
- sa_dev->start_port].update_task);
- }
-}
-
void ib_sa_register_client(struct ib_sa_client *client)
{
atomic_set(&client->users, 1);
@@ -1085,7 +1229,8 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
}
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -1093,21 +1238,26 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct net_device *ndev = NULL;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->dlid);
- ah_attr->sl = rec->sl;
- ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
- get_src_path_mask(device, port_num);
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
+ get_src_path_mask(device, port_num));
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
use_roce = rdma_cap_eth_ah(device, port_num);
if (use_roce) {
struct net_device *idev;
struct net_device *resolved_dev;
- struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
- .net = rec->net ? rec->net :
- &init_net};
+ struct rdma_dev_addr dev_addr = {
+ .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
+ sa_path_get_ifindex(rec) : 0),
+ .net = sa_path_get_ndev(rec) ?
+ sa_path_get_ndev(rec) :
+ &init_net
+ };
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
@@ -1128,7 +1278,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
idev = device->get_netdev(device, port_num);
@@ -1159,28 +1309,31 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
}
if (rec->hop_limit > 0 || use_roce) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->dgid;
+ enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
- ret = ib_find_cached_gid_by_port(device, &rec->sgid,
- rec->gid_type, port_num, ndev,
- &gid_index);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
+ port_num, ndev, &gid_index);
if (ret) {
if (ndev)
dev_put(ndev);
return ret;
}
- ah_attr->grh.sgid_index = gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
+ rdma_ah_set_grh(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ gid_index, rec->hop_limit,
+ rec->traffic_class);
if (ndev)
dev_put(ndev);
}
- if (use_roce)
- memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
+ if (use_roce) {
+ u8 *dmac = sa_path_get_dmac(rec);
+
+ if (!dmac)
+ return -EINVAL;
+ memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
+ }
return 0;
}
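
The rewritten ib_init_ah_from_path() no longer writes ib_ah_attr fields directly; it selects the attribute flavor with rdma_ah_find_type() and fills the rest through rdma_ah_set_*() accessors. A minimal sketch of that pattern for a plain IB destination without a GRH (function name and values are illustrative, not from the patch):

#include <rdma/ib_verbs.h>

static void example_fill_ah_attr(struct ib_device *dev, u8 port_num,
				 u16 dlid, u8 sl,
				 struct rdma_ah_attr *ah_attr)
{
	memset(ah_attr, 0, sizeof(*ah_attr));
	/* Pick the attribute flavor (IB, RoCE or OPA) for this port. */
	ah_attr->type = rdma_ah_find_type(dev, port_num);
	rdma_ah_set_dlid(ah_attr, dlid);
	rdma_ah_set_sl(ah_attr, sl);
	rdma_ah_set_port_num(ah_attr, port_num);
	rdma_ah_set_static_rate(ah_attr, IB_RATE_PORT_CURRENT);
}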
@@ -1203,7 +1356,9 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
gfp_mask,
- IB_MGMT_BASE_VERSION);
+ ((query->flags & IB_SA_QUERY_OPA) ?
+ OPA_MGMT_BASE_VERSION :
+ IB_MGMT_BASE_VERSION));
if (IS_ERR(query->mad_buf)) {
kref_put(&query->sm_ah->ref, free_sm_ah);
return -ENOMEM;
@@ -1220,16 +1375,21 @@ static void free_mad(struct ib_sa_query *query)
kref_put(&query->sm_ah->ref, free_sm_ah);
}
-static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
+static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
{
+ struct ib_sa_mad *mad = query->mad_buf->mad;
unsigned long flags;
memset(mad, 0, sizeof *mad);
- mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ if (query->flags & IB_SA_QUERY_OPA) {
+ mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
+ } else {
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ }
mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
-
spin_lock_irqsave(&tid_lock, flags);
mad->mad_hdr.tid =
cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
@@ -1258,7 +1418,8 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
query->mad_buf->context[0] = query;
query->id = id;
- if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
+ if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
+ (!(query->flags & IB_SA_QUERY_OPA))) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
@@ -1281,18 +1442,75 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
return ret ? ret : id;
}
-void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
-void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
{
ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
+static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ unsigned long flags;
+ bool ret = false;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (!port->classport_info.valid)
+ goto ret;
+
+ if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
+ ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
+ OPA_CLASS_PORT_INFO_PR_SUPPORT;
+ret:
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+
+enum opa_pr_supported {
+ PR_NOT_SUPPORTED,
+ PR_OPA_SUPPORTED,
+ PR_IB_SUPPORTED
+};
+
+/**
+ * opa_pr_query_possible - Check if current PR query can be an OPA query.
+ * Returns PR_NOT_SUPPORTED if a path record query is not
+ * possible, PR_OPA_SUPPORTED if an OPA path record query
+ * is possible, and PR_IB_SUPPORTED if an IB path record
+ * query is possible.
+ */
+static int opa_pr_query_possible(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num,
+ struct sa_path_rec *rec)
+{
+ struct ib_port_attr port_attr;
+
+ if (ib_query_port(device, port_num, &port_attr))
+ return PR_NOT_SUPPORTED;
+
+ if (ib_sa_opa_pathrecord_support(client, device, port_num))
+ return PR_OPA_SUPPORTED;
+
+ if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ return PR_NOT_SUPPORTED;
+ else
+ return PR_IB_SUPPORTED;
+}
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1301,22 +1519,44 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
container_of(sa_query, struct ib_sa_path_query, sa_query);
if (mad) {
- struct ib_sa_path_rec rec;
-
- ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
- mad->data, &rec);
- rec.net = NULL;
- rec.ifindex = 0;
- rec.gid_type = IB_GID_TYPE_IB;
- eth_zero_addr(rec.dmac);
- query->callback(status, &rec, query->context);
+ struct sa_path_rec rec;
+
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ ib_unpack(opa_path_rec_table,
+ ARRAY_SIZE(opa_path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ query->callback(status, &rec, query->context);
+ } else {
+ ib_unpack(path_rec_table,
+ ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_ndev(&rec, NULL);
+ sa_path_set_ifindex(&rec, 0);
+ sa_path_set_dmac_zero(&rec);
+
+ if (query->conv_pr) {
+ struct sa_path_rec opa;
+
+ memset(&opa, 0, sizeof(struct sa_path_rec));
+ sa_convert_path_ib_to_opa(&opa, &rec);
+ query->callback(status, &opa, query->context);
+ } else {
+ query->callback(status, &rec, query->context);
+ }
+ }
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
- kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
+ struct ib_sa_path_query *query =
+ container_of(sa_query, struct ib_sa_path_query, sa_query);
+
+ kfree(query->conv_pr);
+ kfree(query);
}
/**
@@ -1346,11 +1586,11 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
+ struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
- struct ib_sa_path_rec *resp,
+ struct sa_path_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
@@ -1360,11 +1600,16 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
+ enum opa_pr_supported status;
int ret;
if (!sa_dev)
return -ENODEV;
+ if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
+ (rec->rec_type != SA_PATH_REC_TYPE_OPA))
+ return -EINVAL;
+
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
@@ -1373,9 +1618,26 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
return -ENOMEM;
query->sa_query.port = port;
+ if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
+ status = opa_pr_query_possible(client, device, port_num, rec);
+ if (status == PR_NOT_SUPPORTED) {
+ ret = -EINVAL;
+ goto err1;
+ } else if (status == PR_OPA_SUPPORTED) {
+ query->sa_query.flags |= IB_SA_QUERY_OPA;
+ } else {
+ query->conv_pr =
+ kmalloc(sizeof(*query->conv_pr), gfp_mask);
+ if (!query->conv_pr) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+ }
+ }
+
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err2;
ib_sa_client_get(client);
query->sa_query.client = client;
@@ -1383,7 +1645,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
query->sa_query.release = ib_sa_path_rec_release;
@@ -1391,24 +1653,36 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
+ if (query->sa_query.flags & IB_SA_QUERY_OPA) {
+ ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+ rec, mad->data);
+ } else if (query->conv_pr) {
+ sa_convert_path_opa_to_ib(query->conv_pr, rec);
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ query->conv_pr, mad->data);
+ } else {
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec, mad->data);
+ }
*sa_query = &query->sa_query;
query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
- query->sa_query.mad_buf->context[1] = rec;
+ query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
+ query->conv_pr : rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err3;
return ret;
-err2:
+err3:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-
+err2:
+ kfree(query->conv_pr);
err1:
kfree(query);
return ret;
@@ -1508,7 +1782,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
query->sa_query.release = ib_sa_service_rec_release;
@@ -1600,7 +1874,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
query->sa_query.release = ib_sa_mcmember_rec_release;
@@ -1697,7 +1971,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
query->sa_query.release = ib_sa_guidinfo_rec_release;
@@ -1728,7 +2002,42 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-/* Support get SA ClassPortInfo */
+bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ bool ret = false;
+ unsigned long flags;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if ((port->classport_info.valid) &&
+ (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
+ ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
+ & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
+
+struct ib_classport_info_context {
+ struct completion done;
+ struct ib_sa_query *sa_query;
+};
+
+static void ib_classportinfo_cb(void *context)
+{
+ struct ib_classport_info_context *cb_ctx = context;
+
+ complete(&cb_ctx->done);
+}
+
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1736,91 +2045,91 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
unsigned long flags;
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
+ struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
if (mad) {
- struct ib_class_port_info rec;
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ struct opa_class_port_info rec;
- ib_unpack(classport_info_rec_table,
- ARRAY_SIZE(classport_info_rec_table),
- mad->data, &rec);
+ ib_unpack(opa_classport_info_rec_table,
+ ARRAY_SIZE(opa_classport_info_rec_table),
+ mad->data, &rec);
- spin_lock_irqsave(&sa_query->port->classport_lock, flags);
- if (!status && !sa_query->port->classport_info.valid) {
- memcpy(&sa_query->port->classport_info.data, &rec,
- sizeof(sa_query->port->classport_info.data));
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.opa, &rec,
+ sizeof(info->data.opa));
- sa_query->port->classport_info.valid = true;
- }
- spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_OPA;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
- query->callback(status, &rec, query->context);
- } else {
- query->callback(status, NULL, query->context);
+ } else {
+ struct ib_class_port_info rec;
+
+ ib_unpack(ib_classport_info_rec_table,
+ ARRAY_SIZE(ib_classport_info_rec_table),
+ mad->data, &rec);
+
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.ib, &rec,
+ sizeof(info->data.ib));
+
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_IB;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
+ }
}
+ query->callback(query->context);
}
-static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
+static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_classport_info_query,
sa_query));
}
-int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_class_port_info *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
+ int timeout_ms,
+ void (*callback)(void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
{
- struct ib_sa_classport_info_query *query;
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
struct ib_mad_agent *agent;
+ struct ib_sa_classport_info_query *query;
struct ib_sa_mad *mad;
- struct ib_class_port_info cached_class_port_info;
+ gfp_t gfp_mask = GFP_KERNEL;
int ret;
- unsigned long flags;
- if (!sa_dev)
- return -ENODEV;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
- /* Use cached ClassPortInfo attribute if valid instead of sending mad */
- spin_lock_irqsave(&port->classport_lock, flags);
- if (port->classport_info.valid && callback) {
- memcpy(&cached_class_port_info, &port->classport_info.data,
- sizeof(cached_class_port_info));
- spin_unlock_irqrestore(&port->classport_lock, flags);
- callback(0, &cached_class_port_info, context);
- return 0;
- }
- spin_unlock_irqrestore(&port->classport_lock, flags);
-
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
+ query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
+ port->port_num) ?
+ IB_SA_QUERY_OPA : 0;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err_free;
- ib_sa_client_get(client);
- query->sa_query.client = client;
- query->callback = callback;
- query->context = context;
+ query->callback = callback;
+ query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
- query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
-
- query->sa_query.release = ib_sa_portclass_info_rec_release;
- /* support GET only */
+ query->sa_query.callback = ib_sa_classport_info_rec_callback;
+ query->sa_query.release = ib_sa_classport_info_rec_release;
mad->mad_hdr.method = IB_MGMT_METHOD_GET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
mad->sa_hdr.comp_mask = 0;
@@ -1828,20 +2137,71 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err_free_mad;
return ret;
-err2:
+err_free_mad:
*sa_query = NULL;
- ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-err1:
+err_free:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
+
+static void update_ib_cpi(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, ib_cpi_work.work);
+ struct ib_classport_info_context *cb_context;
+ unsigned long flags;
+ int ret;
+
+ /* If the classport info is valid, nothing
+ * to do here.
+ */
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (port->classport_info.valid) {
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+ cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
+ if (!cb_context)
+ goto err_nomem;
+
+ init_completion(&cb_context->done);
+
+ ret = ib_sa_classport_info_rec_query(port, 3000,
+ ib_classportinfo_cb, cb_context,
+ &cb_context->sa_query);
+ if (ret < 0)
+ goto free_cb_err;
+ wait_for_completion(&cb_context->done);
+free_cb_err:
+ kfree(cb_context);
+ spin_lock_irqsave(&port->classport_lock, flags);
+
+	/* If the classport info is still not valid, the query must have
+	 * failed for some reason. Retry issuing the query.
+ */
+ if (!port->classport_info.valid) {
+ port->classport_info.retry_cnt++;
+ if (port->classport_info.retry_cnt <=
+ IB_SA_CPI_MAX_RETRY_CNT) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
+ }
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+err_nomem:
+ return;
+}
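+
+update_ib_cpi() turns the asynchronous class port info query into a synchronous step by blocking on a completion that ib_classportinfo_cb() signals, and requeues itself as delayed work (up to IB_SA_CPI_MAX_RETRY_CNT times, IB_SA_CPI_RETRY_WAIT ms apart) while the cache stays invalid. The generic shape of that completion hand-off, reduced to a hedged sketch with hypothetical names and a generic callback signature:
+
+#include <linux/completion.h>
+
+struct example_ctx {
+	struct completion done;
+	int status;
+};
+
+/* Runs in the asynchronous callback context. */
+static void example_cb(void *context, int status)
+{
+	struct example_ctx *ctx = context;
+
+	ctx->status = status;
+	complete(&ctx->done);
+}
+
+/* Runs in process context (e.g. a work item) and blocks until the
+ * callback fires, making the asynchronous query look synchronous.
+ */
+static int example_query_sync(int (*issue)(void (*cb)(void *, int), void *arg))
+{
+	struct example_ctx ctx;
+	int ret;
+
+	init_completion(&ctx.done);
+	ret = issue(example_cb, &ctx);
+	if (ret < 0)
+		return ret;
+	wait_for_completion(&ctx.done);
+	return ctx.status;
+}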
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@@ -1870,7 +2230,8 @@ static void send_handler(struct ib_mad_agent *agent,
spin_unlock_irqrestore(&idr_lock, flags);
free_mad(query);
- ib_sa_client_put(query->client);
+ if (query->client)
+ ib_sa_client_put(query->client);
query->release(query);
}
@@ -1897,6 +2258,102 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
ib_free_recv_mad(mad_recv_wc);
}
+static void update_sm_ah(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, update_task);
+ struct ib_sa_sm_ah *new_ah;
+ struct ib_port_attr port_attr;
+ struct rdma_ah_attr ah_attr;
+
+ if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
+ pr_warn("Couldn't query port\n");
+ return;
+ }
+
+ new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
+ if (!new_ah)
+ return;
+
+ kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
+
+ new_ah->pkey_index = 0;
+ if (ib_find_pkey(port->agent->device, port->port_num,
+ IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
+ pr_err("Couldn't find index for default PKey\n");
+
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(port->agent->device,
+ port->port_num);
+ rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
+ rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+ if (port_attr.grh_required) {
+ rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+
+ rdma_ah_set_subnet_prefix(&ah_attr,
+ cpu_to_be64(port_attr.subnet_prefix));
+ rdma_ah_set_interface_id(&ah_attr,
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
+ }
+
+ new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
+ if (IS_ERR(new_ah->ah)) {
+ pr_warn("Couldn't create new SM AH\n");
+ kfree(new_ah);
+ return;
+ }
+
+ spin_lock_irq(&port->ah_lock);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = new_ah;
+ spin_unlock_irq(&port->ah_lock);
+}
+
+static void ib_sa_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER) {
+ unsigned long flags;
+ struct ib_sa_device *sa_dev =
+ container_of(handler, typeof(*sa_dev), event_handler);
+ u8 port_num = event->element.port_num - sa_dev->start_port;
+ struct ib_sa_port *port = &sa_dev->port[port_num];
+
+ if (!rdma_cap_ib_sa(handler->device, port->port_num))
+ return;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = NULL;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ if (event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PORT_ACTIVE) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ port->classport_info.valid = false;
+ port->classport_info.retry_cnt = 0;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ queue_delayed_work(ib_wq,
+ &port->ib_cpi_work, delay);
+ }
+ queue_work(ib_wq, &sa_dev->port[port_num].update_task);
+ }
+}
+
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
@@ -1934,6 +2391,8 @@ static void ib_sa_add_one(struct ib_device *device)
goto err;
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
+ INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
+ update_ib_cpi);
count++;
}
@@ -1980,11 +2439,11 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
return;
ib_unregister_event_handler(&sa_dev->event_handler);
-
flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_cap_ib_sa(device, i + 1)) {
+ cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
new file mode 100644
index 000000000000..70ad19c4c73e
--- /dev/null
+++ b/drivers/infiniband/core/security.c
@@ -0,0 +1,709 @@
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifdef CONFIG_SECURITY_INFINIBAND
+
+#include <linux/security.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include "core_priv.h"
+#include "mad_priv.h"
+
+static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *pkey = NULL;
+ struct pkey_index_qp_list *tmp_pkey;
+ struct ib_device *dev = pp->sec->dev;
+
+ spin_lock(&dev->port_pkey_list[pp->port_num].list_lock);
+ list_for_each_entry(tmp_pkey,
+ &dev->port_pkey_list[pp->port_num].pkey_list,
+ pkey_index_list) {
+ if (tmp_pkey->pkey_index == pp->pkey_index) {
+ pkey = tmp_pkey;
+ break;
+ }
+ }
+ spin_unlock(&dev->port_pkey_list[pp->port_num].list_lock);
+ return pkey;
+}
+
+static int get_pkey_and_subnet_prefix(struct ib_port_pkey *pp,
+ u16 *pkey,
+ u64 *subnet_prefix)
+{
+ struct ib_device *dev = pp->sec->dev;
+ int ret;
+
+ ret = ib_get_cached_pkey(dev, pp->port_num, pp->pkey_index, pkey);
+ if (ret)
+ return ret;
+
+ ret = ib_get_cached_subnet_prefix(dev, pp->port_num, subnet_prefix);
+
+ return ret;
+}
+
+static int enforce_qp_pkey_security(u16 pkey,
+ u64 subnet_prefix,
+ struct ib_qp_security *qp_sec)
+{
+ struct ib_qp_security *shared_qp_sec;
+ int ret;
+
+ ret = security_ib_pkey_access(qp_sec->security, subnet_prefix, pkey);
+ if (ret)
+ return ret;
+
+ if (qp_sec->qp == qp_sec->qp->real_qp) {
+ list_for_each_entry(shared_qp_sec,
+ &qp_sec->shared_qp_list,
+ shared_qp_list) {
+ ret = security_ib_pkey_access(shared_qp_sec->security,
+ subnet_prefix,
+ pkey);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex of the QP of the security structure in *pps.
+ *
+ * It takes separate ports_pkeys and security structures
+ * because in some cases the pps will be for new settings,
+ * or the pps will be for the real QP while the security
+ * structure will be for a shared QP.
+ */
+static int check_qp_port_pkey_settings(struct ib_ports_pkeys *pps,
+ struct ib_qp_security *sec)
+{
+ u64 subnet_prefix;
+ u16 pkey;
+ int ret = 0;
+
+ if (!pps)
+ return 0;
+
+ if (pps->main.state != IB_PORT_PKEY_NOT_VALID) {
+ ret = get_pkey_and_subnet_prefix(&pps->main,
+ &pkey,
+ &subnet_prefix);
+ if (ret)
+ return ret;
+
+ ret = enforce_qp_pkey_security(pkey,
+ subnet_prefix,
+ sec);
+ if (ret)
+ return ret;
+ }
+
+ if (pps->alt.state != IB_PORT_PKEY_NOT_VALID) {
+ ret = get_pkey_and_subnet_prefix(&pps->alt,
+ &pkey,
+ &subnet_prefix);
+ if (ret)
+ return ret;
+
+ ret = enforce_qp_pkey_security(pkey,
+ subnet_prefix,
+ sec);
+ }
+
+ return ret;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static void qp_to_error(struct ib_qp_security *sec)
+{
+ struct ib_qp_security *shared_qp_sec;
+ struct ib_qp_attr attr = {
+ .qp_state = IB_QPS_ERR
+ };
+ struct ib_event event = {
+ .event = IB_EVENT_QP_FATAL
+ };
+
+ /* If the QP is in the process of being destroyed
+ * the qp pointer in the security structure is
+ * undefined. It cannot be modified now.
+ */
+ if (sec->destroying)
+ return;
+
+ ib_modify_qp(sec->qp,
+ &attr,
+ IB_QP_STATE);
+
+ if (sec->qp->event_handler && sec->qp->qp_context) {
+ event.element.qp = sec->qp;
+ sec->qp->event_handler(&event,
+ sec->qp->qp_context);
+ }
+
+ list_for_each_entry(shared_qp_sec,
+ &sec->shared_qp_list,
+ shared_qp_list) {
+ struct ib_qp *qp = shared_qp_sec->qp;
+
+ if (qp->event_handler && qp->qp_context) {
+ event.element.qp = qp;
+ event.device = qp->device;
+ qp->event_handler(&event,
+ qp->qp_context);
+ }
+ }
+}
+
+static inline void check_pkey_qps(struct pkey_index_qp_list *pkey,
+ struct ib_device *device,
+ u8 port_num,
+ u64 subnet_prefix)
+{
+ struct ib_port_pkey *pp, *tmp_pp;
+ bool comp;
+ LIST_HEAD(to_error_list);
+ u16 pkey_val;
+
+ if (!ib_get_cached_pkey(device,
+ port_num,
+ pkey->pkey_index,
+ &pkey_val)) {
+ spin_lock(&pkey->qp_list_lock);
+ list_for_each_entry(pp, &pkey->qp_list, qp_list) {
+ if (atomic_read(&pp->sec->error_list_count))
+ continue;
+
+ if (enforce_qp_pkey_security(pkey_val,
+ subnet_prefix,
+ pp->sec)) {
+ atomic_inc(&pp->sec->error_list_count);
+ list_add(&pp->to_error_list,
+ &to_error_list);
+ }
+ }
+ spin_unlock(&pkey->qp_list_lock);
+ }
+
+ list_for_each_entry_safe(pp,
+ tmp_pp,
+ &to_error_list,
+ to_error_list) {
+ mutex_lock(&pp->sec->mutex);
+ qp_to_error(pp->sec);
+ list_del(&pp->to_error_list);
+ atomic_dec(&pp->sec->error_list_count);
+ comp = pp->sec->destroying;
+ mutex_unlock(&pp->sec->mutex);
+
+ if (comp)
+ complete(&pp->sec->error_complete);
+ }
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static int port_pkey_list_insert(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *tmp_pkey;
+ struct pkey_index_qp_list *pkey;
+ struct ib_device *dev;
+ u8 port_num = pp->port_num;
+ int ret = 0;
+
+ if (pp->state != IB_PORT_PKEY_VALID)
+ return 0;
+
+ dev = pp->sec->dev;
+
+ pkey = get_pkey_idx_qp_list(pp);
+
+ if (!pkey) {
+ bool found = false;
+
+ pkey = kzalloc(sizeof(*pkey), GFP_KERNEL);
+ if (!pkey)
+ return -ENOMEM;
+
+ spin_lock(&dev->port_pkey_list[port_num].list_lock);
+ /* Check for the PKey again. A racing process may
+ * have created it.
+ */
+ list_for_each_entry(tmp_pkey,
+ &dev->port_pkey_list[port_num].pkey_list,
+ pkey_index_list) {
+ if (tmp_pkey->pkey_index == pp->pkey_index) {
+ kfree(pkey);
+ pkey = tmp_pkey;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pkey->pkey_index = pp->pkey_index;
+ spin_lock_init(&pkey->qp_list_lock);
+ INIT_LIST_HEAD(&pkey->qp_list);
+ list_add(&pkey->pkey_index_list,
+ &dev->port_pkey_list[port_num].pkey_list);
+ }
+ spin_unlock(&dev->port_pkey_list[port_num].list_lock);
+ }
+
+ spin_lock(&pkey->qp_list_lock);
+ list_add(&pp->qp_list, &pkey->qp_list);
+ spin_unlock(&pkey->qp_list_lock);
+
+ pp->state = IB_PORT_PKEY_LISTED;
+
+ return ret;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static void port_pkey_list_remove(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *pkey;
+
+ if (pp->state != IB_PORT_PKEY_LISTED)
+ return;
+
+ pkey = get_pkey_idx_qp_list(pp);
+
+ spin_lock(&pkey->qp_list_lock);
+ list_del(&pp->qp_list);
+ spin_unlock(&pkey->qp_list_lock);
+
+	/* The setting may still be valid, e.g. after
+	 * a destroy has failed.
+ */
+ pp->state = IB_PORT_PKEY_VALID;
+}
+
+static void destroy_qp_security(struct ib_qp_security *sec)
+{
+ security_ib_free_security(sec->security);
+ kfree(sec->ports_pkeys);
+ kfree(sec);
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
+ const struct ib_qp_attr *qp_attr,
+ int qp_attr_mask)
+{
+ struct ib_ports_pkeys *new_pps;
+ struct ib_ports_pkeys *qp_pps = qp->qp_sec->ports_pkeys;
+
+ new_pps = kzalloc(sizeof(*new_pps), GFP_KERNEL);
+ if (!new_pps)
+ return NULL;
+
+ if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) {
+ if (!qp_pps) {
+ new_pps->main.port_num = qp_attr->port_num;
+ new_pps->main.pkey_index = qp_attr->pkey_index;
+ } else {
+ new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ?
+ qp_attr->port_num :
+ qp_pps->main.port_num;
+
+ new_pps->main.pkey_index =
+ (qp_attr_mask & IB_QP_PKEY_INDEX) ?
+ qp_attr->pkey_index :
+ qp_pps->main.pkey_index;
+ }
+ new_pps->main.state = IB_PORT_PKEY_VALID;
+ } else if (qp_pps) {
+ new_pps->main.port_num = qp_pps->main.port_num;
+ new_pps->main.pkey_index = qp_pps->main.pkey_index;
+ if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
+ new_pps->main.state = IB_PORT_PKEY_VALID;
+ }
+
+ if (qp_attr_mask & IB_QP_ALT_PATH) {
+ new_pps->alt.port_num = qp_attr->alt_port_num;
+ new_pps->alt.pkey_index = qp_attr->alt_pkey_index;
+ new_pps->alt.state = IB_PORT_PKEY_VALID;
+ } else if (qp_pps) {
+ new_pps->alt.port_num = qp_pps->alt.port_num;
+ new_pps->alt.pkey_index = qp_pps->alt.pkey_index;
+ if (qp_pps->alt.state != IB_PORT_PKEY_NOT_VALID)
+ new_pps->alt.state = IB_PORT_PKEY_VALID;
+ }
+
+ new_pps->main.sec = qp->qp_sec;
+ new_pps->alt.sec = qp->qp_sec;
+ return new_pps;
+}
+
+int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
+{
+ struct ib_qp *real_qp = qp->real_qp;
+ int ret;
+
+ ret = ib_create_qp_security(qp, dev);
+
+ if (ret)
+ return ret;
+
+ mutex_lock(&real_qp->qp_sec->mutex);
+ ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
+ qp->qp_sec);
+
+ if (ret)
+ goto ret;
+
+ if (qp != real_qp)
+ list_add(&qp->qp_sec->shared_qp_list,
+ &real_qp->qp_sec->shared_qp_list);
+ret:
+ mutex_unlock(&real_qp->qp_sec->mutex);
+ if (ret)
+ destroy_qp_security(qp->qp_sec);
+
+ return ret;
+}
+
+void ib_close_shared_qp_security(struct ib_qp_security *sec)
+{
+ struct ib_qp *real_qp = sec->qp->real_qp;
+
+ mutex_lock(&real_qp->qp_sec->mutex);
+ list_del(&sec->shared_qp_list);
+ mutex_unlock(&real_qp->qp_sec->mutex);
+
+ destroy_qp_security(sec);
+}
+
+int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
+{
+ int ret;
+
+ qp->qp_sec = kzalloc(sizeof(*qp->qp_sec), GFP_KERNEL);
+ if (!qp->qp_sec)
+ return -ENOMEM;
+
+ qp->qp_sec->qp = qp;
+ qp->qp_sec->dev = dev;
+ mutex_init(&qp->qp_sec->mutex);
+ INIT_LIST_HEAD(&qp->qp_sec->shared_qp_list);
+ atomic_set(&qp->qp_sec->error_list_count, 0);
+ init_completion(&qp->qp_sec->error_complete);
+ ret = security_ib_alloc_security(&qp->qp_sec->security);
+ if (ret)
+ kfree(qp->qp_sec);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_create_qp_security);
+
+void ib_destroy_qp_security_begin(struct ib_qp_security *sec)
+{
+ mutex_lock(&sec->mutex);
+
+ /* Remove the QP from the lists so it won't get added to
+ * a to_error_list during the destroy process.
+ */
+ if (sec->ports_pkeys) {
+ port_pkey_list_remove(&sec->ports_pkeys->main);
+ port_pkey_list_remove(&sec->ports_pkeys->alt);
+ }
+
+	/* If the QP is already in one or more of those lists,
+	 * the destroying flag will ensure the to-error flow
+	 * doesn't operate on an undefined QP.
+ */
+ sec->destroying = true;
+
+ /* Record the error list count to know how many completions
+ * to wait for.
+ */
+ sec->error_comps_pending = atomic_read(&sec->error_list_count);
+
+ mutex_unlock(&sec->mutex);
+}
+
+void ib_destroy_qp_security_abort(struct ib_qp_security *sec)
+{
+ int ret;
+ int i;
+
+ /* If a concurrent cache update is in progress this
+ * QP security could be marked for an error state
+ * transition. Wait for this to complete.
+ */
+ for (i = 0; i < sec->error_comps_pending; i++)
+ wait_for_completion(&sec->error_complete);
+
+ mutex_lock(&sec->mutex);
+ sec->destroying = false;
+
+ /* Restore the position in the lists and verify
+ * access is still allowed in case a cache update
+ * occurred while attempting to destroy.
+ *
+	 * Because these settings were already listed
+	 * and removed during ib_destroy_qp_security_begin,
+	 * we know the pkey_index_qp_list for the PKey
+	 * already exists, so port_pkey_list_insert won't fail.
+ */
+ if (sec->ports_pkeys) {
+ port_pkey_list_insert(&sec->ports_pkeys->main);
+ port_pkey_list_insert(&sec->ports_pkeys->alt);
+ }
+
+ ret = check_qp_port_pkey_settings(sec->ports_pkeys, sec);
+ if (ret)
+ qp_to_error(sec);
+
+ mutex_unlock(&sec->mutex);
+}
+
+void ib_destroy_qp_security_end(struct ib_qp_security *sec)
+{
+ int i;
+
+ /* If a concurrent cache update is occurring we must
+ * wait until this QP security structure is processed
+ * in the QP to error flow before destroying it because
+ * the to_error_list is in use.
+ */
+ for (i = 0; i < sec->error_comps_pending; i++)
+ wait_for_completion(&sec->error_complete);
+
+ destroy_qp_security(sec);
+}
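+
+The begin/abort/end trio above is meant to bracket the driver's destroy_qp() call: begin detaches the pkey lists and latches the count of in-flight error transitions, end frees the security state once destroy succeeded, and abort restores the lists and rechecks access if destroy failed. The caller lives outside this file, presumably the QP destroy path in verbs.c (also modified by this series); the intended sequence is roughly this hedged sketch, not the actual verbs.c code:
+
+static int example_destroy_qp(struct ib_qp *qp)
+{
+	struct ib_qp_security *sec = qp->qp_sec;
+	int ret;
+
+	ib_destroy_qp_security_begin(sec);
+
+	ret = qp->device->destroy_qp(qp);
+	if (ret)
+		/* Destroy failed: relist the pkeys and recheck access. */
+		ib_destroy_qp_security_abort(sec);
+	else
+		/* Destroy succeeded: wait out concurrent checks, then free. */
+		ib_destroy_qp_security_end(sec);
+
+	return ret;
+}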
+
+void ib_security_cache_change(struct ib_device *device,
+ u8 port_num,
+ u64 subnet_prefix)
+{
+ struct pkey_index_qp_list *pkey;
+
+ list_for_each_entry(pkey,
+ &device->port_pkey_list[port_num].pkey_list,
+ pkey_index_list) {
+ check_pkey_qps(pkey,
+ device,
+ port_num,
+ subnet_prefix);
+ }
+}
+
+void ib_security_destroy_port_pkey_list(struct ib_device *device)
+{
+ struct pkey_index_qp_list *pkey, *tmp_pkey;
+ int i;
+
+ for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+ spin_lock(&device->port_pkey_list[i].list_lock);
+ list_for_each_entry_safe(pkey,
+ tmp_pkey,
+ &device->port_pkey_list[i].pkey_list,
+ pkey_index_list) {
+ list_del(&pkey->pkey_index_list);
+ kfree(pkey);
+ }
+ spin_unlock(&device->port_pkey_list[i].list_lock);
+ }
+}
+
+int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata)
+{
+ int ret = 0;
+ struct ib_ports_pkeys *tmp_pps;
+ struct ib_ports_pkeys *new_pps;
+ bool special_qp = (qp->qp_type == IB_QPT_SMI ||
+ qp->qp_type == IB_QPT_GSI ||
+ qp->qp_type >= IB_QPT_RESERVED1);
+ bool pps_change = ((qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) ||
+ (qp_attr_mask & IB_QP_ALT_PATH));
+
+ if (pps_change && !special_qp) {
+ mutex_lock(&qp->qp_sec->mutex);
+ new_pps = get_new_pps(qp,
+ qp_attr,
+ qp_attr_mask);
+
+ /* Add this QP to the lists for the new port
+ * and pkey settings before checking for permission
+ * in case there is a concurrent cache update
+ * occurring. Walking the list for a cache change
+ * doesn't acquire the security mutex unless it's
+ * sending the QP to error.
+ */
+ ret = port_pkey_list_insert(&new_pps->main);
+
+ if (!ret)
+ ret = port_pkey_list_insert(&new_pps->alt);
+
+ if (!ret)
+ ret = check_qp_port_pkey_settings(new_pps,
+ qp->qp_sec);
+ }
+
+ if (!ret)
+ ret = qp->device->modify_qp(qp->real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
+
+ if (pps_change && !special_qp) {
+ /* Clean up the lists and free the appropriate
+ * ports_pkeys structure.
+ */
+ if (ret) {
+ tmp_pps = new_pps;
+ } else {
+ tmp_pps = qp->qp_sec->ports_pkeys;
+ qp->qp_sec->ports_pkeys = new_pps;
+ }
+
+ if (tmp_pps) {
+ port_pkey_list_remove(&tmp_pps->main);
+ port_pkey_list_remove(&tmp_pps->alt);
+ }
+ kfree(tmp_pps);
+ mutex_unlock(&qp->qp_sec->mutex);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(ib_security_modify_qp);
+
+int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec)
+{
+ u64 subnet_prefix;
+ u16 pkey;
+ int ret;
+
+ ret = ib_get_cached_pkey(dev, port_num, pkey_index, &pkey);
+ if (ret)
+ return ret;
+
+ ret = ib_get_cached_subnet_prefix(dev, port_num, &subnet_prefix);
+
+ if (ret)
+ return ret;
+
+ return security_ib_pkey_access(sec, subnet_prefix, pkey);
+}
+EXPORT_SYMBOL(ib_security_pkey_access);
+
+static int ib_mad_agent_security_change(struct notifier_block *nb,
+ unsigned long event,
+ void *data)
+{
+ struct ib_mad_agent *ag = container_of(nb, struct ib_mad_agent, lsm_nb);
+
+ if (event != LSM_POLICY_CHANGE)
+ return NOTIFY_DONE;
+
+ ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security,
+ ag->device->name,
+ ag->port_num);
+
+ return NOTIFY_OK;
+}
+
+int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type)
+{
+ int ret;
+
+ ret = security_ib_alloc_security(&agent->security);
+ if (ret)
+ return ret;
+
+ if (qp_type != IB_QPT_SMI)
+ return 0;
+
+ ret = security_ib_endport_manage_subnet(agent->security,
+ agent->device->name,
+ agent->port_num);
+ if (ret)
+ return ret;
+
+ agent->lsm_nb.notifier_call = ib_mad_agent_security_change;
+ ret = register_lsm_notifier(&agent->lsm_nb);
+ if (ret)
+ return ret;
+
+ agent->smp_allowed = true;
+ agent->lsm_nb_reg = true;
+ return 0;
+}
+
+void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
+{
+ security_ib_free_security(agent->security);
+ if (agent->lsm_nb_reg)
+ unregister_lsm_notifier(&agent->lsm_nb);
+}
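+
+ib_mad_agent_security_setup() and ib_mad_agent_security_cleanup() are intended to be paired around a MAD agent's lifetime; mad.c (also modified in this series, not shown here) presumably calls setup during agent registration and cleanup during unregistration. A hedged sketch of that pairing, with hypothetical surrounding code:
+
+static int example_register_agent(struct ib_mad_agent *agent,
+				  enum ib_qp_type qp_type)
+{
+	int ret;
+
+	ret = ib_mad_agent_security_setup(agent, qp_type);
+	if (ret)
+		return ret;	/* no agent without a security context */
+
+	/* ... rest of registration ... */
+	return 0;
+}
+
+static void example_unregister_agent(struct ib_mad_agent *agent)
+{
+	/* ... rest of teardown ... */
+	ib_mad_agent_security_cleanup(agent);
+}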
+
+int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
+{
+ int ret;
+
+ if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed)
+ return -EACCES;
+
+ ret = ib_security_pkey_access(map->agent.device,
+ map->agent.port_num,
+ pkey_index,
+ map->agent.security);
+
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+#endif /* CONFIG_SECURITY_INFINIBAND */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c1fb545e8d78..7ebe1ef23652 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -253,6 +253,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " EDR";
rate = 250;
break;
+ case IB_SPEED_HDR:
+ speed = " HDR";
+ rate = 500;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
rate = 25;
@@ -1258,7 +1262,7 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
- device->dev.parent = device->dma_device;
+ WARN_ON_ONCE(!device->dev.parent);
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
@@ -1301,7 +1305,7 @@ err_put:
free_port_list_attributes(device);
err_unregister:
- device_unregister(class_dev);
+ device_del(class_dev);
err:
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index e0a995b85a2d..112099c86a19 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -702,10 +702,10 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
return 0;
}
-static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
+static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
{
struct ib_user_path_rec upath;
- struct ib_sa_path_rec *sa_path;
+ struct sa_path_rec *sa_path;
*path = NULL;
@@ -962,7 +962,7 @@ static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
int in_len, int out_len)
{
struct ib_ucm_context *ctx;
- struct ib_sa_path_rec *path = NULL;
+ struct sa_path_rec *path = NULL;
struct ib_ucm_lap cmd;
const void *data = NULL;
int result;
@@ -1205,12 +1205,15 @@ static void ib_ucm_release_dev(struct device *dev)
struct ib_ucm_device *ucm_dev;
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- cdev_del(&ucm_dev->cdev);
+ kfree(ucm_dev);
+}
+
+static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
+{
if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
clear_bit(ucm_dev->devnum, dev_map);
else
clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
- kfree(ucm_dev);
}
static const struct file_operations ucm_fops = {
@@ -1266,7 +1269,9 @@ static void ib_ucm_add_one(struct ib_device *device)
if (!ucm_dev)
return;
+ device_initialize(&ucm_dev->dev);
ucm_dev->ib_dev = device;
+ ucm_dev->dev.release = ib_ucm_release_dev;
devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
if (devnum >= IB_UCM_MAX_DEVICES) {
@@ -1286,16 +1291,14 @@ static void ib_ucm_add_one(struct ib_device *device)
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
- if (cdev_add(&ucm_dev->cdev, base, 1))
- goto err;
ucm_dev->dev.class = &cm_class;
- ucm_dev->dev.parent = device->dma_device;
- ucm_dev->dev.devt = ucm_dev->cdev.dev;
- ucm_dev->dev.release = ib_ucm_release_dev;
+ ucm_dev->dev.parent = device->dev.parent;
+ ucm_dev->dev.devt = base;
+
dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
- if (device_register(&ucm_dev->dev))
- goto err_cdev;
+ if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev))
+ goto err_devnum;
if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev))
goto err_dev;
@@ -1304,15 +1307,11 @@ static void ib_ucm_add_one(struct ib_device *device)
return;
err_dev:
- device_unregister(&ucm_dev->dev);
-err_cdev:
- cdev_del(&ucm_dev->cdev);
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
+err_devnum:
+ ib_ucm_free_dev(ucm_dev);
err:
- kfree(ucm_dev);
+ put_device(&ucm_dev->dev);
return;
}
@@ -1323,7 +1322,9 @@ static void ib_ucm_remove_one(struct ib_device *device, void *client_data)
if (!ucm_dev)
return;
- device_unregister(&ucm_dev->dev);
+ cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
+ ib_ucm_free_dev(ucm_dev);
+ put_device(&ucm_dev->dev);
}
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e12f8faf8c23..276f0ef835bd 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -898,11 +898,18 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
for (i = 0, out_len -= sizeof(*resp);
i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
i++, out_len -= sizeof(struct ib_path_rec_data)) {
+ struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];
resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL;
- ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
- &resp->path_data[i].path_rec);
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB) {
+ ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
+ } else {
+ struct sa_path_rec ib;
+
+ sa_convert_path_opa_to_ib(&ib, rec);
+ ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
+ }
}
if (copy_to_user(response, resp,
@@ -1197,7 +1204,7 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
static int ucma_set_ib_path(struct ucma_context *ctx,
struct ib_path_rec_data *path_data, size_t optlen)
{
- struct ib_sa_path_rec sa_path;
+ struct sa_path_rec sa_path;
struct rdma_cm_event event;
int ret;
@@ -1215,8 +1222,17 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
memset(&sa_path, 0, sizeof(sa_path));
+ sa_path.rec_type = SA_PATH_REC_TYPE_IB;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
- ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+
+ if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ struct sa_path_rec opa;
+
+ sa_convert_path_ib_to_opa(&opa, &sa_path);
+ ret = rdma_set_ib_paths(ctx->cm_id, &opa, 1);
+ } else {
+ ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ }
if (ret)
return ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 4609b921f899..21e60b1e2ff4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -34,7 +34,8 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
@@ -57,7 +58,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
page = sg_page(sg);
- if (umem->writable && dirty)
+ if (!PageDirty(page) && umem->writable && dirty)
set_page_dirty_lock(page);
put_page(page);
}
@@ -99,9 +100,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
- if (!size)
- return ERR_PTR(-EINVAL);
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
@@ -117,11 +115,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any of the following
* access flags are set. "Local write" and "remote write"
@@ -135,7 +133,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (access & IB_ACCESS_ON_DEMAND) {
put_pid(umem->pid);
- ret = ib_umem_odp_get(context, umem);
+ ret = ib_umem_odp_get(context, umem, access);
if (ret) {
kfree(umem);
return ERR_PTR(ret);
@@ -317,7 +315,6 @@ EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- int shift;
int i;
int n;
struct scatterlist *sg;
@@ -325,11 +322,9 @@ int ib_umem_page_count(struct ib_umem *umem)
if (umem->odp_data)
return ib_umem_num_pages(umem);
- shift = ilog2(umem->page_size);
-
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> shift;
+ n += sg_dma_len(sg) >> umem->page_shift;
return n;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6b079a31dced..8c4ec564e495 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -32,10 +32,13 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/pid.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -239,7 +242,73 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr,
+ size_t size)
+{
+ struct ib_umem *umem;
+ struct ib_umem_odp *odp_data;
+ int pages = size >> PAGE_SHIFT;
+ int ret;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->writable = 1;
+
+ odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+ if (!odp_data) {
+ ret = -ENOMEM;
+ goto out_umem;
+ }
+ odp_data->umem = umem;
+
+ mutex_init(&odp_data->umem_mutex);
+ init_completion(&odp_data->notifier_completion);
+
+ odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
+ if (!odp_data->page_list) {
+ ret = -ENOMEM;
+ goto out_odp_data;
+ }
+
+ odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
+ if (!odp_data->dma_list) {
+ ret = -ENOMEM;
+ goto out_page_list;
+ }
+
+ down_write(&context->umem_rwsem);
+ context->odp_mrs_count++;
+ rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
+ if (likely(!atomic_read(&context->notifier_count)))
+ odp_data->mn_counters_active = true;
+ else
+ list_add(&odp_data->no_private_counters,
+ &context->no_private_counters);
+ up_write(&context->umem_rwsem);
+
+ umem->odp_data = odp_data;
+
+ return umem;
+
+out_page_list:
+ vfree(odp_data->page_list);
+out_odp_data:
+ kfree(odp_data);
+out_umem:
+ kfree(umem);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_odp_umem);
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
+ int access)
{
int ret_val;
struct pid *our_pid;
@@ -248,6 +317,24 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
if (!mm)
return -EINVAL;
+ if (access & IB_ACCESS_HUGETLB) {
+ struct vm_area_struct *vma;
+ struct hstate *h;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ib_umem_start(umem));
+ if (!vma || !is_vm_hugetlb_page(vma)) {
+ up_read(&mm->mmap_sem);
+ return -EINVAL;
+ }
+ h = hstate_vma(vma);
+ umem->page_shift = huge_page_shift(h);
+ up_read(&mm->mmap_sem);
+ umem->hugetlb = 1;
+ } else {
+ umem->hugetlb = 0;
+ }
+
/* Prevent creating ODP MRs in child processes */
rcu_read_lock();
our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
@@ -258,7 +345,6 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
goto out_mm;
}
- umem->hugetlb = 0;
umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
if (!umem->odp_data) {
ret_val = -ENOMEM;
@@ -270,18 +356,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
init_completion(&umem->odp_data->notifier_completion);
- umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+ if (ib_umem_num_pages(umem)) {
+ umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->page_list));
- if (!umem->odp_data->page_list) {
- ret_val = -ENOMEM;
- goto out_odp_data;
- }
+ if (!umem->odp_data->page_list) {
+ ret_val = -ENOMEM;
+ goto out_odp_data;
+ }
- umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+ umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->dma_list));
- if (!umem->odp_data->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
+ if (!umem->odp_data->dma_list) {
+ ret_val = -ENOMEM;
+ goto out_page_list;
+ }
}
/*
@@ -435,7 +523,6 @@ out:
static int ib_umem_odp_map_dma_single_page(
struct ib_umem *umem,
int page_index,
- u64 base_virt_addr,
struct page *page,
u64 access_mask,
unsigned long current_seq)
@@ -458,7 +545,7 @@ static int ib_umem_odp_map_dma_single_page(
if (!(umem->odp_data->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev,
page,
- 0, PAGE_SIZE,
+ 0, BIT(umem->page_shift),
DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(dev, dma_addr)) {
ret = -EFAULT;
@@ -466,6 +553,7 @@ static int ib_umem_odp_map_dma_single_page(
}
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page;
+ umem->npages++;
stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask;
@@ -485,8 +573,9 @@ out:
if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline(
umem,
- base_virt_addr + (page_index * PAGE_SIZE),
- base_virt_addr + ((page_index+1)*PAGE_SIZE),
+ ib_umem_start(umem) + (page_index >> umem->page_shift),
+ ib_umem_start(umem) + ((page_index + 1) >>
+ umem->page_shift),
NULL);
ret = -EAGAIN;
}
@@ -505,7 +594,8 @@ out:
* for failure.
* An -EAGAIN error code is returned when a concurrent mmu notifier prevents
* the function from completing its task.
- *
+ * An -ENOENT error code indicates that the userspace process is being
+ * terminated and its mm was already destroyed.
* @umem: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -524,10 +614,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
struct page **local_page_list = NULL;
- u64 off;
- int j, k, ret = 0, start_idx, npages = 0;
- u64 base_virt_addr;
+ u64 page_mask, off;
+ int j, k, ret = 0, start_idx, npages = 0, page_shift;
unsigned int flags = 0;
+ phys_addr_t p = 0;
if (access_mask == 0)
return -EINVAL;
@@ -540,9 +630,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
if (!local_page_list)
return -ENOMEM;
- off = user_virt & (~PAGE_MASK);
- user_virt = user_virt & PAGE_MASK;
- base_virt_addr = user_virt;
+ page_shift = umem->page_shift;
+ page_mask = ~(BIT(page_shift) - 1);
+ off = user_virt & (~page_mask);
+ user_virt = user_virt & page_mask;
bcnt += off; /* Charge for the first page offset as well. */
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
@@ -553,20 +644,20 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
- ret = -EINVAL;
+ ret = -ENOENT;
goto out_put_task;
}
if (access_mask & ODP_WRITE_ALLOWED_BIT)
flags |= FOLL_WRITE;
- start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+ start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
k = start_idx;
while (bcnt > 0) {
- const size_t gup_num_pages =
- min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ const size_t gup_num_pages = min_t(size_t,
+ (bcnt + BIT(page_shift) - 1) >> page_shift,
+ PAGE_SIZE / sizeof(struct page *));
down_read(&owning_mm->mmap_sem);
/*
@@ -585,14 +676,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- user_virt += npages << PAGE_SHIFT;
mutex_lock(&umem->odp_data->umem_mutex);
- for (j = 0; j < npages; ++j) {
+ for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
+ if (user_virt & ~page_mask) {
+ p += PAGE_SIZE;
+ if (page_to_phys(local_page_list[j]) != p) {
+ ret = -EFAULT;
+ break;
+ }
+ put_page(local_page_list[j]);
+ continue;
+ }
+
ret = ib_umem_odp_map_dma_single_page(
- umem, k, base_virt_addr, local_page_list[j],
- access_mask, current_seq);
+ umem, k, local_page_list[j],
+ access_mask, current_seq);
if (ret < 0)
break;
+
+ p = page_to_phys(local_page_list[j]);
k++;
}
mutex_unlock(&umem->odp_data->umem_mutex);
@@ -636,8 +738,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
* invalidations, so we must make sure we free each page only
* once. */
mutex_lock(&umem->odp_data->umem_mutex);
- for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
+ idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
if (umem->odp_data->page_list[idx]) {
struct page *page = umem->odp_data->page_list[idx];
dma_addr_t dma = umem->odp_data->dma_list[idx];
@@ -665,6 +767,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
put_page(page);
umem->odp_data->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0;
+ umem->npages--;
}
}
mutex_unlock(&umem->odp_data->umem_mutex);
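
The ODP fault path above stops hard-coding PAGE_SIZE/PAGE_MASK and derives offsets, masks and DMA lengths from umem->page_shift, so huge pages simply mean a larger shift. A small stand-alone C check of that arithmetic; the 21-bit shift is only an example standing in for a 2 MiB huge page.

/* cc -o shift_check shift_check.c && ./shift_check */
#include <stdio.h>
#include <stdint.h>

#define BIT(n) (1ULL << (n))

int main(void)
{
	uint64_t user_virt = 0x7f3a12345678ULL;       /* arbitrary example address */
	unsigned int page_shift = 21;                 /* assume 2 MiB huge pages */
	uint64_t page_mask = ~(BIT(page_shift) - 1);  /* same expression as the patch */
	uint64_t off = user_virt & ~page_mask;        /* offset inside the page */

	printf("aligned base: 0x%llx\n", (unsigned long long)(user_virt & page_mask));
	printf("offset:       0x%llx\n", (unsigned long long)off);
	printf("page size:    %llu bytes\n", (unsigned long long)BIT(page_shift));
	return 0;
}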
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
index 727d788448f5..d176597b4d78 100644
--- a/drivers/infiniband/core/umem_rbtree.c
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
void *cookie)
{
int ret_val = 0;
- struct umem_odp_node *node;
+ struct umem_odp_node *node, *next;
struct ib_umem_odp *umem;
if (unlikely(start == last))
return ret_val;
- for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
- node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+ for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+ node; node = next) {
+ next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
}
return ret_val;
}
+EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
+
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+ u64 addr, u64 length)
+{
+ struct umem_odp_node *node;
+
+ node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
+ if (node)
+ return container_of(node, struct ib_umem_odp, interval_tree);
+ return NULL;
+
+}
+EXPORT_SYMBOL(rbt_ib_umem_lookup);
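
rbt_ib_umem_for_each_in_range() now fetches the next interval-tree node before invoking the callback, so the callback is free to unlink the node it was handed. The same pattern is shown below on a plain singly linked list as a self-contained sketch; drop_odd() is an invented callback, not an RDMA interface.

/* cc -o safe_iter safe_iter.c && ./safe_iter */
#include <stdio.h>
#include <stdlib.h>

struct node { int val; struct node *next; };

static struct node *push(struct node *head, int val)
{
	struct node *n = malloc(sizeof(*n));

	n->val = val;
	n->next = head;
	return n;
}

static void drop_odd(struct node **head, struct node *n)
{
	/* callback that removes (and frees) odd-valued nodes */
	if (n->val & 1) {
		struct node **pp = head;

		while (*pp != n)
			pp = &(*pp)->next;
		*pp = n->next;
		free(n);
	}
}

int main(void)
{
	struct node *head = NULL, *n, *next;
	int i;

	for (i = 1; i <= 5; i++)
		head = push(head, i);

	/* safe iteration: grab 'next' before the callback may free 'n' */
	for (n = head; n; n = next) {
		next = n->next;
		drop_odd(&head, n);
	}

	for (n = head; n; n = n->next)
		printf("%d ", n->val);           /* prints the surviving even values */
	printf("\n");

	while (head) {                           /* cleanup */
		next = head->next;
		free(head);
		head = next;
	}
	return 0;
}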
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 249b403b43a4..36a6f5c8914c 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -197,7 +197,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
dequeue_send(file, packet);
- ib_destroy_ah(packet->msg->ah);
+ rdma_destroy_ah(packet->msg->ah);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
@@ -235,17 +235,19 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
+ const struct ib_global_route *grh;
ib_init_ah_from_wc(agent->device, agent->port_num,
mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
&ah_attr);
- packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
- packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
- packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
- memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
- packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
+ grh = rdma_ah_read_grh(&ah_attr);
+ packet->mad.hdr.gid_index = grh->sgid_index;
+ packet->mad.hdr.hop_limit = grh->hop_limit;
+ packet->mad.hdr.traffic_class = grh->traffic_class;
+ memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
+ packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
}
if (queue_packet(file, agent, packet))
@@ -449,7 +451,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
struct ib_umad_file *file = filp->private_data;
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
struct ib_rmpp_mad *rmpp_mad;
__be64 *tid;
@@ -489,20 +491,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
}
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid);
- ah_attr.sl = packet->mad.hdr.sl;
- ah_attr.src_path_bits = packet->mad.hdr.path_bits;
- ah_attr.port_num = file->port->port_num;
+ ah_attr.type = rdma_ah_find_type(file->port->ib_dev,
+ file->port->port_num);
+ rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid));
+ rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl);
+ rdma_ah_set_path_bits(&ah_attr, packet->mad.hdr.path_bits);
+ rdma_ah_set_port_num(&ah_attr, file->port->port_num);
if (packet->mad.hdr.grh_present) {
- ah_attr.ah_flags = IB_AH_GRH;
- memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
- ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
- ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
- ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
- ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
+ rdma_ah_set_grh(&ah_attr, NULL,
+ be32_to_cpu(packet->mad.hdr.flow_label),
+ packet->mad.hdr.gid_index,
+ packet->mad.hdr.hop_limit,
+ packet->mad.hdr.traffic_class);
+ rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid);
}
- ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ ah = rdma_create_ah(agent->qp->pd, &ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_up;
@@ -596,7 +600,7 @@ err_send:
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err_up:
mutex_unlock(&file->mutex);
err:
@@ -1183,12 +1187,12 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
- port->cdev.kobj.parent = &umad_dev->kobj;
+ cdev_set_parent(&port->cdev, &umad_dev->kobj);
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
- port->dev = device_create(umad_class, device->dma_device,
+ port->dev = device_create(umad_class, device->dev.parent,
port->cdev.dev, port,
"umad%d", port->dev_num);
if (IS_ERR(port->dev))
@@ -1202,12 +1206,12 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
- port->sm_cdev.kobj.parent = &umad_dev->kobj;
+ cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
- port->sm_dev = device_create(umad_class, device->dma_device,
+ port->sm_dev = device_create(umad_class, device->dev.parent,
port->sm_cdev.dev, port,
"issm%d", port->dev_num);
if (IS_ERR(port->sm_dev))
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 455034ac994e..64d494a64daf 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -76,12 +76,13 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
- * struct ib_uverbs_event_file: One reference is held by the VFS and
- * released when the file is closed. For asynchronous event files,
- * another reference is held by the corresponding main context file
- * and released when that file is closed. For completion event files,
- * a reference is taken when a CQ is created that uses the file, and
- * released when the CQ is destroyed.
+ * struct ib_uverbs_event_queue: Base structure for
+ * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
+ * One reference is held by the VFS and released when the file is closed.
+ * For asynchronous event files, another reference is held by the corresponding
+ * main context file and released when that file is closed. For completion
+ * event files, a reference is taken when a CQ is created that uses the file,
+ * and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
@@ -101,18 +102,26 @@ struct ib_uverbs_device {
struct list_head uverbs_events_file_list;
};
-struct ib_uverbs_event_file {
- struct kref ref;
- int is_async;
- struct ib_uverbs_file *uverbs_file;
+struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
+};
+
+struct ib_uverbs_async_event_file {
+ struct ib_uverbs_event_queue ev_queue;
+ struct ib_uverbs_file *uverbs_file;
+ struct kref ref;
struct list_head list;
};
+struct ib_uverbs_completion_event_file {
+ struct ib_uobject_file uobj_file;
+ struct ib_uverbs_event_queue ev_queue;
+};
+
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
@@ -120,9 +129,13 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
- struct ib_uverbs_event_file *async_file;
+ struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
+
+ struct idr idr;
+ /* spinlock protects write access to idr */
+ spinlock_t idr_lock;
};
struct ib_uverbs_event {
@@ -159,6 +172,8 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
+ /* lock for mcast list */
+ struct mutex mcast_lock;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
@@ -176,32 +191,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
-extern spinlock_t ib_uverbs_idr_lock;
-extern struct idr ib_uverbs_pd_idr;
-extern struct idr ib_uverbs_mr_idr;
-extern struct idr ib_uverbs_mw_idr;
-extern struct idr ib_uverbs_ah_idr;
-extern struct idr ib_uverbs_cq_idr;
-extern struct idr ib_uverbs_qp_idr;
-extern struct idr ib_uverbs_srq_idr;
-extern struct idr ib_uverbs_xrcd_idr;
-extern struct idr ib_uverbs_rule_idr;
-extern struct idr ib_uverbs_wq_idr;
-extern struct idr ib_uverbs_rwq_ind_tbl_idr;
-
-void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
-
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async);
+extern const struct file_operations uverbs_event_fops;
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
+void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
@@ -210,9 +211,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj);
struct ib_uverbs_flow_spec {
union {
@@ -228,6 +232,8 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
+ struct ib_uverbs_flow_spec_action_tag flow_tag;
+ struct ib_uverbs_flow_spec_action_drop drop;
};
};
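
The lifetime rules spelled out in the ib_uverbs_event_queue comment above boil down to plain reference counting: the open file holds one reference and every user of the queue (a CQ bound to a completion channel, or the main context file for async events) holds another. A toy user-space version of that get/put pattern follows; the ev_file_* names are invented for illustration and are not the kernel's kref or uverbs interfaces.

/* cc -o ev_ref ev_ref.c && ./ev_ref */
#include <stdio.h>
#include <stdlib.h>

struct event_file {
	int refcount;
};

static struct event_file *ev_file_create(void)
{
	struct event_file *f = malloc(sizeof(*f));

	f->refcount = 1;                 /* reference held by the open file itself */
	return f;
}

static void ev_file_get(struct event_file *f)
{
	f->refcount++;                   /* e.g. a CQ starts using this channel */
}

static void ev_file_put(struct event_file *f)
{
	if (--f->refcount == 0) {        /* last user gone: release the memory */
		printf("event file freed\n");
		free(f);
	}
}

int main(void)
{
	struct event_file *f = ev_file_create();

	ev_file_get(f);                  /* CQ created on this channel */
	ev_file_put(f);                  /* file closed by userspace */
	ev_file_put(f);                  /* CQ destroyed: last reference dropped */
	return 0;
}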
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 700782203483..2c98533a0203 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -40,270 +40,29 @@
#include <linux/uaccess.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+
#include "uverbs.h"
#include "core_priv.h"
-struct uverbs_lock_class {
- struct lock_class_key key;
- char name[16];
-};
-
-static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
-static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
-static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
-static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
-static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
-static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
-static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
-static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
-static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
-
-/*
- * The ib_uobject locking scheme is as follows:
- *
- * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr write operations. When an object is
- * looked up, a reference must be taken on the object's kref before
- * dropping this lock. For read operations, the rcu_read_lock()
- * and rcu_write_lock() but similarly the kref reference is grabbed
- * before the rcu_read_unlock().
- *
- * - Each object also has an rwsem. This rwsem must be held for
- * reading while an operation that uses the object is performed.
- * For example, while registering an MR, the associated PD's
- * uobject.mutex must be held for reading. The rwsem must be held
- * for writing while initializing or destroying an object.
- *
- * - In addition, each object has a "live" flag. If this flag is not
- * set, then lookups of the object will fail even if it is found in
- * the idr. This handles a reader that blocks and does not acquire
- * the rwsem until after the object is destroyed. The destroy
- * operation will set the live flag to 0 and then drop the rwsem;
- * this will allow the reader to acquire the rwsem, see that the
- * live flag is 0, and then drop the rwsem and its reference to
- * object. The underlying storage will not be freed until the last
- * reference to the object is dropped.
- */
-
-static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct uverbs_lock_class *c)
-{
- uobj->user_handle = user_handle;
- uobj->context = context;
- kref_init(&uobj->ref);
- init_rwsem(&uobj->mutex);
- lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
- uobj->live = 0;
-}
-
-static void release_uobj(struct kref *kref)
-{
- kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
-}
-
-static void put_uobj(struct ib_uobject *uobj)
-{
- kref_put(&uobj->ref, release_uobj);
-}
-
-static void put_uobj_read(struct ib_uobject *uobj)
-{
- up_read(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static void put_uobj_write(struct ib_uobject *uobj)
-{
- up_write(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
-{
- int ret;
-
- idr_preload(GFP_KERNEL);
- spin_lock(&ib_uverbs_idr_lock);
-
- ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
- if (ret >= 0)
- uobj->id = ret;
-
- spin_unlock(&ib_uverbs_idr_lock);
- idr_preload_end();
-
- return ret < 0 ? ret : 0;
-}
-
-void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
+static struct ib_uverbs_completion_event_file *
+ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
{
- spin_lock(&ib_uverbs_idr_lock);
- idr_remove(idr, uobj->id);
- spin_unlock(&ib_uverbs_idr_lock);
-}
+ struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ fd, context);
+ struct ib_uobject_file *uobj_file;
-static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
+ if (IS_ERR(uobj))
+ return (void *)uobj;
- rcu_read_lock();
- uobj = idr_find(idr, id);
- if (uobj) {
- if (uobj->context == context)
- kref_get(&uobj->ref);
- else
- uobj = NULL;
- }
- rcu_read_unlock();
+ uverbs_uobject_get(uobj);
+ uobj_put_read(uobj);
- return uobj;
-}
-
-static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
- struct ib_ucontext *context, int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- if (nested)
- down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
- else
- down_read(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_read(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- down_write(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_write(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
- int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_read_uobj(idr, id, context, nested);
- return uobj ? uobj->object : NULL;
-}
-
-static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
-}
-
-static void put_pd_read(struct ib_pd *pd)
-{
- put_uobj_read(pd->uobject);
-}
-
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
-{
- return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
-}
-
-static void put_cq_read(struct ib_cq *cq)
-{
- put_uobj_read(cq->uobject);
-}
-
-static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
-}
-
-static void put_ah_read(struct ib_ah *ah)
-{
- put_uobj_read(ah->uobject);
-}
-
-static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
-}
-
-static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
-}
-
-static void put_wq_read(struct ib_wq *wq)
-{
- put_uobj_read(wq->uobject);
-}
-
-static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
- struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
-}
-
-static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
-{
- put_uobj_read(ind_table->uobject);
-}
-
-static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
- return uobj ? uobj->object : NULL;
-}
-
-static void put_qp_read(struct ib_qp *qp)
-{
- put_uobj_read(qp->uobject);
-}
-
-static void put_qp_write(struct ib_qp *qp)
-{
- put_uobj_write(qp->uobject);
-}
-
-static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
-}
-
-static void put_srq_read(struct ib_srq *srq)
-{
- put_uobj_read(srq->uobject);
-}
-
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
- struct ib_uobject **uobj)
-{
- *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
- return *uobj ? (*uobj)->object : NULL;
-}
-
-static void put_xrcd_read(struct ib_uobject *uobj)
-{
- put_uobj_read(uobj);
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ return container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -316,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_udata udata;
struct ib_ucontext *ucontext;
struct file *filp;
+ struct ib_rdmacg_object cg_obj;
int ret;
if (out_len < sizeof resp)
@@ -335,24 +95,22 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
+ ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+ if (ret)
+ goto err;
+
ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
- goto err;
+ goto err_alloc;
}
ucontext->device = ib_dev;
- INIT_LIST_HEAD(&ucontext->pd_list);
- INIT_LIST_HEAD(&ucontext->mr_list);
- INIT_LIST_HEAD(&ucontext->mw_list);
- INIT_LIST_HEAD(&ucontext->cq_list);
- INIT_LIST_HEAD(&ucontext->qp_list);
- INIT_LIST_HEAD(&ucontext->srq_list);
- INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->wq_list);
- INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
- INIT_LIST_HEAD(&ucontext->xrcd_list);
- INIT_LIST_HEAD(&ucontext->rule_list);
+ ucontext->cg_obj = cg_obj;
+ /* ufile is required when some objects are released */
+ ucontext->ufile = file;
+ uverbs_initialize_ucontext(ucontext);
+
rcu_read_lock();
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
@@ -376,7 +134,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_free;
resp.async_fd = ret;
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
+ filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
goto err_fd;
@@ -407,6 +165,9 @@ err_free:
put_pid(ucontext->tgid);
ib_dev->dealloc_ucontext(ucontext);
+err_alloc:
+ ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+
err:
mutex_unlock(&file->mutex);
return ret;
@@ -556,12 +317,9 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
if (IS_ERR(pd)) {
@@ -575,10 +333,6 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
atomic_set(&pd->usecnt, 0);
uobj->object = pd;
- ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
@@ -588,24 +342,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
goto err_copy;
}
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
-err_idr:
ib_dealloc_pd(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -616,43 +361,19 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
{
struct ib_uverbs_dealloc_pd cmd;
struct ib_uobject *uobj;
- struct ib_pd *pd;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- pd = uobj->object;
-
- if (atomic_read(&pd->usecnt)) {
- ret = -EBUSY;
- goto err_put;
- }
-
- ret = pd->device->dealloc_pd(uobj->object);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
- if (ret)
- goto err_put;
-
- uobj->live = 0;
- put_uobj_write(uobj);
+ uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+ ret = uobj_remove_commit(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
-
-err_put:
- put_uobj_write(uobj);
- return ret;
+ return ret ?: in_len;
}
struct xrcd_table_entry {
@@ -789,16 +510,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj) {
- ret = -ENOMEM;
+ obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+ file->ucontext);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
}
- init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
-
- down_write(&obj->uobject.mutex);
-
if (!xrcd) {
xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
if (IS_ERR(xrcd)) {
@@ -816,10 +534,6 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.xrcd_handle = obj->uobject.id;
@@ -828,7 +542,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
/* create new inode/xrcd table entry */
ret = xrcd_table_insert(file->device, inode, xrcd);
if (ret)
- goto err_insert_xrcd;
+ goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
@@ -842,12 +556,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (f.file)
fdput(f);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
mutex_unlock(&file->device->xrcd_tree_mutex);
return in_len;
@@ -859,14 +568,11 @@ err_copy:
atomic_dec(&xrcd->usecnt);
}
-err_insert_xrcd:
- idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
-
-err_idr:
+err_dealloc_xrcd:
ib_dealloc_xrcd(xrcd);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
err_tree_mutex_unlock:
if (f.file)
@@ -884,75 +590,41 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
{
struct ib_uverbs_close_xrcd cmd;
struct ib_uobject *uobj;
- struct ib_xrcd *xrcd = NULL;
- struct inode *inode = NULL;
- struct ib_uxrcd_object *obj;
- int live;
int ret = 0;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- mutex_lock(&file->device->xrcd_tree_mutex);
- uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
- if (!uobj) {
- ret = -EINVAL;
- goto out;
- }
-
- xrcd = uobj->object;
- inode = xrcd->inode;
- obj = container_of(uobj, struct ib_uxrcd_object, uobject);
- if (atomic_read(&obj->refcnt)) {
- put_uobj_write(uobj);
- ret = -EBUSY;
- goto out;
- }
-
- if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
- ret = ib_dealloc_xrcd(uobj->object);
- if (!ret)
- uobj->live = 0;
+ uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj)) {
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return PTR_ERR(uobj);
}
- live = uobj->live;
- if (inode && ret)
- atomic_inc(&xrcd->usecnt);
-
- put_uobj_write(uobj);
-
- if (ret)
- goto out;
-
- if (inode && !live)
- xrcd_table_delete(file->device, inode);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- ret = in_len;
-
-out:
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return ret;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
- struct ib_xrcd *xrcd)
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+ struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why)
{
struct inode *inode;
+ int ret;
inode = xrcd->inode;
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
- return;
+ return 0;
- ib_dealloc_xrcd(xrcd);
+ ret = ib_dealloc_xrcd(xrcd);
- if (inode)
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ atomic_inc(&xrcd->usecnt);
+ else if (inode)
xrcd_table_delete(dev, inode);
+
+ return ret;
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
@@ -985,14 +657,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(mr), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1020,9 +689,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
- if (ret)
- goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
@@ -1035,29 +701,20 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mr_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
+ uobj_put_obj_read(pd);
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
-err_unreg:
ib_dereg_mr(mr);
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1093,11 +750,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
- file->ucontext);
-
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
mr = uobj->object;
@@ -1108,7 +764,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -1141,11 +797,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
put_uobjs:
-
- put_uobj_write(mr->uobject);
+ uobj_put_write(uobj);
return ret;
}
@@ -1156,37 +811,20 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_mr *mr;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- mr = uobj->object;
+ ret = uobj_remove_commit(uobj);
- ret = ib_dereg_mr(mr);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return ret ?: in_len;
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
@@ -1208,14 +846,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(mw), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
- down_write(&uobj->mutex);
-
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1238,9 +873,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mw;
- ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
- if (ret)
- goto err_unalloc;
memset(&resp, 0, sizeof(resp));
resp.rkey = mw->rkey;
@@ -1252,29 +884,17 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mw_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
-err_unalloc:
uverbs_dealloc_mw(mw);
-
err_put:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1284,37 +904,19 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_mw *mw;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mw = uobj->object;
+ uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ret = uverbs_dealloc_mw(mw);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -1324,8 +926,8 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
- struct file *filp;
- int ret;
+ struct ib_uobject *uobj;
+ struct ib_uverbs_completion_event_file *ev_file;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -1333,25 +935,23 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- return ret;
- resp.fd = ret;
+ uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
- if (IS_ERR(filp)) {
- put_unused_fd(resp.fd);
- return PTR_ERR(filp);
- }
+ resp.fd = uobj->id;
+
+ ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
- put_unused_fd(resp.fd);
- fput(filp);
+ uobj_alloc_abort(uobj);
return -EFAULT;
}
- fd_install(resp.fd, filp);
+ uobj_alloc_commit(uobj);
return in_len;
}
@@ -1369,7 +969,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
void *context)
{
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
struct ib_uverbs_ex_create_cq_resp resp;
@@ -1378,21 +978,21 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return ERR_PTR(-ENOMEM);
-
- init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
- down_write(&obj->uobject.mutex);
+ obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return obj;
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
- if (!ev_file) {
- ret = -EINVAL;
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
+ file->ucontext);
+ if (IS_ERR(ev_file)) {
+ ret = PTR_ERR(ev_file);
goto err;
}
}
+ obj->uobject.user_handle = cmd->user_handle;
obj->uverbs_file = file;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
@@ -1405,8 +1005,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
- cq = ib_dev->create_cq(ib_dev, &attr,
- file->ucontext, uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1416,14 +1015,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
cq->uobject = &obj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file;
+ cq->cq_context = &ev_file->ev_queue;
atomic_set(&cq->usecnt, 0);
obj->uobject.object = cq;
- ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
- if (ret)
- goto err_free;
-
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
resp.base.cqe = cq->cqe;
@@ -1435,20 +1030,11 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (ret)
goto err_cb;
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
-
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
return obj;
err_cb:
- idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
-
-err_free:
ib_destroy_cq(cq);
err_file:
@@ -1456,7 +1042,7 @@ err_file:
ib_uverbs_release_ucq(file, ev_file, obj);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
return ERR_PTR(ret);
}
@@ -1579,7 +1165,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1594,7 +1180,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
ret = -EFAULT;
out:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret ? ret : in_len;
}
@@ -1641,7 +1227,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1673,7 +1259,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
ret = in_len;
out_put:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret;
}
@@ -1688,14 +1274,14 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return in_len;
}
@@ -1710,42 +1296,36 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_cq *cq;
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ /*
+	 * Make sure we don't free the memory in remove_commit as we still
+	 * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
cq = uobj->object;
- ev_file = cq->cq_context;
obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
- ret = ib_destroy_cq(cq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
+ }
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_ucq(file, ev_file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
- put_uobj(uobj);
-
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
@@ -1767,7 +1347,7 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_device *device;
struct ib_pd *pd = NULL;
struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT);
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
@@ -1781,18 +1361,20 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+ obj->uxrcd = NULL;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
+ mutex_init(&obj->mcast_lock);
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
- &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
- file->ucontext);
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ cmd->rwq_ind_tbl_handle,
+ file->ucontext);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1816,8 +1398,15 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
- &xrcd_uobj);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+ file->ucontext);
+
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
goto err_put;
@@ -1829,8 +1418,8 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = idr_read_srq(cmd->srq_handle,
- file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd->srq_handle,
+ file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
ret = -EINVAL;
goto err_put;
@@ -1839,8 +1428,8 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = idr_read_cq(cmd->recv_cq_handle,
- file->ucontext, 0);
+ rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+ file->ucontext);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1850,10 +1439,11 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
+ scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+ file->ucontext);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1891,7 +1481,8 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
- IB_QP_CREATE_SCATTER_FCS)) {
+ IB_QP_CREATE_SCATTER_FCS |
+ IB_QP_CREATE_CVLAN_STRIPPING)) {
ret = -EINVAL;
goto err_put;
}
@@ -1915,6 +1506,10 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
+ ret = ib_create_qp_security(qp, device);
+ if (ret)
+ goto err_cb;
+
qp->real_qp = qp;
qp->device = device;
qp->pd = pd;
@@ -1939,9 +1534,6 @@ static int create_qp(struct ib_uverbs_file *file,
qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
memset(&resp, 0, sizeof resp);
resp.base.qpn = qp->qp_num;
@@ -1963,50 +1555,41 @@ static int create_qp(struct ib_uverbs_file *file,
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_cb:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_qp(qp);
err_put:
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
+ if (!IS_ERR(xrcd_uobj))
+ uobj_put_read(xrcd_uobj);
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
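
create_qp shows the full allocate/commit/abort lifecycle that replaces the old init_uobj()/idr_add_uobj()/list_add_tail()/live = 1 sequence. The skeleton below reduces it to the three rdma_core calls; make_hw_object() is a stand-in for whatever driver call actually creates the verbs object, and the 'ah' type name is only an example:

static int example_create(struct ib_uverbs_file *file)
{
	struct ib_uobject *uobj;
	void *hw_obj;

	uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);	/* nothing was published yet */

	hw_obj = make_hw_object(file);	/* hypothetical driver call */
	if (IS_ERR(hw_obj)) {
		uobj_alloc_abort(uobj);	/* roll the allocation back */
		return PTR_ERR(hw_obj);
	}

	uobj->object = hw_obj;
	/* ...fill the response and copy it to user space here... */

	uobj_alloc_commit(uobj);	/* handle becomes visible to user space */
	return 0;
}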
@@ -2142,17 +1725,22 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
- goto err_put;
+ goto err_xrcd;
}
attr.event_handler = ib_uverbs_qp_event_handler;
@@ -2167,15 +1755,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
qp = ib_open_qp(xrcd, &attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_xrcd;
}
- qp->uobject = &obj->uevent.uobject;
-
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd.user_handle;
memset(&resp, 0, sizeof resp);
resp.qpn = qp->qp_num;
@@ -2184,32 +1768,25 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_remove;
+ goto err_destroy;
}
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ qp->uobject = &obj->uevent.uobject;
+ uobj_put_read(xrcd_uobj);
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return in_len;
-err_remove:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
err_destroy:
ib_destroy_qp(qp);
-
+err_xrcd:
+ uobj_put_read(xrcd_uobj);
err_put:
- put_xrcd_read(xrcd_uobj);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -2223,6 +1800,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_qp_attr *attr;
struct ib_qp_init_attr *init_attr;
+ const struct ib_global_route *grh;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -2235,7 +1813,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2243,7 +1821,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
if (ret)
goto out;
@@ -2272,29 +1850,39 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.alt_port_num = attr->alt_port_num;
resp.alt_timeout = attr->alt_timeout;
- memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
- resp.dest.flow_label = attr->ah_attr.grh.flow_label;
- resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
- resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
- resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
- resp.dest.dlid = attr->ah_attr.dlid;
- resp.dest.sl = attr->ah_attr.sl;
- resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
- resp.dest.static_rate = attr->ah_attr.static_rate;
- resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
- resp.dest.port_num = attr->ah_attr.port_num;
-
- memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
- resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
- resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
- resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
- resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
- resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
- resp.alt_dest.sl = attr->alt_ah_attr.sl;
- resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
- resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
- resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
- resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+ resp.dest.dlid = rdma_ah_get_dlid(&attr->ah_attr);
+ resp.dest.sl = rdma_ah_get_sl(&attr->ah_attr);
+ resp.dest.src_path_bits = rdma_ah_get_path_bits(&attr->ah_attr);
+ resp.dest.static_rate = rdma_ah_get_static_rate(&attr->ah_attr);
+ resp.dest.is_global = !!(rdma_ah_get_ah_flags(&attr->ah_attr) &
+ IB_AH_GRH);
+ if (resp.dest.is_global) {
+ grh = rdma_ah_read_grh(&attr->ah_attr);
+ memcpy(resp.dest.dgid, grh->dgid.raw, 16);
+ resp.dest.flow_label = grh->flow_label;
+ resp.dest.sgid_index = grh->sgid_index;
+ resp.dest.hop_limit = grh->hop_limit;
+ resp.dest.traffic_class = grh->traffic_class;
+ }
+ resp.dest.port_num = rdma_ah_get_port_num(&attr->ah_attr);
+
+ resp.alt_dest.dlid = rdma_ah_get_dlid(&attr->alt_ah_attr);
+ resp.alt_dest.sl = rdma_ah_get_sl(&attr->alt_ah_attr);
+ resp.alt_dest.src_path_bits = rdma_ah_get_path_bits(&attr->alt_ah_attr);
+ resp.alt_dest.static_rate
+ = rdma_ah_get_static_rate(&attr->alt_ah_attr);
+ resp.alt_dest.is_global
+ = !!(rdma_ah_get_ah_flags(&attr->alt_ah_attr) &
+ IB_AH_GRH);
+ if (resp.alt_dest.is_global) {
+ grh = rdma_ah_read_grh(&attr->alt_ah_attr);
+ memcpy(resp.alt_dest.dgid, grh->dgid.raw, 16);
+ resp.alt_dest.flow_label = grh->flow_label;
+ resp.alt_dest.sgid_index = grh->sgid_index;
+ resp.alt_dest.hop_limit = grh->hop_limit;
+ resp.alt_dest.traffic_class = grh->traffic_class;
+ }
+ resp.alt_dest.port_num = rdma_ah_get_port_num(&attr->alt_ah_attr);
resp.max_send_wr = init_attr->cap.max_send_wr;
resp.max_recv_wr = init_attr->cap.max_recv_wr;
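
query_qp now reads address-handle state exclusively through the rdma_ah_get_*() accessors and only dereferences the GRH when IB_AH_GRH is actually set. The helper below restates that read path for a single destination; example_fill_dest is an illustrative name and struct ib_uverbs_qp_dest is the uapi layout implied by the response fields above:

static void example_fill_dest(struct ib_uverbs_qp_dest *dest,
			      const struct rdma_ah_attr *ah)
{
	const struct ib_global_route *grh;

	dest->dlid          = rdma_ah_get_dlid(ah);
	dest->sl            = rdma_ah_get_sl(ah);
	dest->src_path_bits = rdma_ah_get_path_bits(ah);
	dest->static_rate   = rdma_ah_get_static_rate(ah);
	dest->port_num      = rdma_ah_get_port_num(ah);
	dest->is_global     = !!(rdma_ah_get_ah_flags(ah) & IB_AH_GRH);

	if (dest->is_global) {		/* GRH fields are valid only now */
		grh = rdma_ah_read_grh(ah);
		memcpy(dest->dgid, grh->dgid.raw, 16);
		dest->flow_label    = grh->flow_label;
		dest->sgid_index    = grh->sgid_index;
		dest->hop_limit     = grh->hop_limit;
		dest->traffic_class = grh->traffic_class;
	}
}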
@@ -2339,12 +1927,18 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
}
+ if ((cmd->base.attr_mask & IB_QP_PORT) &&
+ !rdma_is_port_valid(qp->device, cmd->base.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
attr->qp_state = cmd->base.qp_state;
attr->cur_qp_state = cmd->base.cur_qp_state;
attr->path_mtu = cmd->base.path_mtu;
@@ -2368,51 +1962,55 @@ static int modify_qp(struct ib_uverbs_file *file,
attr->alt_timeout = cmd->base.alt_timeout;
attr->rate_limit = cmd->rate_limit;
- memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
- attr->ah_attr.dlid = cmd->base.dest.dlid;
- attr->ah_attr.sl = cmd->base.dest.sl;
- attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd->base.dest.static_rate;
- attr->ah_attr.ah_flags = cmd->base.dest.is_global ?
- IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd->base.dest.port_num;
-
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd->base.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ?
- IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num;
-
- if (qp->real_qp == qp) {
- if (cmd->base.attr_mask & IB_QP_AV) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- goto release_qp;
- }
- ret = qp->device->modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type,
- cmd->base.attr_mask),
- udata);
+ attr->ah_attr.type = rdma_ah_find_type(qp->device,
+ cmd->base.dest.port_num);
+ if (cmd->base.dest.is_global) {
+ rdma_ah_set_grh(&attr->ah_attr, NULL,
+ cmd->base.dest.flow_label,
+ cmd->base.dest.sgid_index,
+ cmd->base.dest.hop_limit,
+ cmd->base.dest.traffic_class);
+ rdma_ah_set_dgid_raw(&attr->ah_attr, cmd->base.dest.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr->ah_attr, 0);
+ }
+ rdma_ah_set_dlid(&attr->ah_attr, cmd->base.dest.dlid);
+ rdma_ah_set_sl(&attr->ah_attr, cmd->base.dest.sl);
+ rdma_ah_set_path_bits(&attr->ah_attr, cmd->base.dest.src_path_bits);
+ rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate);
+ rdma_ah_set_port_num(&attr->ah_attr,
+ cmd->base.dest.port_num);
+
+ attr->alt_ah_attr.type = rdma_ah_find_type(qp->device,
+					       cmd->base.alt_dest.port_num);
+ if (cmd->base.alt_dest.is_global) {
+ rdma_ah_set_grh(&attr->alt_ah_attr, NULL,
+ cmd->base.alt_dest.flow_label,
+ cmd->base.alt_dest.sgid_index,
+ cmd->base.alt_dest.hop_limit,
+ cmd->base.alt_dest.traffic_class);
+ rdma_ah_set_dgid_raw(&attr->alt_ah_attr,
+ cmd->base.alt_dest.dgid);
} else {
- ret = ib_modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type,
- cmd->base.attr_mask));
+ rdma_ah_set_ah_flags(&attr->alt_ah_attr, 0);
}
-release_qp:
- put_qp_read(qp);
+ rdma_ah_set_dlid(&attr->alt_ah_attr, cmd->base.alt_dest.dlid);
+ rdma_ah_set_sl(&attr->alt_ah_attr, cmd->base.alt_dest.sl);
+ rdma_ah_set_path_bits(&attr->alt_ah_attr,
+ cmd->base.alt_dest.src_path_bits);
+ rdma_ah_set_static_rate(&attr->alt_ah_attr,
+ cmd->base.alt_dest.static_rate);
+ rdma_ah_set_port_num(&attr->alt_ah_attr,
+ cmd->base.alt_dest.port_num);
+
+ ret = ib_modify_qp_with_udata(qp, attr,
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask),
+ udata);
+release_qp:
+ uobj_put_obj_read(qp);
out:
kfree(attr);
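
The write side is symmetric: modify_qp first rejects an out-of-range port with rdma_is_port_valid() and then builds each rdma_ah_attr through the setter helpers, deriving the attribute flavour (IB vs. RoCE) from the destination port via rdma_ah_find_type(). A sketch of that builder, with example_fill_ah_attr as an illustrative name:

static void example_fill_ah_attr(struct ib_device *dev,
				 struct rdma_ah_attr *ah,
				 const struct ib_uverbs_qp_dest *dest)
{
	ah->type = rdma_ah_find_type(dev, dest->port_num);

	if (dest->is_global) {
		/* dgid is set separately, so pass NULL here as the diff does */
		rdma_ah_set_grh(ah, NULL, dest->flow_label, dest->sgid_index,
				dest->hop_limit, dest->traffic_class);
		rdma_ah_set_dgid_raw(ah, dest->dgid);
	} else {
		rdma_ah_set_ah_flags(ah, 0);
	}

	rdma_ah_set_dlid(ah, dest->dlid);
	rdma_ah_set_sl(ah, dest->sl);
	rdma_ah_set_path_bits(ah, dest->src_path_bits);
	rdma_ah_set_static_rate(ah, dest->static_rate);
	rdma_ah_set_port_num(ah, dest->port_num);
}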
@@ -2489,7 +2087,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_uobject *uobj;
- struct ib_qp *qp;
struct ib_uqp_object *obj;
int ret = -EINVAL;
@@ -2498,40 +2095,26 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- qp = uobj->object;
- obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- if (!list_empty(&obj->mcast_list)) {
- put_uobj_write(uobj);
- return -EBUSY;
- }
-
- ret = ib_destroy_qp(qp);
- if (!ret)
- uobj->live = 0;
+ uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- put_uobj_write(uobj);
+ obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+	 * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- if (obj->uxrcd)
- atomic_dec(&obj->uxrcd->refcnt);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, &obj->uevent);
+ }
resp.events_reported = obj->uevent.events_reported;
-
- put_uobj(uobj);
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2542,9 +2125,13 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge))
+ return NULL;
+
return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
-};
+}
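
The new guard in alloc_wr() rejects num_sge values that would overflow the 32-bit size computation ALIGN(wr_size, sizeof(struct ib_sge)) + num_sge * sizeof(struct ib_sge) before kmalloc() ever sees it. A small user-space restatement of the same bound (ALIGN_UP, wr_alloc_size_ok, the 16-byte SGE and the 64-byte header are stand-in values chosen only for illustration):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

static int wr_alloc_size_ok(uint32_t wr_size, uint32_t num_sge, size_t sge_sz)
{
	uint64_t hdr = ALIGN_UP((uint64_t)wr_size, sge_sz);

	/* same bound as the kernel check: num_sge * sge_sz must not push the
	 * total past U32_MAX once the aligned header is accounted for */
	return num_sge < (UINT32_MAX - hdr) / sge_sz;
}

int main(void)
{
	printf("num_sge=4          -> %s\n",
	       wr_alloc_size_ok(64, 4, 16) ? "ok" : "rejected");
	printf("num_sge=0xffffffff -> %s\n",
	       wr_alloc_size_ok(64, 0xffffffffu, 16) ? "ok" : "rejected");
	return 0;
}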
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
@@ -2575,7 +2162,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2611,7 +2198,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+ ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+ file->ucontext);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2718,11 +2306,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
ret = -EFAULT;
out_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
- put_ah_read(ud_wr(wr)->ah);
+ uobj_put_obj_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
@@ -2771,6 +2359,13 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
goto err;
}
+ if (user_wr->num_sge >=
+ (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
@@ -2839,21 +2434,21 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
resp.bad_wr = 0;
ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
- put_qp_read(qp);
-
- if (ret)
+ uobj_put_obj_read(qp);
+ if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2889,14 +2484,14 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
resp.bad_wr = 0;
ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
for (next = wr; next; next = next->next) {
@@ -2929,9 +2524,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
int ret;
struct ib_udata udata;
+ u8 *dmac;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -2939,35 +2535,42 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
+ return -EINVAL;
+
INIT_UDATA(&udata, buf + sizeof(cmd),
(unsigned long)cmd.response + sizeof(resp),
in_len - sizeof(cmd), out_len - sizeof(resp));
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
}
- attr.dlid = cmd.attr.dlid;
- attr.sl = cmd.attr.sl;
- attr.src_path_bits = cmd.attr.src_path_bits;
- attr.static_rate = cmd.attr.static_rate;
- attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0;
- attr.port_num = cmd.attr.port_num;
- attr.grh.flow_label = cmd.attr.grh.flow_label;
- attr.grh.sgid_index = cmd.attr.grh.sgid_index;
- attr.grh.hop_limit = cmd.attr.grh.hop_limit;
- attr.grh.traffic_class = cmd.attr.grh.traffic_class;
- memset(&attr.dmac, 0, sizeof(attr.dmac));
- memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+ attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num);
+ rdma_ah_set_dlid(&attr, cmd.attr.dlid);
+ rdma_ah_set_sl(&attr, cmd.attr.sl);
+ rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits);
+ rdma_ah_set_static_rate(&attr, cmd.attr.static_rate);
+ rdma_ah_set_port_num(&attr, cmd.attr.port_num);
+
+ if (cmd.attr.is_global) {
+ rdma_ah_set_grh(&attr, NULL, cmd.attr.grh.flow_label,
+ cmd.attr.grh.sgid_index,
+ cmd.attr.grh.hop_limit,
+ cmd.attr.grh.traffic_class);
+ rdma_ah_set_dgid_raw(&attr, cmd.attr.grh.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr, 0);
+ }
+ dmac = rdma_ah_retrieve_dmac(&attr);
+ if (dmac)
+ memset(dmac, 0, ETH_ALEN);
ah = pd->device->create_ah(pd, &attr, &udata);
@@ -2980,12 +2583,9 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
ah->pd = pd;
atomic_inc(&pd->usecnt);
ah->uobject = uobj;
+ uobj->user_handle = cmd.user_handle;
uobj->object = ah;
- ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
- if (ret)
- goto err_destroy;
-
resp.ah_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -2994,29 +2594,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->ah_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
-err_destroy:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -3025,36 +2615,19 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_ah *ah;
struct ib_uobject *uobj;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- ah = uobj->object;
-
- ret = ib_destroy_ah(ah);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
+ uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -3071,12 +2644,13 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
@@ -3100,7 +2674,8 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
kfree(mcast);
out_put:
- put_qp_write(qp);
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
@@ -3115,34 +2690,66 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
int ret = -EINVAL;
+ bool found = false;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
- ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
- if (ret)
- goto out_put;
-
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
list_del(&mcast->list);
kfree(mcast);
+ found = true;
break;
}
-out_put:
- put_qp_write(qp);
+ if (!found) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+ ret = ib_detach_mcast(qp, (union ib_gid *)cmd.gid, cmd.mlid);
+
+out_put:
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (kern_spec->flow_tag.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_tag))
+ return -EINVAL;
+
+ ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+ ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (kern_spec->drop.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_drop))
+ return -EINVAL;
+
+ ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
@@ -3167,8 +2774,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
ssize_t kern_filter_sz;
@@ -3263,6 +2870,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
return 0;
}
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+ return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ else
+ return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
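
kern_spec_to_ib_spec() is now a dispatcher: it rejects a non-zero reserved field, routes action specs (flow tag, drop) to the new size-checked handler above, and leaves filter specs to the renamed kern_spec_to_ib_spec_filter(). For context, a simplified and purely illustrative walker over a packed chain of user specs; the real create_flow handler does this itself and packs the converted specs back-to-back by their individual sizes:

static int example_convert_specs(void *kern_buf, size_t kern_len,
				 union ib_flow_spec *out, int max_specs)
{
	struct ib_uverbs_flow_spec *kern_spec = kern_buf;
	int n = 0, err;

	while (n < max_specs &&
	       kern_len >= sizeof(struct ib_uverbs_flow_spec_hdr)) {
		/* each spec declares its own size; never trust it blindly */
		if (kern_spec->size < sizeof(struct ib_uverbs_flow_spec_hdr) ||
		    kern_spec->size > kern_len)
			return -EINVAL;

		err = kern_spec_to_ib_spec(kern_spec, &out[n]);
		if (err)
			return err;

		kern_len -= kern_spec->size;
		kern_spec = (void *)kern_spec + kern_spec->size;
		n++;
	}

	return kern_len ? -EINVAL : n;
}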
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
@@ -3300,20 +2919,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = kmalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
- &wq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3325,6 +2942,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+ sizeof(cmd.create_flags)))
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
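
Note how create_wq consumes the new cmd.create_flags only when the command the user actually wrote is long enough to contain it; older user space that stops before that offset keeps working unchanged. A minimal, hypothetical illustration of that rule (struct example_cmd and example_get_create_flags are not part of the patch):

struct example_cmd {
	u32 wq_type;
	u32 max_wr;
	u32 create_flags;	/* field appended in a later ABI revision */
};

static u32 example_get_create_flags(const struct example_cmd *cmd,
				    size_t user_inlen)
{
	/* consume the field only if the user's write covered all of it */
	if (user_inlen >= offsetof(struct example_cmd, create_flags) +
			  sizeof(cmd->create_flags))
		return cmd->create_flags;

	return 0;	/* old user space: take the default */
}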
@@ -3345,9 +2965,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
atomic_inc(&cq->usecnt);
wq->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = wq;
- err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
- if (err)
- goto destroy_wq;
memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
@@ -3360,27 +2977,19 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_pd_read(pd);
- put_cq_read(cq);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_put_obj_read(cq);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-destroy_wq:
ib_destroy_wq(wq);
err_put_cq:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
err_put_pd:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_uobj:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return err;
}
@@ -3392,7 +3001,6 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
{
struct ib_uverbs_ex_destroy_wq cmd = {};
struct ib_uverbs_ex_destroy_wq_resp resp = {};
- struct ib_wq *wq;
struct ib_uobject *uobj;
struct ib_uwq_object *obj;
size_t required_cmd_sz;
@@ -3421,36 +3029,25 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- wq = uobj->object;
obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
- ret = ib_destroy_wq(wq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+	 * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- ib_uverbs_release_uevent(file, &obj->uevent);
+ ret = uobj_remove_commit(uobj);
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
-
- ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
+ uverbs_uobject_put(uobj);
if (ret)
return ret;
- return 0;
+ return ib_copy_to_udata(ucore, &resp, resp.response_length);
}
int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
@@ -3480,17 +3077,21 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (!cmd.attr_mask)
return -EINVAL;
- if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = idr_read_wq(cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
if (!wq)
return -EINVAL;
wq_attr.curr_wq_state = cmd.curr_wq_state;
wq_attr.wq_state = cmd.wq_state;
+ if (cmd.attr_mask & IB_WQ_FLAGS) {
+ wq_attr.flags = cmd.flags;
+ wq_attr.flags_mask = cmd.flags_mask;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
- put_wq_read(wq);
+ uobj_put_obj_read(wq);
return ret;
}
@@ -3568,7 +3169,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
+ wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+ file->ucontext);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3577,14 +3179,12 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto put_wqs;
}
- init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
- down_write(&uobj->mutex);
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
@@ -3604,10 +3204,6 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (i = 0; i < num_wq_handles; i++)
atomic_inc(&wqs[i]->usecnt);
- err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- if (err)
- goto destroy_ind_tbl;
-
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = required_resp_len;
@@ -3620,26 +3216,18 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
kfree(wqs_handles);
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(wqs[j]);
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-destroy_ind_tbl:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
put_wqs:
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ uobj_put_obj_read(wqs[j]);
err_free:
kfree(wqs_handles);
kfree(wqs);
@@ -3652,10 +3240,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_uobject *uobj;
int ret;
- struct ib_wq **ind_tbl;
size_t required_cmd_sz;
required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
@@ -3675,31 +3261,12 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- rwq_ind_tbl = uobj->object;
- ind_tbl = rwq_ind_tbl->ind_tbl;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- kfree(ind_tbl);
- return ret;
+ return uobj_remove_commit(uobj);
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
@@ -3773,15 +3340,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(flow), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto err_free_attr;
}
- init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
- down_write(&uobj->mutex);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3830,10 +3395,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
flow_id->uobject = uobj;
uobj->object = flow_id;
- err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
- if (err)
- goto destroy_flow;
-
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
@@ -3842,28 +3403,20 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_qp_read(qp);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rule_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(qp);
+ uobj_alloc_commit(uobj);
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-destroy_flow:
ib_destroy_flow(flow_id);
err_free:
kfree(flow_attr);
err_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
err_free_attr:
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
@@ -3876,7 +3429,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_flow *flow_id;
struct ib_uobject *uobj;
int ret;
@@ -3890,26 +3442,12 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- flow_id = uobj->object;
-
- ret = ib_destroy_flow(flow_id);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
+ uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+ ret = uobj_remove_commit(uobj);
return ret;
}
@@ -3926,31 +3464,37 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
if (cmd->srq_type == IB_SRQT_XRC) {
- attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
- if (!attr.ext.xrc.xrcd) {
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
}
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+ attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ file->ucontext);
if (!attr.ext.xrc.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -3990,9 +3534,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
atomic_set(&srq->usecnt, 0);
obj->uevent.uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
memset(&resp, 0, sizeof resp);
resp.srq_handle = obj->uevent.uobject.id;
@@ -4008,42 +3550,32 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (cmd->srq_type == IB_SRQT_XRC) {
- put_uobj_read(xrcd_uobj);
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_read(xrcd_uobj);
+ uobj_put_obj_read(attr.ext.xrc.cq);
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_srq(srq);
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_put_cq:
if (cmd->srq_type == IB_SRQT_XRC)
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_obj_read(attr.ext.xrc.cq);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
atomic_dec(&obj->uxrcd->refcnt);
- put_uobj_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
err:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -4128,7 +3660,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -4137,7 +3669,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
return ret ? ret : in_len;
}
@@ -4159,13 +3691,13 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
ret = ib_query_srq(srq, &attr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
return ret;
@@ -4191,54 +3723,38 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
- struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
- struct ib_usrq_object *us;
- enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- srq = uobj->object;
- obj = container_of(uobj, struct ib_uevent_object, uobject);
- srq_type = srq->srq_type;
+ uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ret = ib_destroy_srq(srq);
- if (!ret)
- uobj->live = 0;
+ obj = container_of(uobj, struct ib_uevent_object, uobject);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+	 * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- if (srq_type == IB_SRQT_XRC) {
- us = container_of(obj, struct ib_usrq_object, uevent);
- atomic_dec(&us->uxrcd->refcnt);
}
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.events_reported = obj->events_reported;
+ uverbs_uobject_put(uobj);
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ return -EFAULT;
- put_uobj(uobj);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
-
- return ret ? ret : in_len;
+ return in_len;
}
int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
@@ -4323,6 +3839,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.max_wq_type_rq = attr.max_wq_type_rq;
resp.response_length += sizeof(resp.max_wq_type_rq);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+ goto end;
+
+ resp.raw_packet_caps = attr.raw_packet_caps;
+ resp.response_length += sizeof(resp.raw_packet_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
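
The raw_packet_caps block follows the same extensibility rule on the output side: each optional field is appended only if the caller's buffer can hold it, and resp.response_length grows accordingly so ib_copy_to_udata() copies exactly what was filled in. The stand-alone C program below (entirely hypothetical struct and field names) shows why an old caller and a new caller both get a consistent view:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical extensible response: older callers only know the first part. */
struct example_resp {
	uint32_t response_length;
	uint32_t base_caps;
	uint32_t extra_caps;	/* added later */
};

/* Copy at most what the caller's buffer can hold and what we filled in. */
static void fill_resp(void *user_buf, size_t outlen)
{
	struct example_resp resp = { .response_length = 2 * sizeof(uint32_t) };

	resp.base_caps = 0x1;

	if (outlen >= resp.response_length + sizeof(resp.extra_caps)) {
		resp.extra_caps = 0x2;
		resp.response_length += sizeof(resp.extra_caps);
	}

	memcpy(user_buf, &resp, resp.response_length < outlen ?
				resp.response_length : outlen);
}

int main(void)
{
	struct example_resp old = { 0 }, newer = { 0 };

	fill_resp(&old, 2 * sizeof(uint32_t));	/* old user space */
	fill_resp(&newer, sizeof(newer));	/* new user space */
	printf("old sees %u bytes, newer sees %u bytes\n",
	       old.response_length, newer.response_length);
	return 0;
}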
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index b3f95d453fba..3d2609608f58 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -51,6 +51,8 @@
#include <rdma/ib.h>
#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -66,19 +68,6 @@ enum {
static struct class *uverbs_class;
-DEFINE_SPINLOCK(ib_uverbs_idr_lock);
-DEFINE_IDR(ib_uverbs_pd_idr);
-DEFINE_IDR(ib_uverbs_mr_idr);
-DEFINE_IDR(ib_uverbs_mw_idr);
-DEFINE_IDR(ib_uverbs_ah_idr);
-DEFINE_IDR(ib_uverbs_cq_idr);
-DEFINE_IDR(ib_uverbs_qp_idr);
-DEFINE_IDR(ib_uverbs_srq_idr);
-DEFINE_IDR(ib_uverbs_xrcd_idr);
-DEFINE_IDR(ib_uverbs_rule_idr);
-DEFINE_IDR(ib_uverbs_wq_idr);
-DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
-
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -167,37 +156,37 @@ static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
-static void ib_uverbs_release_event_file(struct kref *ref)
+static void ib_uverbs_release_async_event_file(struct kref *ref)
{
- struct ib_uverbs_event_file *file =
- container_of(ref, struct ib_uverbs_event_file, ref);
+ struct ib_uverbs_async_event_file *file =
+ container_of(ref, struct ib_uverbs_async_event_file, ref);
kfree(file);
}
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
- spin_lock_irq(&ev_file->lock);
+ spin_lock_irq(&ev_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&ev_file->lock);
+ spin_unlock_irq(&ev_file->ev_queue.lock);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ uverbs_uobject_put(&ev_file->uobj_file.uobj);
}
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
@@ -205,16 +194,16 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
{
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
-static void ib_uverbs_detach_umcast(struct ib_qp *qp,
- struct ib_uqp_object *uobj)
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
@@ -226,124 +215,16 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context)
+ struct ib_ucontext *context,
+ bool device_removed)
{
- struct ib_uobject *uobj, *tmp;
-
context->closing = 1;
-
- list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
- struct ib_ah *ah = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
- ib_destroy_ah(ah);
- kfree(uobj);
- }
-
- /* Remove MWs before QPs, in order to support type 2A MWs. */
- list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
- struct ib_mw *mw = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- uverbs_dealloc_mw(mw);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
- struct ib_flow *flow_id = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
- ib_destroy_flow(flow_id);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
- struct ib_qp *qp = uobj->object;
- struct ib_uqp_object *uqp =
- container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp == qp->real_qp)
- ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- ib_uverbs_release_uevent(file, &uqp->uevent);
- kfree(uqp);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
- struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
- struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
- kfree(ind_tbl);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
- struct ib_wq *wq = uobj->object;
- struct ib_uwq_object *uwq =
- container_of(uobj, struct ib_uwq_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
- ib_destroy_wq(wq);
- ib_uverbs_release_uevent(file, &uwq->uevent);
- kfree(uwq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
- struct ib_srq *srq = uobj->object;
- struct ib_uevent_object *uevent =
- container_of(uobj, struct ib_uevent_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
- ib_uverbs_release_uevent(file, uevent);
- kfree(uevent);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
- struct ib_cq *cq = uobj->object;
- struct ib_uverbs_event_file *ev_file = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobj, struct ib_ucq_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
- ib_destroy_cq(cq);
- ib_uverbs_release_ucq(file, ev_file, ucq);
- kfree(ucq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
- struct ib_mr *mr = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
- ib_dereg_mr(mr);
- kfree(uobj);
- }
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
- struct ib_xrcd *xrcd = uobj->object;
- struct ib_uxrcd_object *uxrcd =
- container_of(uobj, struct ib_uxrcd_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- ib_uverbs_dealloc_xrcd(file->device, xrcd);
- kfree(uxrcd);
- }
- mutex_unlock(&file->device->xrcd_tree_mutex);
-
- list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
- struct ib_pd *pd = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
- ib_dealloc_pd(pd);
- kfree(uobj);
- }
-
+ uverbs_cleanup_ucontext(context, device_removed);
put_pid(context->tgid);
+ ib_rdmacg_uncharge(&context->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+
return context->device->dealloc_ucontext(context);
}
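
The HCA-handle uncharge added here pairs with a charge taken when the ucontext was created; with the per-type cleanup lists gone, teardown is just the generic uverbs_cleanup_ucontext() plus this accounting. A condensed sketch of the charging side, roughly what ib_uverbs_get_context does (the real handler also initializes the context's lists, ufile pointer and so on; example_alloc_ucontext is illustrative):

static int example_alloc_ucontext(struct ib_device *dev,
				  struct ib_udata *udata,
				  struct ib_ucontext **out)
{
	struct ib_rdmacg_object cg_obj = {};
	struct ib_ucontext *ctx;
	int ret;

	/* account one HCA handle against the caller's rdma cgroup */
	ret = ib_rdmacg_try_charge(&cg_obj, dev, RDMACG_RESOURCE_HCA_HANDLE);
	if (ret)
		return ret;

	ctx = dev->alloc_ucontext(dev, udata);
	if (IS_ERR(ctx)) {
		/* give the charge back on any failure path */
		ib_rdmacg_uncharge(&cg_obj, dev, RDMACG_RESOURCE_HCA_HANDLE);
		return PTR_ERR(ctx);
	}

	ctx->device = dev;
	ctx->cg_obj = cg_obj;	/* cleanup uncharges via context->cg_obj */
	*out = ctx;
	return 0;
}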
@@ -352,7 +233,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
complete(&dev->comp);
}
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
@@ -372,58 +253,54 @@ static void ib_uverbs_release_file(struct kref *ref)
kfree(file);
}
-static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
- size_t count, loff_t *pos)
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ struct ib_uverbs_file *uverbs_file,
+ struct file *filp, char __user *buf,
+ size_t count, loff_t *pos,
+ size_t eventsz)
{
- struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
- int eventsz;
int ret = 0;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
- while (list_empty(&file->event_list)) {
- spin_unlock_irq(&file->lock);
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
- if (wait_event_interruptible(file->poll_wait,
- (!list_empty(&file->event_list) ||
+ if (wait_event_interruptible(ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
/* The barriers built into wait_event_interruptible()
* and wake_up() guarantee this will see the null set
* without using RCU
*/
- !file->uverbs_file->device->ib_dev)))
+ !uverbs_file->device->ib_dev)))
return -ERESTARTSYS;
/* If device was disassociated and no event exists set an error */
- if (list_empty(&file->event_list) &&
- !file->uverbs_file->device->ib_dev)
+ if (list_empty(&ev_queue->event_list) &&
+ !uverbs_file->device->ib_dev)
return -EIO;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
}
- event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
-
- if (file->is_async)
- eventsz = sizeof (struct ib_uverbs_async_event_desc);
- else
- eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
- list_del(file->event_list.next);
+ list_del(ev_queue->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
@@ -437,87 +314,158 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return ret;
}
-static unsigned int ib_uverbs_event_poll(struct file *filp,
+static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_async_event_desc));
+}
+
+static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue,
+ comp_ev_file->uobj_file.ufile, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_comp_event_desc));
+}
+
+static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp,
struct poll_table_struct *wait)
{
unsigned int pollflags = 0;
- struct ib_uverbs_event_file *file = filp->private_data;
- poll_wait(filp, &file->poll_wait, wait);
+ poll_wait(filp, &ev_queue->poll_wait, wait);
- spin_lock_irq(&file->lock);
- if (!list_empty(&file->event_list))
+ spin_lock_irq(&ev_queue->lock);
+ if (!list_empty(&ev_queue->event_list))
pollflags = POLLIN | POLLRDNORM;
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
-static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+static unsigned int ib_uverbs_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ return ib_uverbs_event_poll(filp->private_data, filp, wait);
+}
+
+static unsigned int ib_uverbs_comp_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
- return fasync_helper(fd, filp, on, &file->async_queue);
+ return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}
-static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+
+ return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+}
+
+static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
+}
+
+static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+ struct ib_uverbs_file *uverbs_file = file->uverbs_file;
struct ib_uverbs_event *entry, *tmp;
int closed_already = 0;
- mutex_lock(&file->uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->lock);
- closed_already = file->is_closed;
- file->is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+ mutex_lock(&uverbs_file->device->lists_mutex);
+ spin_lock_irq(&file->ev_queue.lock);
+ closed_already = file->ev_queue.is_closed;
+ file->ev_queue.is_closed = 1;
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
kfree(entry);
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&file->ev_queue.lock);
if (!closed_already) {
list_del(&file->list);
- if (file->is_async)
- ib_unregister_event_handler(&file->uverbs_file->
- event_handler);
+ ib_unregister_event_handler(&uverbs_file->event_handler);
}
- mutex_unlock(&file->uverbs_file->device->lists_mutex);
+ mutex_unlock(&uverbs_file->device->lists_mutex);
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_event_file);
+ kref_put(&uverbs_file->ref, ib_uverbs_release_file);
+ kref_put(&file->ref, ib_uverbs_release_async_event_file);
return 0;
}
-static const struct file_operations uverbs_event_fops = {
+static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_completion_event_file *file = filp->private_data;
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ kfree(entry);
+ }
+ spin_unlock_irq(&file->ev_queue.lock);
+
+ uverbs_close_fd(filp);
+
+ return 0;
+}
+
+const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
- .poll = ib_uverbs_event_poll,
- .release = ib_uverbs_event_close,
- .fasync = ib_uverbs_event_fasync,
+ .read = ib_uverbs_comp_event_read,
+ .poll = ib_uverbs_comp_event_poll,
+ .release = ib_uverbs_comp_event_close,
+ .fasync = ib_uverbs_comp_event_fasync,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations uverbs_async_event_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_async_event_read,
+ .poll = ib_uverbs_async_event_poll,
+ .release = ib_uverbs_async_event_close,
+ .fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_event_file *file = cq_context;
+ struct ib_uverbs_event_queue *ev_queue = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
- if (!file)
+ if (!ev_queue)
return;
- spin_lock_irqsave(&file->lock, flags);
- if (file->is_closed) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ if (ev_queue->is_closed) {
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
@@ -526,12 +474,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
- list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->list, &ev_queue->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
- wake_up_interruptible(&file->poll_wait);
- kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&ev_queue->poll_wait);
+ kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -542,15 +490,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->lock, flags);
- if (file->async_file->is_closed) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
+ if (file->async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
@@ -559,13 +507,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
entry->desc.async.reserved = 0;
entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->event_list);
+ list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->poll_wait);
- kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
+ kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
@@ -583,7 +531,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
struct ib_uevent_object *uobj;
/* for XRC target qp's, check that qp is live */
- if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ if (!event->element.qp->uobject)
return;
uobj = container_of(event->element.qp->uobject,
@@ -628,15 +576,23 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
file->async_file = NULL;
}
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async)
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
+{
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ ev_queue->is_closed = 0;
+ ev_queue->async_queue = NULL;
+}
+
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev)
{
- struct ib_uverbs_event_file *ev_file;
+ struct ib_uverbs_async_event_file *ev_file;
struct file *filp;
int ret;
@@ -644,16 +600,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
if (!ev_file)
return ERR_PTR(-ENOMEM);
- kref_init(&ev_file->ref);
- spin_lock_init(&ev_file->lock);
- INIT_LIST_HEAD(&ev_file->event_list);
- init_waitqueue_head(&ev_file->poll_wait);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
ev_file->uverbs_file = uverbs_file;
kref_get(&ev_file->uverbs_file->ref);
- ev_file->async_queue = NULL;
- ev_file->is_closed = 0;
-
- filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
+ kref_init(&ev_file->ref);
+ filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
ev_file, O_RDONLY);
if (IS_ERR(filp))
goto err_put_refs;
@@ -663,64 +614,33 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
&uverbs_file->device->uverbs_events_file_list);
mutex_unlock(&uverbs_file->device->lists_mutex);
- if (is_async) {
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ret = ib_register_event_handler(&uverbs_file->event_handler);
- if (ret)
- goto err_put_file;
-
- /* At that point async file stuff was fully set */
- ev_file->is_async = 1;
- }
+ WARN_ON(uverbs_file->async_file);
+ uverbs_file->async_file = ev_file;
+ kref_get(&uverbs_file->async_file->ref);
+ INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
+ ib_dev,
+ ib_uverbs_event_handler);
+ ret = ib_register_event_handler(&uverbs_file->event_handler);
+ if (ret)
+ goto err_put_file;
+
+	/* At this point the async file is fully initialized */
return filp;
err_put_file:
fput(filp);
- kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&uverbs_file->async_file->ref,
+ ib_uverbs_release_async_event_file);
uverbs_file->async_file = NULL;
return ERR_PTR(ret);
err_put_refs:
kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
return filp;
}
-/*
- * Look up a completion event file by FD. If lookup is successful,
- * takes a ref to the event file struct that it returns; if
- * unsuccessful, returns NULL.
- */
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
-{
- struct ib_uverbs_event_file *ev_file = NULL;
- struct fd f = fdget(fd);
-
- if (!f.file)
- return NULL;
-
- if (f.file->f_op != &uverbs_event_fops)
- goto out;
-
- ev_file = f.file->private_data;
- if (ev_file->is_async) {
- ev_file = NULL;
- goto out;
- }
-
- kref_get(&ev_file->ref);
-
-out:
- fdput(f);
- return ev_file;
-}
-
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
u64 mask;
@@ -966,6 +886,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
}
file->device = dev;
+ spin_lock_init(&file->idr_lock);
+ idr_init(&file->idr);
file->ucontext = NULL;
file->async_file = NULL;
kref_init(&file->ref);
@@ -999,10 +921,11 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
+ idr_destroy(&file->idr);
mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) {
@@ -1012,7 +935,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref,
+ ib_uverbs_release_async_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
kobject_put(&dev->kobj);
@@ -1169,12 +1093,12 @@ static void ib_uverbs_add_one(struct ib_device *device)
cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
+ cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
- uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
+ uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
@@ -1211,7 +1135,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_event_file *event_file;
+ struct ib_uverbs_async_event_file *event_file;
struct ib_event event;
/* Pending running commands to terminate */
@@ -1248,7 +1172,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_dev->disassociate_ucontext(ucontext);
- ib_uverbs_cleanup_ucontext(file, ucontext);
+ mutex_lock(&file->cleanup_mutex);
+ ib_uverbs_cleanup_ucontext(file, ucontext, true);
+ mutex_unlock(&file->cleanup_mutex);
}
mutex_lock(&uverbs_dev->lists_mutex);
@@ -1258,21 +1184,20 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
event_file = list_first_entry(&uverbs_dev->
uverbs_events_file_list,
- struct ib_uverbs_event_file,
+ struct ib_uverbs_async_event_file,
list);
- spin_lock_irq(&event_file->lock);
- event_file->is_closed = 1;
- spin_unlock_irq(&event_file->lock);
+ spin_lock_irq(&event_file->ev_queue.lock);
+ event_file->ev_queue.is_closed = 1;
+ spin_unlock_irq(&event_file->ev_queue.lock);
list_del(&event_file->list);
- if (event_file->is_async) {
- ib_unregister_event_handler(&event_file->uverbs_file->
- event_handler);
- event_file->uverbs_file->event_handler.device = NULL;
- }
+ ib_unregister_event_handler(
+ &event_file->uverbs_file->event_handler);
+		event_file->uverbs_file->event_handler.device = NULL;
- wake_up_interruptible(&event_file->poll_wait);
- kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&event_file->ev_queue.poll_wait);
+ kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
mutex_unlock(&uverbs_dev->lists_mutex);
}
@@ -1376,13 +1301,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
if (overflow_maj)
unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
- idr_destroy(&ib_uverbs_pd_idr);
- idr_destroy(&ib_uverbs_mr_idr);
- idr_destroy(&ib_uverbs_mw_idr);
- idr_destroy(&ib_uverbs_ah_idr);
- idr_destroy(&ib_uverbs_cq_idr);
- idr_destroy(&ib_uverbs_qp_idr);
- idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);
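Note on the hunks above: the single ib_uverbs_event_file (with its is_async flag) is replaced by an ib_uverbs_event_queue embedded in two distinct wrappers, one for completion events and one for async events. As a rough illustration only (the helper name below is hypothetical, not part of the patch), code that holds just the queue pointer can recover the enclosing completion-event file the same way uverbs_free_cq() does later in this diff:

#include <linux/kernel.h>	/* container_of() */
#include "uverbs.h"		/* struct ib_uverbs_completion_event_file, as reworked here */

/* Hypothetical helper: map an embedded event queue back to its file. */
static struct ib_uverbs_completion_event_file *
ev_queue_to_comp_file(struct ib_uverbs_event_queue *ev_queue)
{
	return container_of(ev_queue,
			    struct ib_uverbs_completion_event_file,
			    ev_queue);
}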
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index af020f80d50f..94fd989c9060 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -34,20 +34,25 @@
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
- struct ib_ah_attr *src)
+ struct rdma_ah_attr *src)
{
- memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
- dst->grh.flow_label = src->grh.flow_label;
- dst->grh.sgid_index = src->grh.sgid_index;
- dst->grh.hop_limit = src->grh.hop_limit;
- dst->grh.traffic_class = src->grh.traffic_class;
memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
- dst->port_num = src->port_num;
+ dst->dlid = rdma_ah_get_dlid(src);
+ dst->sl = rdma_ah_get_sl(src);
+ dst->src_path_bits = rdma_ah_get_path_bits(src);
+ dst->static_rate = rdma_ah_get_static_rate(src);
+ dst->is_global = rdma_ah_get_ah_flags(src) &
+ IB_AH_GRH ? 1 : 0;
+ if (dst->is_global) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(src);
+
+ memcpy(dst->grh.dgid, grh->dgid.raw, sizeof(grh->dgid));
+ dst->grh.flow_label = grh->flow_label;
+ dst->grh.sgid_index = grh->sgid_index;
+ dst->grh.hop_limit = grh->hop_limit;
+ dst->grh.traffic_class = grh->traffic_class;
+ }
+ dst->port_num = rdma_ah_get_port_num(src);
dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
@@ -91,15 +96,15 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
-void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
- struct ib_sa_path_rec *src)
+static void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
{
- memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
- memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
+ memcpy(dst->dgid, src->dgid.raw, sizeof(src->dgid));
+ memcpy(dst->sgid, src->sgid.raw, sizeof(src->sgid));
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ dst->dlid = htons(ntohl(sa_path_get_dlid(src)));
+ dst->slid = htons(ntohl(sa_path_get_slid(src)));
+ dst->raw_traffic = sa_path_get_raw_traffic(src);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -115,17 +120,43 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
}
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
+{
+ struct sa_path_rec rec;
+
+ if (src->rec_type == SA_PATH_REC_TYPE_OPA) {
+ sa_convert_path_opa_to_ib(&rec, src);
+ __ib_copy_path_rec_to_user(dst, &rec);
+ return;
+ }
+ __ib_copy_path_rec_to_user(dst, src);
+}
EXPORT_SYMBOL(ib_copy_path_rec_to_user);
-void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
struct ib_user_path_rec *src)
{
+ __be32 slid, dlid;
+
+ memset(dst, 0, sizeof(*dst));
+ if ((ib_is_opa_gid((union ib_gid *)src->sgid)) ||
+ (ib_is_opa_gid((union ib_gid *)src->dgid))) {
+ dst->rec_type = SA_PATH_REC_TYPE_OPA;
+ slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid));
+ dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid));
+ } else {
+ dst->rec_type = SA_PATH_REC_TYPE_IB;
+ slid = htonl(ntohs(src->slid));
+ dlid = htonl(ntohs(src->dlid));
+ }
memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ sa_path_set_dlid(dst, dlid);
+ sa_path_set_slid(dst, slid);
+ sa_path_set_raw_traffic(dst, src->raw_traffic);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -141,9 +172,9 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
- memset(dst->dmac, 0, sizeof(dst->dmac));
- dst->net = NULL;
- dst->ifindex = 0;
- dst->gid_type = IB_GID_TYPE_IB;
+ /* TODO: No need to set this */
+ sa_path_set_dmac_zero(dst);
+ sa_path_set_ndev(dst, NULL);
+ sa_path_set_ifindex(dst, 0);
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
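The reworked conversion above widens the 16-bit big-endian LIDs of ib_user_path_rec into the 32-bit big-endian LIDs carried by sa_path_rec, and narrows them again on the way back to user space. A minimal sketch of just that width/byte-order round trip; the helper names are illustrative only and the value is preserved, only the width changes:

#include <linux/types.h>
#include <asm/byteorder.h>	/* htonl()/ntohs(), as used in the hunk above */

/* Widen a user-space 16-bit big-endian LID to the 32-bit sa_path_rec form. */
static __be32 user_lid_to_sa_lid(__be16 user_lid)
{
	return htonl(ntohs(user_lid));
}

/* Narrow it back when copying a path record out to user space. */
static __be16 sa_lid_to_user_lid(__be32 sa_lid)
{
	return htons(ntohl(sa_lid));
}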
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
new file mode 100644
index 000000000000..ef293379f37a
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/bug.h>
+#include <linux/file.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_ah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return rdma_destroy_ah((struct ib_ah *)uobject->object);
+}
+
+static int uverbs_free_flow(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_destroy_flow((struct ib_flow *)uobject->object);
+}
+
+static int uverbs_free_mw(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
+}
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp(qp);
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent);
+ return ret;
+}
+
+static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+ int ret;
+
+ ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ kfree(ind_tbl);
+ return ret;
+}
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq(wq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+ return ret;
+}
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq(srq);
+
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uevent, struct ib_usrq_object, uevent);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uobject->context->ufile, uevent);
+ return ret;
+}
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uobject);
+ int ret;
+
+ ret = ib_destroy_cq(cq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+ container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) : NULL,
+ ucq);
+ return ret;
+}
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int uverbs_free_xrcd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_xrcd *xrcd = uobject->object;
+ struct ib_uxrcd_object *uxrcd =
+ container_of(uobject, struct ib_uxrcd_object, uobject);
+ int ret;
+
+ mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
+ ret = -EBUSY;
+ else
+ ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
+ xrcd, why);
+ mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+static int uverbs_free_pd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_pd *pd = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
+ return -EBUSY;
+
+ ib_dealloc_pd((struct ib_pd *)uobject->object);
+ return 0;
+}
+
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *comp_event_file =
+ container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
+ struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
+
+ spin_lock_irq(&event_queue->lock);
+ event_queue->is_closed = 1;
+ spin_unlock_irq(&event_queue->lock);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE) {
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+ }
+ return 0;
+}
+
+const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
+ .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
+ .context_closed = uverbs_hot_unplug_completion_event_file,
+ .fops = &uverbs_event_fops,
+ .name = "[infinibandevent]",
+ .flags = O_RDONLY,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
+ .destroy_object = uverbs_free_cq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_qp = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0),
+ .destroy_object = uverbs_free_qp,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mw = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_mw,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mr = {
+ /* 1 is used in order to free the MR after all the MWs */
+ .type = UVERBS_TYPE_ALLOC_IDR(1),
+ .destroy_object = uverbs_free_mr,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_srq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0),
+ .destroy_object = uverbs_free_srq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_ah = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_ah,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_flow = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_flow,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_wq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0),
+ .destroy_object = uverbs_free_wq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_rwq_ind_tbl,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0),
+ .destroy_object = uverbs_free_xrcd,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_pd = {
+ /* 2 is used in order to free the PD after MRs */
+ .type = UVERBS_TYPE_ALLOC_IDR(2),
+ .destroy_object = uverbs_free_pd,
+};
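The numeric argument to UVERBS_TYPE_ALLOC_IDR*() above acts as a destruction-order key: types with a lower value are cleaned up first, which is why MWs use 0, MRs use 1 ("free the MR after all the MWs") and the PD uses 2. The following is only a rough sketch of that idea, with hypothetical names; it is not the rdma_core cleanup code:

/* Hypothetical, simplified model of ordered cleanup (not rdma_core). */
struct demo_uobj {
	unsigned int destruction_order;		/* 0 = destroyed first */
	int (*destroy)(struct demo_uobj *uobj);
};

static void demo_cleanup_ucontext(struct demo_uobj **objs, int nr,
				  unsigned int max_order)
{
	unsigned int order;
	int i;

	/* One pass per order value: MWs (0), then MRs (1), then the PD (2). */
	for (order = 0; order <= max_order; order++)
		for (i = 0; i < nr; i++)
			if (objs[i] && objs[i]->destruction_order == order) {
				objs[i]->destroy(objs[i]);
				objs[i] = NULL;
			}
}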
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 71580cc28c9e..fb98ed67d5bc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -44,6 +44,7 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <net/addrconf.h>
+#include <linux/security.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -311,7 +312,7 @@ EXPORT_SYMBOL(ib_dealloc_pd);
/* Address handles */
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
{
struct ib_ah *ah;
@@ -321,12 +322,13 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
ah->device = pd->device;
ah->pd = pd;
ah->uobject = NULL;
+ ah->type = ah_attr->type;
atomic_inc(&pd->usecnt);
}
return ah;
}
-EXPORT_SYMBOL(ib_create_ah);
+EXPORT_SYMBOL(rdma_create_ah);
int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
@@ -450,9 +452,22 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
}
EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
+/*
+ * This function creates an AH from the incoming packet.
+ * The incoming packet's dgid is that of the receiving node on which this
+ * code is executing, while its sgid holds the GID of the sender.
+ *
+ * When resolving the destination's MAC address, the arrived dgid is
+ * therefore used as the sgid and the sgid as the dgid, since the sgid
+ * names the destination GID that must be responded to.
+ *
+ * This is why, in the call to rdma_addr_find_l2_eth_by_grh() below, the
+ * dgid and sgid arguments do not match the order of the function's
+ * parameters.
+ */
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
@@ -464,6 +479,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
union ib_gid sgid;
memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->type = rdma_ah_find_type(device, port_num);
if (rdma_cap_eth_ah(device, port_num)) {
if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
net_type = wc->network_hdr_type;
@@ -494,7 +510,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return -ENODEV;
ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->dmac,
+ ah_attr->roce.dmac,
wc->wc_flags & IB_WC_WITH_VLAN ?
NULL : &vlan_id,
&if_index, &hoplimit);
@@ -504,11 +520,6 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
}
resolved_dev = dev_get_by_index(&init_net, if_index);
- if (resolved_dev->flags & IFF_LOOPBACK) {
- dev_put(resolved_dev);
- resolved_dev = idev;
- dev_hold(resolved_dev);
- }
rcu_read_lock();
if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
resolved_dev))
@@ -525,15 +536,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return ret;
}
- ah_attr->dlid = wc->slid;
- ah_attr->sl = wc->sl;
- ah_attr->src_path_bits = wc->dlid_path_bits;
- ah_attr->port_num = port_num;
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
+ rdma_ah_set_port_num(ah_attr, port_num);
if (wc->wc_flags & IB_WC_GRH) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = sgid;
-
if (!rdma_cap_eth_ah(device, port_num)) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
@@ -547,11 +555,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
}
}
- ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
- ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = hoplimit;
- ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+
}
return 0;
}
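The comment added above explains the dgid/sgid swap ib_init_ah_from_wc() performs when resolving the reply address. Callers usually reach it through ib_create_ah_from_wc(), shown in the next hunk; a minimal, hypothetical usage sketch for replying to a received UD packet (on UD QPs the GRH, when present, occupies the first 40 bytes of the receive buffer):

#include <linux/err.h>
#include <rdma/ib_verbs.h>

/* Hypothetical caller: build an AH addressed back at the sender of @wc. */
static struct ib_ah *demo_make_reply_ah(struct ib_pd *pd,
					const struct ib_wc *wc,
					const struct ib_grh *grh, u8 port_num)
{
	struct ib_ah *ah = ib_create_ah_from_wc(pd, wc, grh, port_num);

	if (IS_ERR(ah))
		return NULL;	/* a real caller may prefer to propagate PTR_ERR(ah) */
	return ah;
}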
@@ -560,34 +569,37 @@ EXPORT_SYMBOL(ib_init_ah_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
{
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
int ret;
ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
- return ib_create_ah(pd, &ah_attr);
+ return rdma_create_ah(pd, &ah_attr);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
-int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
+ if (ah->type != ah_attr->type)
+ return -EINVAL;
+
return ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
-ENOSYS;
}
-EXPORT_SYMBOL(ib_modify_ah);
+EXPORT_SYMBOL(rdma_modify_ah);
-int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
-ENOSYS;
}
-EXPORT_SYMBOL(ib_query_ah);
+EXPORT_SYMBOL(rdma_query_ah);
-int ib_destroy_ah(struct ib_ah *ah)
+int rdma_destroy_ah(struct ib_ah *ah)
{
struct ib_pd *pd;
int ret;
@@ -599,7 +611,7 @@ int ib_destroy_ah(struct ib_ah *ah)
return ret;
}
-EXPORT_SYMBOL(ib_destroy_ah);
+EXPORT_SYMBOL(rdma_destroy_ah);
/* Shared receive queues */
@@ -710,12 +722,20 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
{
struct ib_qp *qp;
unsigned long flags;
+ int err;
qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
qp->real_qp = real_qp;
+ err = ib_open_shared_qp_security(qp, real_qp->device);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->real_qp = real_qp;
atomic_inc(&real_qp->usecnt);
qp->device = real_qp->device;
qp->event_handler = event_handler;
@@ -801,6 +821,12 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (IS_ERR(qp))
return qp;
+ ret = ib_create_qp_security(qp, device);
+ if (ret) {
+ ib_destroy_qp(qp);
+ return ERR_PTR(ret);
+ }
+
qp->device = device;
qp->real_qp = qp;
qp->uobject = NULL;
@@ -869,6 +895,7 @@ static const struct {
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
[IB_QPS_INIT] = {
.valid = 1,
.req_param = {
@@ -1201,20 +1228,22 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
EXPORT_SYMBOL(ib_modify_qp_is_ok);
int ib_resolve_eth_dmac(struct ib_device *device,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int ret = 0;
+ struct ib_global_route *grh;
- if (ah_attr->port_num < rdma_start_port(device) ||
- ah_attr->port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
return -EINVAL;
- if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
return 0;
- if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
- ah_attr->dmac);
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
@@ -1222,8 +1251,8 @@ int ib_resolve_eth_dmac(struct ib_device *device,
int hop_limit;
ret = ib_query_gid(device,
- ah_attr->port_num,
- ah_attr->grh.sgid_index,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
&sgid, &sgid_attr);
if (ret || !sgid_attr.ndev) {
@@ -1234,34 +1263,50 @@ int ib_resolve_eth_dmac(struct ib_device *device,
ifindex = sgid_attr.ndev->ifindex;
- ret = rdma_addr_find_l2_eth_by_grh(&sgid,
- &ah_attr->grh.dgid,
- ah_attr->dmac,
- NULL, &ifindex, &hop_limit);
+ ret =
+ rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ NULL, &ifindex, &hop_limit);
dev_put(sgid_attr.ndev);
- ah_attr->grh.hop_limit = hop_limit;
+ grh->hop_limit = hop_limit;
}
out:
return ret;
}
EXPORT_SYMBOL(ib_resolve_eth_dmac);
-int ib_modify_qp(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask)
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify. On output,
+ * the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ * are being modified.
+ * @udata: pointer to the user's input/output buffer information
+ *
+ * Returns 0 on success, or an appropriate error code on failure.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
+ int ret;
- if (qp_attr_mask & IB_QP_AV) {
- int ret;
-
- ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
+ if (attr_mask & IB_QP_AV) {
+ ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
if (ret)
return ret;
}
+ return ib_security_modify_qp(qp, attr, attr_mask, udata);
+}
+EXPORT_SYMBOL(ib_modify_qp_with_udata);
- return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
+int ib_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask)
+{
+ return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
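With the hunk above, ib_modify_qp() becomes a thin wrapper that calls ib_modify_qp_with_udata() with a NULL udata, so both paths go through ib_security_modify_qp(); uverbs passes the user buffer through so the security hook can see it. A minimal, hypothetical kernel-side usage sketch:

#include <rdma/ib_verbs.h>

/* Hypothetical helper: flush a QP by moving it to the error state. */
static int demo_move_qp_to_err(struct ib_qp *qp)
{
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };

	/* Equivalent to ib_modify_qp_with_udata(qp, &attr, IB_QP_STATE, NULL) */
	return ib_modify_qp(qp, &attr, IB_QP_STATE);
}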
@@ -1290,6 +1335,7 @@ int ib_close_qp(struct ib_qp *qp)
spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
atomic_dec(&real_qp->usecnt);
+ ib_close_shared_qp_security(qp->qp_sec);
kfree(qp);
return 0;
@@ -1330,6 +1376,7 @@ int ib_destroy_qp(struct ib_qp *qp)
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
struct ib_rwq_ind_table *ind_tbl;
+ struct ib_qp_security *sec;
int ret;
WARN_ON_ONCE(qp->mrs_used > 0);
@@ -1345,6 +1392,9 @@ int ib_destroy_qp(struct ib_qp *qp)
rcq = qp->recv_cq;
srq = qp->srq;
ind_tbl = qp->rwq_ind_tbl;
+ sec = qp->qp_sec;
+ if (sec)
+ ib_destroy_qp_security_begin(sec);
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
@@ -1361,6 +1411,11 @@ int ib_destroy_qp(struct ib_qp *qp)
atomic_dec(&srq->usecnt);
if (ind_tbl)
atomic_dec(&ind_tbl->usecnt);
+ if (sec)
+ ib_destroy_qp_security_end(sec);
+ } else {
+ if (sec)
+ ib_destroy_qp_security_abort(sec);
}
return ret;
@@ -1520,7 +1575,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->attach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
@@ -1536,7 +1593,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);
@@ -1949,17 +2008,12 @@ static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
*/
static void __ib_drain_sq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
struct ib_send_wr swr = {}, *bad_swr;
int ret;
- if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
@@ -1976,7 +2030,11 @@ static void __ib_drain_sq(struct ib_qp *qp)
return;
}
- wait_for_completion(&sdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&sdrain.done);
}
/*
@@ -1984,17 +2042,12 @@ static void __ib_drain_sq(struct ib_qp *qp)
*/
static void __ib_drain_rq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->recv_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
rwr.wr_cqe = &rdrain.cqe;
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
@@ -2011,7 +2064,11 @@ static void __ib_drain_rq(struct ib_qp *qp)
return;
}
- wait_for_completion(&rdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&rdrain.done);
}
/**
@@ -2028,8 +2085,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
* ensure there is room in the CQ and SQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2057,8 +2113,7 @@ EXPORT_SYMBOL(ib_drain_sq);
* ensure there is room in the CQ and RQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
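With the drain changes above, IB_POLL_DIRECT CQs are no longer excluded: __ib_drain_sq()/__ib_drain_rq() now call ib_process_cq_direct() themselves while waiting for the drain completion, so the documented restriction is dropped. A minimal, hypothetical teardown sketch that is now valid even when the QP's CQs were allocated with IB_POLL_DIRECT:

#include <rdma/ib_verbs.h>

/* Hypothetical teardown path for a QP whose CQs use IB_POLL_DIRECT. */
static void demo_drain_and_destroy(struct ib_qp *qp)
{
	ib_drain_qp(qp);	/* previously warned and returned early for IB_POLL_DIRECT */
	ib_destroy_qp(qp);
}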
@@ -2082,8 +2137,7 @@ EXPORT_SYMBOL(ib_drain_rq);
* ensure there is room in the CQ(s), SQ, and RQ for drain work requests
* and completions.
*
- * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQs using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.