aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile4
-rw-r--r--drivers/infiniband/core/cache.c1
-rw-r--r--drivers/infiniband/core/cm.c4
-rw-r--r--drivers/infiniband/core/cma.c106
-rw-r--r--drivers/infiniband/core/device.c138
-rw-r--r--drivers/infiniband/core/umem.c (renamed from drivers/infiniband/core/uverbs_mem.c)158
-rw-r--r--drivers/infiniband/core/uverbs.h6
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c60
-rw-r--r--drivers/infiniband/core/uverbs_main.c11
9 files changed, 316 insertions, 172 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 189e5d4b9b17..cb1ab3ea4998 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o
+ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \
- uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 558c9a0fc8b9..e85f7013de57 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -38,6 +38,7 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/workqueue.h>
#include <rdma/ib_cache.h>
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index eff591deeb46..e840434a96d8 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -306,7 +306,9 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
do {
spin_lock_irqsave(&cm.lock, flags);
ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
- next_id++, &id);
+ next_id, &id);
+ if (!ret)
+ next_id = ((unsigned) id + 1) & MAX_ID_MASK;
spin_unlock_irqrestore(&cm.lock, flags);
} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index fde92ce45153..2eb52b7a71da 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -346,12 +346,33 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
complete(&id_priv->comp);
}
-static void cma_release_remove(struct rdma_id_private *id_priv)
+static int cma_disable_remove(struct rdma_id_private *id_priv,
+ enum cma_state state)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&id_priv->lock, flags);
+ if (id_priv->state == state) {
+ atomic_inc(&id_priv->dev_remove);
+ ret = 0;
+ } else
+ ret = -EINVAL;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+
+static void cma_enable_remove(struct rdma_id_private *id_priv)
{
if (atomic_dec_and_test(&id_priv->dev_remove))
wake_up(&id_priv->wait_remove);
}
+static int cma_has_cm_dev(struct rdma_id_private *id_priv)
+{
+ return (id_priv->id.device && id_priv->cm_id.ib);
+}
+
struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
void *context, enum rdma_port_space ps)
{
@@ -884,9 +905,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
struct rdma_cm_event event;
int ret = 0;
- atomic_inc(&id_priv->dev_remove);
- if (!cma_comp(id_priv, CMA_CONNECT))
- goto out;
+ if (cma_disable_remove(id_priv, CMA_CONNECT))
+ return 0;
memset(&event, 0, sizeof event);
switch (ib_event->event) {
@@ -942,12 +962,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
cma_exch(id_priv, CMA_DESTROYING);
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
rdma_destroy_id(&id_priv->id);
return ret;
}
out:
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
return ret;
}
@@ -1057,11 +1077,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
int offset, ret;
listen_id = cm_id->context;
- atomic_inc(&listen_id->dev_remove);
- if (!cma_comp(listen_id, CMA_LISTEN)) {
- ret = -ECONNABORTED;
- goto out;
- }
+ if (cma_disable_remove(listen_id, CMA_LISTEN))
+ return -ECONNABORTED;
memset(&event, 0, sizeof event);
offset = cma_user_data_offset(listen_id->id.ps);
@@ -1101,11 +1118,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
release_conn_id:
cma_exch(conn_id, CMA_DESTROYING);
- cma_release_remove(conn_id);
+ cma_enable_remove(conn_id);
rdma_destroy_id(&conn_id->id);
out:
- cma_release_remove(listen_id);
+ cma_enable_remove(listen_id);
return ret;
}
@@ -1171,9 +1188,10 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
struct sockaddr_in *sin;
int ret = 0;
- memset(&event, 0, sizeof event);
- atomic_inc(&id_priv->dev_remove);
+ if (cma_disable_remove(id_priv, CMA_CONNECT))
+ return 0;
+ memset(&event, 0, sizeof event);
switch (iw_event->event) {
case IW_CM_EVENT_CLOSE:
event.event = RDMA_CM_EVENT_DISCONNECTED;
@@ -1214,12 +1232,12 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
cma_exch(id_priv, CMA_DESTROYING);
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
rdma_destroy_id(&id_priv->id);
return ret;
}
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
return ret;
}
@@ -1234,11 +1252,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
int ret;
listen_id = cm_id->context;
- atomic_inc(&listen_id->dev_remove);
- if (!cma_comp(listen_id, CMA_LISTEN)) {
- ret = -ECONNABORTED;
- goto out;
- }
+ if (cma_disable_remove(listen_id, CMA_LISTEN))
+ return -ECONNABORTED;
/* Create a new RDMA id for the new IW CM ID */
new_cm_id = rdma_create_id(listen_id->id.event_handler,
@@ -1255,13 +1270,13 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
if (!dev) {
ret = -EADDRNOTAVAIL;
- cma_release_remove(conn_id);
+ cma_enable_remove(conn_id);
rdma_destroy_id(new_cm_id);
goto out;
}
ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
if (ret) {
- cma_release_remove(conn_id);
+ cma_enable_remove(conn_id);
rdma_destroy_id(new_cm_id);
goto out;
}
@@ -1270,7 +1285,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
ret = cma_acquire_dev(conn_id);
mutex_unlock(&lock);
if (ret) {
- cma_release_remove(conn_id);
+ cma_enable_remove(conn_id);
rdma_destroy_id(new_cm_id);
goto out;
}
@@ -1293,14 +1308,14 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, CMA_DESTROYING);
- cma_release_remove(conn_id);
+ cma_enable_remove(conn_id);
rdma_destroy_id(&conn_id->id);
}
out:
if (dev)
dev_put(dev);
- cma_release_remove(listen_id);
+ cma_enable_remove(listen_id);
return ret;
}
@@ -1519,7 +1534,7 @@ static void cma_work_handler(struct work_struct *_work)
destroy = 1;
}
out:
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
cma_deref_id(id_priv);
if (destroy)
rdma_destroy_id(&id_priv->id);
@@ -1711,13 +1726,13 @@ static void addr_handler(int status, struct sockaddr *src_addr,
if (id_priv->id.event_handler(&id_priv->id, &event)) {
cma_exch(id_priv, CMA_DESTROYING);
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
cma_deref_id(id_priv);
rdma_destroy_id(&id_priv->id);
return;
}
out:
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
cma_deref_id(id_priv);
}
@@ -2042,11 +2057,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
int ret = 0;
- memset(&event, 0, sizeof event);
- atomic_inc(&id_priv->dev_remove);
- if (!cma_comp(id_priv, CMA_CONNECT))
- goto out;
+ if (cma_disable_remove(id_priv, CMA_CONNECT))
+ return 0;
+ memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_SIDR_REQ_ERROR:
event.event = RDMA_CM_EVENT_UNREACHABLE;
@@ -2084,12 +2098,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
cma_exch(id_priv, CMA_DESTROYING);
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
rdma_destroy_id(&id_priv->id);
return ret;
}
out:
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
return ret;
}
@@ -2413,7 +2427,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, CMA_CONNECT))
+ if (!cma_has_cm_dev(id_priv))
return -EINVAL;
switch (id->device->node_type) {
@@ -2435,7 +2449,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, CMA_CONNECT))
+ if (!cma_has_cm_dev(id_priv))
return -EINVAL;
switch (rdma_node_get_transport(id->device->node_type)) {
@@ -2466,8 +2480,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, CMA_CONNECT) &&
- !cma_comp(id_priv, CMA_DISCONNECT))
+ if (!cma_has_cm_dev(id_priv))
return -EINVAL;
switch (rdma_node_get_transport(id->device->node_type)) {
@@ -2499,10 +2512,9 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
int ret;
id_priv = mc->id_priv;
- atomic_inc(&id_priv->dev_remove);
- if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
- !cma_comp(id_priv, CMA_ADDR_RESOLVED))
- goto out;
+ if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
+ cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
+ return 0;
if (!status && id_priv->id.qp)
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
@@ -2524,12 +2536,12 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
ret = id_priv->id.event_handler(&id_priv->id, &event);
if (ret) {
cma_exch(id_priv, CMA_DESTROYING);
- cma_release_remove(id_priv);
+ cma_enable_remove(id_priv);
rdma_destroy_id(&id_priv->id);
return 0;
}
-out:
- cma_release_remove(id_priv);
+
+ cma_enable_remove(id_priv);
return 0;
}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7fabb425b033..3ada17c0f239 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -40,6 +40,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mutex.h>
+#include <linux/workqueue.h>
#include "core_priv.h"
@@ -149,6 +150,18 @@ static int alloc_name(char *name)
return 0;
}
+static int start_port(struct ib_device *device)
+{
+ return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
+}
+
+
+static int end_port(struct ib_device *device)
+{
+ return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+ 0 : device->phys_port_cnt;
+}
+
/**
* ib_alloc_device - allocate an IB device struct
* @size:size of structure to allocate
@@ -208,6 +221,45 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
return 0;
}
+static int read_port_table_lengths(struct ib_device *device)
+{
+ struct ib_port_attr *tprops = NULL;
+ int num_ports, ret = -ENOMEM;
+ u8 port_index;
+
+ tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+ if (!tprops)
+ goto out;
+
+ num_ports = end_port(device) - start_port(device) + 1;
+
+ device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
+ GFP_KERNEL);
+ device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
+ GFP_KERNEL);
+ if (!device->pkey_tbl_len || !device->gid_tbl_len)
+ goto err;
+
+ for (port_index = 0; port_index < num_ports; ++port_index) {
+ ret = ib_query_port(device, port_index + start_port(device),
+ tprops);
+ if (ret)
+ goto err;
+ device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
+ device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
+ }
+
+ ret = 0;
+ goto out;
+
+err:
+ kfree(device->gid_tbl_len);
+ kfree(device->pkey_tbl_len);
+out:
+ kfree(tprops);
+ return ret;
+}
+
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
@@ -239,10 +291,19 @@ int ib_register_device(struct ib_device *device)
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
+ ret = read_port_table_lengths(device);
+ if (ret) {
+ printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
+ device->name);
+ goto out;
+ }
+
ret = ib_device_register_sysfs(device);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
+ kfree(device->gid_tbl_len);
+ kfree(device->pkey_tbl_len);
goto out;
}
@@ -284,6 +345,9 @@ void ib_unregister_device(struct ib_device *device)
list_del(&device->core_list);
+ kfree(device->gid_tbl_len);
+ kfree(device->pkey_tbl_len);
+
mutex_unlock(&device_mutex);
spin_lock_irqsave(&device->client_data_lock, flags);
@@ -506,10 +570,7 @@ int ib_query_port(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr)
{
- if (device->node_type == RDMA_NODE_IB_SWITCH) {
- if (port_num)
- return -EINVAL;
- } else if (port_num < 1 || port_num > device->phys_port_cnt)
+ if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
return device->query_port(device, port_num, port_attr);
@@ -581,10 +642,7 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
- if (device->node_type == RDMA_NODE_IB_SWITCH) {
- if (port_num)
- return -EINVAL;
- } else if (port_num < 1 || port_num > device->phys_port_cnt)
+ if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
@@ -592,6 +650,68 @@ int ib_modify_port(struct ib_device *device,
}
EXPORT_SYMBOL(ib_modify_port);
+/**
+ * ib_find_gid - Returns the port number and GID table index where
+ * a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the GID table where the GID was found. This
+ * parameter may be NULL.
+ */
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+ u8 *port_num, u16 *index)
+{
+ union ib_gid tmp_gid;
+ int ret, port, i;
+
+ for (port = start_port(device); port <= end_port(device); ++port) {
+ for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
+ ret = ib_query_gid(device, port, i, &tmp_gid);
+ if (ret)
+ return ret;
+ if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
+ *port_num = port;
+ if (index)
+ *index = i;
+ return 0;
+ }
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(ib_find_gid);
+
+/**
+ * ib_find_pkey - Returns the PKey table index where a specified
+ * PKey value occurs.
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the PKey table where the PKey was found.
+ */
+int ib_find_pkey(struct ib_device *device,
+ u8 port_num, u16 pkey, u16 *index)
+{
+ int ret, i;
+ u16 tmp_pkey;
+
+ for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
+ ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
+ if (ret)
+ return ret;
+
+ if (pkey == tmp_pkey) {
+ *index = i;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(ib_find_pkey);
+
static int __init ib_core_init(void)
{
int ret;
@@ -613,6 +733,8 @@ static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
ib_sysfs_cleanup();
+ /* Make sure that any pending umem accounting work is done. */
+ flush_scheduled_work();
}
module_init(ib_core_init);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/umem.c
index c95fe952abd5..b4aec5103c99 100644
--- a/drivers/infiniband/core/uverbs_mem.c
+++ b/drivers/infiniband/core/umem.c
@@ -36,16 +36,10 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
+#include <linux/sched.h>
#include "uverbs.h"
-struct ib_umem_account_work {
- struct work_struct work;
- struct mm_struct *mm;
- unsigned long diff;
-};
-
-
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
struct ib_umem_chunk *chunk, *tmp;
@@ -64,35 +58,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
}
}
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
- void *addr, size_t size, int write)
+/**
+ * ib_umem_get - Pin and DMA map userspace memory.
+ * @context: userspace context to pin memory for
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ */
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access)
{
+ struct ib_umem *umem;
struct page **page_list;
struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
- int ret = 0;
+ int ret;
int off;
int i;
if (!can_do_mlock())
- return -EPERM;
+ return ERR_PTR(-EPERM);
- page_list = (struct page **) __get_free_page(GFP_KERNEL);
- if (!page_list)
- return -ENOMEM;
+ umem = kmalloc(sizeof *umem, GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
- mem->user_base = (unsigned long) addr;
- mem->length = size;
- mem->offset = (unsigned long) addr & ~PAGE_MASK;
- mem->page_size = PAGE_SIZE;
- mem->writable = write;
+ umem->context = context;
+ umem->length = size;
+ umem->offset = addr & ~PAGE_MASK;
+ umem->page_size = PAGE_SIZE;
+ /*
+ * We ask for writable memory if any access flags other than
+ * "remote read" are set. "Local write" and "remote write"
+ * obviously require write access. "Remote atomic" can do
+ * things like fetch and add, which will modify memory, and
+ * "MW bind" can change permissions by binding a window.
+ */
+ umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
- INIT_LIST_HEAD(&mem->chunk_list);
+ INIT_LIST_HEAD(&umem->chunk_list);
- npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list) {
+ kfree(umem);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
@@ -104,13 +119,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
goto out;
}
- cur_base = (unsigned long) addr & PAGE_MASK;
+ cur_base = addr & PAGE_MASK;
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(int, npages,
PAGE_SIZE / sizeof (struct page *)),
- 1, !write, page_list, NULL);
+ 1, !umem->writable, page_list, NULL);
if (ret < 0)
goto out;
@@ -136,7 +151,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
chunk->page_list[i].length = PAGE_SIZE;
}
- chunk->nmap = ib_dma_map_sg(dev,
+ chunk->nmap = ib_dma_map_sg(context->device,
&chunk->page_list[0],
chunk->nents,
DMA_BIDIRECTIONAL);
@@ -151,75 +166,96 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
ret -= chunk->nents;
off += chunk->nents;
- list_add_tail(&chunk->list, &mem->chunk_list);
+ list_add_tail(&chunk->list, &umem->chunk_list);
}
ret = 0;
}
out:
- if (ret < 0)
- __ib_umem_release(dev, mem, 0);
- else
+ if (ret < 0) {
+ __ib_umem_release(context->device, umem, 0);
+ kfree(umem);
+ } else
current->mm->locked_vm = locked;
up_write(&current->mm->mmap_sem);
free_page((unsigned long) page_list);
- return ret;
+ return ret < 0 ? ERR_PTR(ret) : umem;
}
+EXPORT_SYMBOL(ib_umem_get);
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
+static void ib_umem_account(struct work_struct *work)
{
- __ib_umem_release(dev, umem, 1);
+ struct ib_umem *umem = container_of(work, struct ib_umem, work);
- down_write(&current->mm->mmap_sem);
- current->mm->locked_vm -=
- PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
- up_write(&current->mm->mmap_sem);
+ down_write(&umem->mm->mmap_sem);
+ umem->mm->locked_vm -= umem->diff;
+ up_write(&umem->mm->mmap_sem);
+ mmput(umem->mm);
+ kfree(umem);
}
-static void ib_umem_account(struct work_struct *_work)
-{
- struct ib_umem_account_work *work =
- container_of(_work, struct ib_umem_account_work, work);
-
- down_write(&work->mm->mmap_sem);
- work->mm->locked_vm -= work->diff;
- up_write(&work->mm->mmap_sem);
- mmput(work->mm);
- kfree(work);
-}
-
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
+/**
+ * ib_umem_release - release memory pinned with ib_umem_get
+ * @umem: umem struct to release
+ */
+void ib_umem_release(struct ib_umem *umem)
{
- struct ib_umem_account_work *work;
+ struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
+ unsigned long diff;
- __ib_umem_release(dev, umem, 1);
+ __ib_umem_release(umem->context->device, umem, 1);
mm = get_task_mm(current);
- if (!mm)
+ if (!mm) {
+ kfree(umem);
return;
+ }
+
+ diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
/*
* We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
- * up here and not be able to take the mmap_sem. Therefore we
- * defer the vm_locked accounting to the system workqueue.
+ * up here and not be able to take the mmap_sem. In that case
+ * we defer the vm_locked accounting to the system workqueue.
*/
+ if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
+ INIT_WORK(&umem->work, ib_umem_account);
+ umem->mm = mm;
+ umem->diff = diff;
- work = kmalloc(sizeof *work, GFP_KERNEL);
- if (!work) {
- mmput(mm);
+ schedule_work(&umem->work);
return;
- }
+ } else
+ down_write(&mm->mmap_sem);
+
+ current->mm->locked_vm -= diff;
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+ kfree(umem);
+}
+EXPORT_SYMBOL(ib_umem_release);
+
+int ib_umem_page_count(struct ib_umem *umem)
+{
+ struct ib_umem_chunk *chunk;
+ int shift;
+ int i;
+ int n;
+
+ shift = ilog2(umem->page_size);
- INIT_WORK(&work->work, ib_umem_account);
- work->mm = mm;
- work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+ n = 0;
+ list_for_each_entry(chunk, &umem->chunk_list, list)
+ for (i = 0; i < chunk->nmap; ++i)
+ n += sg_dma_len(&chunk->page_list[i]) >> shift;
- schedule_work(&work->work);
+ return n;
}
+EXPORT_SYMBOL(ib_umem_page_count);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 102a59c033ff..c33546f9e961 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -45,6 +45,7 @@
#include <linux/completion.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
/*
@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
- void *addr, size_t size, int write);
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
-
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
const char __user *buf, int in_len, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index bab66769be14..01d70084aebe 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
* Copyright (c) 2006 Mellanox Technologies. All rights reserved.
*
@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
+ ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
struct ib_udata udata;
- struct ib_umem_object *obj;
+ struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
!(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
return -EINVAL;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
return -ENOMEM;
- init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key);
- down_write(&obj->uobject.mutex);
-
- /*
- * We ask for writable memory if any access flags other than
- * "remote read" are set. "Local write" and "remote write"
- * obviously require write access. "Remote atomic" can do
- * things like fetch and add, which will modify memory, and
- * "MW bind" can change permissions by binding a window.
- */
- ret = ib_umem_get(file->device->ib_dev, &obj->umem,
- (void *) (unsigned long) cmd.start, cmd.length,
- !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
- if (ret)
- goto err_free;
-
- obj->umem.virt_base = cmd.hca_va;
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+ down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
- goto err_release;
+ goto err_free;
}
- mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
+ mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+ cmd.access_flags, &udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->device = pd->device;
mr->pd = pd;
- mr->uobject = &obj->uobject;
+ mr->uobject = uobj;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
- obj->uobject.object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+ uobj->object = mr;
+ ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
if (ret)
goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
- resp.mr_handle = obj->uobject.id;
+ resp.mr_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
put_pd_read(pd);
mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+ list_add_tail(&uobj->list, &file->ucontext->mr_list);
mutex_unlock(&file->mutex);
- obj->uobject.live = 1;
+ uobj->live = 1;
- up_write(&obj->uobject.mutex);
+ up_write(&uobj->mutex);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
err_unreg:
ib_dereg_mr(mr);
@@ -676,11 +663,8 @@ err_unreg:
err_put:
put_pd_read(pd);
-err_release:
- ib_umem_release(file->device->ib_dev, &obj->umem);
-
err_free:
- put_uobj_write(&obj->uobject);
+ put_uobj_write(uobj);
return ret;
}
@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_dereg_mr cmd;
struct ib_mr *mr;
struct ib_uobject *uobj;
- struct ib_umem_object *memobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (!uobj)
return -EINVAL;
- memobj = container_of(uobj, struct ib_umem_object, uobject);
- mr = uobj->object;
+ mr = uobj->object;
ret = ib_dereg_mr(mr);
if (!ret)
@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
list_del(&uobj->list);
mutex_unlock(&file->mutex);
- ib_umem_release(file->device->ib_dev, &memobj->umem);
-
put_uobj(uobj);
return in_len;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index d44e54799651..14d7ccd89195 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
if (!context)
return 0;
+ context->closing = 1;
+
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
struct ib_ah *ah = uobj->object;
@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
- struct ib_device *mrdev = mr->device;
- struct ib_umem_object *memobj;
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr);
-
- memobj = container_of(uobj, struct ib_umem_object, uobject);
- ib_umem_release_on_close(mrdev, &memobj->umem);
-
- kfree(memobj);
+ kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_filesystem(&uverbs_event_fs);
class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
- flush_scheduled_work();
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr);