diff options
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/Makefile | 4 | ||||
-rw-r--r-- | drivers/infiniband/core/cache.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/core/cm.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/core/cma.c | 106 | ||||
-rw-r--r-- | drivers/infiniband/core/device.c | 138 | ||||
-rw-r--r-- | drivers/infiniband/core/umem.c (renamed from drivers/infiniband/core/uverbs_mem.c) | 158 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs.h | 6 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 60 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 11 |
9 files changed, 316 insertions, 172 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 189e5d4b9b17..cb1ab3ea4998 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ device.o fmr_pool.o cache.o +ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_mad-y := mad.o smi.o agent.o mad_rmpp.o @@ -28,5 +29,4 @@ ib_umad-y := user_mad.o ib_ucm-y := ucm.o -ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ - uverbs_marshall.o +ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 558c9a0fc8b9..e85f7013de57 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -38,6 +38,7 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> +#include <linux/workqueue.h> #include <rdma/ib_cache.h> diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index eff591deeb46..e840434a96d8 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -306,7 +306,9 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) do { spin_lock_irqsave(&cm.lock, flags); ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, - next_id++, &id); + next_id, &id); + if (!ret) + next_id = ((unsigned) id + 1) & MAX_ID_MASK; spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fde92ce45153..2eb52b7a71da 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -346,12 +346,33 @@ static void cma_deref_id(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -static void cma_release_remove(struct rdma_id_private *id_priv) +static int cma_disable_remove(struct rdma_id_private *id_priv, + enum 
cma_state state) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&id_priv->lock, flags); + if (id_priv->state == state) { + atomic_inc(&id_priv->dev_remove); + ret = 0; + } else + ret = -EINVAL; + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} + +static void cma_enable_remove(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->dev_remove)) wake_up(&id_priv->wait_remove); } +static int cma_has_cm_dev(struct rdma_id_private *id_priv) +{ + return (id_priv->id.device && id_priv->cm_id.ib); +} + struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps) { @@ -884,9 +905,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_cm_event event; int ret = 0; - atomic_inc(&id_priv->dev_remove); - if (!cma_comp(id_priv, CMA_CONNECT)) - goto out; + if (cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; memset(&event, 0, sizeof event); switch (ib_event->event) { @@ -942,12 +962,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); rdma_destroy_id(&id_priv->id); return ret; } out: - cma_release_remove(id_priv); + cma_enable_remove(id_priv); return ret; } @@ -1057,11 +1077,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) int offset, ret; listen_id = cm_id->context; - atomic_inc(&listen_id->dev_remove); - if (!cma_comp(listen_id, CMA_LISTEN)) { - ret = -ECONNABORTED; - goto out; - } + if (cma_disable_remove(listen_id, CMA_LISTEN)) + return -ECONNABORTED; memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); @@ -1101,11 +1118,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) release_conn_id: cma_exch(conn_id, CMA_DESTROYING); - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(&conn_id->id); out: - cma_release_remove(listen_id); + cma_enable_remove(listen_id); return ret; } @@ -1171,9 +1188,10 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr_in *sin; int ret = 0; - memset(&event, 0, sizeof event); - atomic_inc(&id_priv->dev_remove); + if (cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; + memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; @@ -1214,12 +1232,12 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.iw = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); rdma_destroy_id(&id_priv->id); return ret; } - cma_release_remove(id_priv); + cma_enable_remove(id_priv); return ret; } @@ -1234,11 +1252,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, int ret; listen_id = cm_id->context; - atomic_inc(&listen_id->dev_remove); - if (!cma_comp(listen_id, CMA_LISTEN)) { - ret = -ECONNABORTED; - goto out; - } + if (cma_disable_remove(listen_id, CMA_LISTEN)) + return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.event_handler, @@ -1255,13 +1270,13 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(new_cm_id); goto out; } ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); if (ret) { - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(new_cm_id); goto out; } @@ -1270,7 +1285,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, ret = cma_acquire_dev(conn_id); mutex_unlock(&lock); if (ret) { - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(new_cm_id); goto out; } @@ -1293,14 +1308,14 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, CMA_DESTROYING); - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(&conn_id->id); } out: if (dev) dev_put(dev); - cma_release_remove(listen_id); + cma_enable_remove(listen_id); return ret; } @@ -1519,7 +1534,7 @@ static void cma_work_handler(struct work_struct *_work) destroy = 1; } out: - cma_release_remove(id_priv); + cma_enable_remove(id_priv); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); @@ -1711,13 +1726,13 @@ static void 
addr_handler(int status, struct sockaddr *src_addr, if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: - cma_release_remove(id_priv); + cma_enable_remove(id_priv); cma_deref_id(id_priv); } @@ -2042,11 +2057,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - memset(&event, 0, sizeof event); - atomic_inc(&id_priv->dev_remove); - if (!cma_comp(id_priv, CMA_CONNECT)) - goto out; + if (cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; + memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; @@ -2084,12 +2098,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); rdma_destroy_id(&id_priv->id); return ret; } out: - cma_release_remove(id_priv); + cma_enable_remove(id_priv); return ret; } @@ -2413,7 +2427,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_CONNECT)) + if (!cma_has_cm_dev(id_priv)) return -EINVAL; switch (id->device->node_type) { @@ -2435,7 +2449,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_CONNECT)) + if (!cma_has_cm_dev(id_priv)) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { @@ -2466,8 +2480,7 @@ int rdma_disconnect(struct rdma_cm_id *id) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_CONNECT) && - !cma_comp(id_priv, CMA_DISCONNECT)) + 
if (!cma_has_cm_dev(id_priv)) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { @@ -2499,10 +2512,9 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) int ret; id_priv = mc->id_priv; - atomic_inc(&id_priv->dev_remove); - if (!cma_comp(id_priv, CMA_ADDR_BOUND) && - !cma_comp(id_priv, CMA_ADDR_RESOLVED)) - goto out; + if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) && + cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) + return 0; if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, @@ -2524,12 +2536,12 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); rdma_destroy_id(&id_priv->id); return 0; } -out: - cma_release_remove(id_priv); + + cma_enable_remove(id_priv); return 0; } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7fabb425b033..3ada17c0f239 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -40,6 +40,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/mutex.h> +#include <linux/workqueue.h> #include "core_priv.h" @@ -149,6 +150,18 @@ static int alloc_name(char *name) return 0; } +static int start_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; +} + + +static int end_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 
+ 0 : device->phys_port_cnt; +} + /** * ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate @@ -208,6 +221,45 @@ static int add_client_context(struct ib_device *device, struct ib_client *client return 0; } +static int read_port_table_lengths(struct ib_device *device) +{ + struct ib_port_attr *tprops = NULL; + int num_ports, ret = -ENOMEM; + u8 port_index; + + tprops = kmalloc(sizeof *tprops, GFP_KERNEL); + if (!tprops) + goto out; + + num_ports = end_port(device) - start_port(device) + 1; + + device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports, + GFP_KERNEL); + device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports, + GFP_KERNEL); + if (!device->pkey_tbl_len || !device->gid_tbl_len) + goto err; + + for (port_index = 0; port_index < num_ports; ++port_index) { + ret = ib_query_port(device, port_index + start_port(device), + tprops); + if (ret) + goto err; + device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len; + device->gid_tbl_len[port_index] = tprops->gid_tbl_len; + } + + ret = 0; + goto out; + +err: + kfree(device->gid_tbl_len); + kfree(device->pkey_tbl_len); +out: + kfree(tprops); + return ret; +} + /** * ib_register_device - Register an IB device with IB core * @device:Device to register @@ -239,10 +291,19 @@ int ib_register_device(struct ib_device *device) spin_lock_init(&device->event_handler_lock); spin_lock_init(&device->client_data_lock); + ret = read_port_table_lengths(device); + if (ret) { + printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n", + device->name); + goto out; + } + ret = ib_device_register_sysfs(device); if (ret) { printk(KERN_WARNING "Couldn't register device %s with driver model\n", device->name); + kfree(device->gid_tbl_len); + kfree(device->pkey_tbl_len); goto out; } @@ -284,6 +345,9 @@ void ib_unregister_device(struct ib_device *device) list_del(&device->core_list); + kfree(device->gid_tbl_len); + kfree(device->pkey_tbl_len); + 
mutex_unlock(&device_mutex); spin_lock_irqsave(&device->client_data_lock, flags); @@ -506,10 +570,7 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { - if (device->node_type == RDMA_NODE_IB_SWITCH) { - if (port_num) - return -EINVAL; - } else if (port_num < 1 || port_num > device->phys_port_cnt) + if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; return device->query_port(device, port_num, port_attr); @@ -581,10 +642,7 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { - if (device->node_type == RDMA_NODE_IB_SWITCH) { - if (port_num) - return -EINVAL; - } else if (port_num < 1 || port_num > device->phys_port_cnt) + if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; return device->modify_port(device, port_num, port_modify_mask, @@ -592,6 +650,68 @@ int ib_modify_port(struct ib_device *device, } EXPORT_SYMBOL(ib_modify_port); +/** + * ib_find_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the GID table where the GID was found. This + * parameter may be NULL. + */ +int ib_find_gid(struct ib_device *device, union ib_gid *gid, + u8 *port_num, u16 *index) +{ + union ib_gid tmp_gid; + int ret, port, i; + + for (port = start_port(device); port <= end_port(device); ++port) { + for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) { + ret = ib_query_gid(device, port, i, &tmp_gid); + if (ret) + return ret; + if (!memcmp(&tmp_gid, gid, sizeof *gid)) { + *port_num = port; + if (index) + *index = i; + return 0; + } + } + } + + return -ENOENT; +} +EXPORT_SYMBOL(ib_find_gid); + +/** + * ib_find_pkey - Returns the PKey table index where a specified + * PKey value occurs. 
+ * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the PKey table where the PKey was found. + */ +int ib_find_pkey(struct ib_device *device, + u8 port_num, u16 pkey, u16 *index) +{ + int ret, i; + u16 tmp_pkey; + + for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { + ret = ib_query_pkey(device, port_num, i, &tmp_pkey); + if (ret) + return ret; + + if (pkey == tmp_pkey) { + *index = i; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL(ib_find_pkey); + static int __init ib_core_init(void) { int ret; @@ -613,6 +733,8 @@ static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); ib_sysfs_cleanup(); + /* Make sure that any pending umem accounting work is done. */ + flush_scheduled_work(); } module_init(ib_core_init); diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/umem.c index c95fe952abd5..b4aec5103c99 100644 --- a/drivers/infiniband/core/uverbs_mem.c +++ b/drivers/infiniband/core/umem.c @@ -36,16 +36,10 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> +#include <linux/sched.h> #include "uverbs.h" -struct ib_umem_account_work { - struct work_struct work; - struct mm_struct *mm; - unsigned long diff; -}; - - static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { struct ib_umem_chunk *chunk, *tmp; @@ -64,35 +58,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d } } -int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, - void *addr, size_t size, int write) +/** + * ib_umem_get - Pin and DMA map userspace memory. 
+ * @context: userspace context to pin memory for + * @addr: userspace virtual address to start at + * @size: length of region to pin + * @access: IB_ACCESS_xxx flags for memory being pinned + */ +struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, + size_t size, int access) { + struct ib_umem *umem; struct page **page_list; struct ib_umem_chunk *chunk; unsigned long locked; unsigned long lock_limit; unsigned long cur_base; unsigned long npages; - int ret = 0; + int ret; int off; int i; if (!can_do_mlock()) - return -EPERM; + return ERR_PTR(-EPERM); - page_list = (struct page **) __get_free_page(GFP_KERNEL); - if (!page_list) - return -ENOMEM; + umem = kmalloc(sizeof *umem, GFP_KERNEL); + if (!umem) + return ERR_PTR(-ENOMEM); - mem->user_base = (unsigned long) addr; - mem->length = size; - mem->offset = (unsigned long) addr & ~PAGE_MASK; - mem->page_size = PAGE_SIZE; - mem->writable = write; + umem->context = context; + umem->length = size; + umem->offset = addr & ~PAGE_MASK; + umem->page_size = PAGE_SIZE; + /* + * We ask for writable memory if any access flags other than + * "remote read" are set. "Local write" and "remote write" + * obviously require write access. "Remote atomic" can do + * things like fetch and add, which will modify memory, and + * "MW bind" can change permissions by binding a window. 
+ */ + umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ); - INIT_LIST_HEAD(&mem->chunk_list); + INIT_LIST_HEAD(&umem->chunk_list); - npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; + page_list = (struct page **) __get_free_page(GFP_KERNEL); + if (!page_list) { + kfree(umem); + return ERR_PTR(-ENOMEM); + } + + npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT; down_write(&current->mm->mmap_sem); @@ -104,13 +119,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, goto out; } - cur_base = (unsigned long) addr & PAGE_MASK; + cur_base = addr & PAGE_MASK; while (npages) { ret = get_user_pages(current, current->mm, cur_base, min_t(int, npages, PAGE_SIZE / sizeof (struct page *)), - 1, !write, page_list, NULL); + 1, !umem->writable, page_list, NULL); if (ret < 0) goto out; @@ -136,7 +151,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, chunk->page_list[i].length = PAGE_SIZE; } - chunk->nmap = ib_dma_map_sg(dev, + chunk->nmap = ib_dma_map_sg(context->device, &chunk->page_list[0], chunk->nents, DMA_BIDIRECTIONAL); @@ -151,75 +166,96 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, ret -= chunk->nents; off += chunk->nents; - list_add_tail(&chunk->list, &mem->chunk_list); + list_add_tail(&chunk->list, &umem->chunk_list); } ret = 0; } out: - if (ret < 0) - __ib_umem_release(dev, mem, 0); - else + if (ret < 0) { + __ib_umem_release(context->device, umem, 0); + kfree(umem); + } else current->mm->locked_vm = locked; up_write(&current->mm->mmap_sem); free_page((unsigned long) page_list); - return ret; + return ret < 0 ? 
ERR_PTR(ret) : umem; } +EXPORT_SYMBOL(ib_umem_get); -void ib_umem_release(struct ib_device *dev, struct ib_umem *umem) +static void ib_umem_account(struct work_struct *work) { - __ib_umem_release(dev, umem, 1); + struct ib_umem *umem = container_of(work, struct ib_umem, work); - down_write(&current->mm->mmap_sem); - current->mm->locked_vm -= - PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; - up_write(&current->mm->mmap_sem); + down_write(&umem->mm->mmap_sem); + umem->mm->locked_vm -= umem->diff; + up_write(&umem->mm->mmap_sem); + mmput(umem->mm); + kfree(umem); } -static void ib_umem_account(struct work_struct *_work) -{ - struct ib_umem_account_work *work = - container_of(_work, struct ib_umem_account_work, work); - - down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->diff; - up_write(&work->mm->mmap_sem); - mmput(work->mm); - kfree(work); -} - -void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) +/** + * ib_umem_release - release memory pinned with ib_umem_get + * @umem: umem struct to release + */ +void ib_umem_release(struct ib_umem *umem) { - struct ib_umem_account_work *work; + struct ib_ucontext *context = umem->context; struct mm_struct *mm; + unsigned long diff; - __ib_umem_release(dev, umem, 1); + __ib_umem_release(umem->context->device, umem, 1); mm = get_task_mm(current); - if (!mm) + if (!mm) { + kfree(umem); return; + } + + diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; /* * We may be called with the mm's mmap_sem already held. This * can happen when a userspace munmap() is the call that drops * the last reference to our file and calls our release * method. If there are memory regions to destroy, we'll end - * up here and not be able to take the mmap_sem. Therefore we - * defer the vm_locked accounting to the system workqueue. + * up here and not be able to take the mmap_sem. In that case + * we defer the vm_locked accounting to the system workqueue. 
*/ + if (context->closing && !down_write_trylock(&mm->mmap_sem)) { + INIT_WORK(&umem->work, ib_umem_account); + umem->mm = mm; + umem->diff = diff; - work = kmalloc(sizeof *work, GFP_KERNEL); - if (!work) { - mmput(mm); + schedule_work(&umem->work); return; - } + } else + down_write(&mm->mmap_sem); + + current->mm->locked_vm -= diff; + up_write(&mm->mmap_sem); + mmput(mm); + kfree(umem); +} +EXPORT_SYMBOL(ib_umem_release); + +int ib_umem_page_count(struct ib_umem *umem) +{ + struct ib_umem_chunk *chunk; + int shift; + int i; + int n; + + shift = ilog2(umem->page_size); - INIT_WORK(&work->work, ib_umem_account); - work->mm = mm; - work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + n = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) + for (i = 0; i < chunk->nmap; ++i) + n += sg_dma_len(&chunk->page_list[i]) >> shift; - schedule_work(&work->work); + return n; } +EXPORT_SYMBOL(ib_umem_page_count); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 102a59c033ff..c33546f9e961 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -45,6 +45,7 @@ #include <linux/completion.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> /* @@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); -int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, - void *addr, size_t size, int write); -void ib_umem_release(struct ib_device *dev, struct ib_umem *umem); -void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); - #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ const char __user *buf, int in_len, \ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index bab66769be14..01d70084aebe 100644 --- 
a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * Copyright (c) 2006 Mellanox Technologies. All rights reserved. * @@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; struct ib_udata udata; - struct ib_umem_object *obj; + struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; int ret; @@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) return -ENOMEM; - init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key); - down_write(&obj->uobject.mutex); - - /* - * We ask for writable memory if any access flags other than - * "remote read" are set. "Local write" and "remote write" - * obviously require write access. "Remote atomic" can do - * things like fetch and add, which will modify memory, and - * "MW bind" can change permissions by binding a window. 
- */ - ret = ib_umem_get(file->device->ib_dev, &obj->umem, - (void *) (unsigned long) cmd.start, cmd.length, - !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ)); - if (ret) - goto err_free; - - obj->umem.virt_base = cmd.hca_va; + init_uobj(uobj, 0, file->ucontext, &mr_lock_key); + down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; - goto err_release; + goto err_free; } - mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); + mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, + cmd.access_flags, &udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto err_put; @@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, mr->device = pd->device; mr->pd = pd; - mr->uobject = &obj->uobject; + mr->uobject = uobj; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); - obj->uobject.object = mr; - ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject); + uobj->object = mr; + ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); if (ret) goto err_unreg; memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; resp.rkey = mr->rkey; - resp.mr_handle = obj->uobject.id; + resp.mr_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, put_pd_read(pd); mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + list_add_tail(&uobj->list, &file->ucontext->mr_list); mutex_unlock(&file->mutex); - obj->uobject.live = 1; + uobj->live = 1; - up_write(&obj->uobject.mutex); + up_write(&uobj->mutex); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject); + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); err_unreg: ib_dereg_mr(mr); @@ -676,11 +663,8 @@ err_unreg: err_put: put_pd_read(pd); -err_release: - ib_umem_release(file->device->ib_dev, &obj->umem); - err_free: - put_uobj_write(&obj->uobject); + put_uobj_write(uobj); return ret; } @@ 
-691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, struct ib_uverbs_dereg_mr cmd; struct ib_mr *mr; struct ib_uobject *uobj; - struct ib_umem_object *memobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, if (!uobj) return -EINVAL; - memobj = container_of(uobj, struct ib_umem_object, uobject); - mr = uobj->object; + mr = uobj->object; ret = ib_dereg_mr(mr); if (!ret) @@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, list_del(&uobj->list); mutex_unlock(&file->mutex); - ib_umem_release(file->device->ib_dev, &memobj->umem); - put_uobj(uobj); return in_len; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index d44e54799651..14d7ccd89195 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, if (!context) return 0; + context->closing = 1; + list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { struct ib_ah *ah = uobj->object; @@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; - struct ib_device *mrdev = mr->device; - struct ib_umem_object *memobj; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); ib_dereg_mr(mr); - - memobj = container_of(uobj, struct ib_umem_object, uobject); - ib_umem_release_on_close(mrdev, &memobj->umem); - - kfree(memobj); + kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { @@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void) unregister_filesystem(&uverbs_event_fs); class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); - flush_scheduled_work(); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); 
idr_destroy(&ib_uverbs_mw_idr); |