diff options
157 files changed, 6026 insertions, 3412 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 9d5eeff51b5f..1e53fe99eb63 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3506,7 +3506,6 @@ F: drivers/net/ethernet/cisco/enic/ CISCO VIC LOW LATENCY NIC DRIVER M: Christian Benvenuti <[email protected]> -M: Dave Goodell <[email protected]> S: Supported F: drivers/infiniband/hw/usnic/ @@ -7562,9 +7561,8 @@ S: Maintained F: drivers/firmware/iscsi_ibft* ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR -M: Or Gerlitz <[email protected]> M: Sagi Grimberg <[email protected]> -M: Roi Dayan <[email protected]> +M: Max Gurtovoy <[email protected]> S: Supported W: http://www.openfabrics.org @@ -12621,15 +12619,21 @@ S: Maintained F: drivers/scsi/sr* SCSI RDMA PROTOCOL (SRP) INITIATOR -M: Bart Van Assche <[email protected]> +M: Bart Van Assche <[email protected]> S: Supported -W: http://www.openfabrics.org Q: http://patchwork.kernel.org/project/linux-rdma/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/dad/srp-initiator.git F: drivers/infiniband/ulp/srp/ F: include/scsi/srp.h +SCSI RDMA PROTOCOL (SRP) TARGET +M: Bart Van Assche <[email protected]> +S: Supported +Q: http://patchwork.kernel.org/project/linux-rdma/list/ +F: drivers/infiniband/ulp/srpt/ + SCSI SG DRIVER M: Doug Gilbert <[email protected]> diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 4f32c4062fb6..1b817fdb97a4 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -315,19 +315,17 @@ static int dst_fetch_ha(const struct dst_entry *dst, int ret = 0; n = dst_neigh_lookup(dst, daddr); + if (!n) + return -ENODATA; - rcu_read_lock(); - if (!n || !(n->nud_state & NUD_VALID)) { - if (n) - neigh_event_send(n, NULL); + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); ret = -ENODATA; } else { rdma_copy_addr(dev_addr, dst->dev, n->ha); } - rcu_read_unlock(); - if (n) - neigh_release(n); + neigh_release(n); return ret; } diff --git a/drivers/infiniband/core/cache.c 
b/drivers/infiniband/core/cache.c index 81d66f56e38f..0bee1f4b914e 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -66,20 +66,28 @@ enum gid_attr_find_mask { GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, }; -enum gid_table_entry_props { - GID_TABLE_ENTRY_INVALID = 1UL << 0, - GID_TABLE_ENTRY_DEFAULT = 1UL << 1, +enum gid_table_entry_state { + GID_TABLE_ENTRY_INVALID = 1, + GID_TABLE_ENTRY_VALID = 2, + /* + * Indicates that entry is pending to be removed, there may + * be active users of this GID entry. + * When last user of the GID entry releases reference to it, + * GID entry is detached from the table. + */ + GID_TABLE_ENTRY_PENDING_DEL = 3, }; struct ib_gid_table_entry { - unsigned long props; - union ib_gid gid; - struct ib_gid_attr attr; - void *context; + struct kref kref; + struct work_struct del_work; + struct ib_gid_attr attr; + void *context; + enum gid_table_entry_state state; }; struct ib_gid_table { - int sz; + int sz; /* In RoCE, adding a GID to the table requires: * (a) Find if this GID is already exists. * (b) Find a free space. @@ -91,13 +99,16 @@ struct ib_gid_table { * **/ /* Any writer to data_vec must hold this lock and the write side of - * rwlock. readers must hold only rwlock. All writers must be in a + * rwlock. Readers must hold only rwlock. All writers must be in a * sleepable context. */ - struct mutex lock; - /* rwlock protects data_vec[ix]->props. */ - rwlock_t rwlock; - struct ib_gid_table_entry *data_vec; + struct mutex lock; + /* rwlock protects data_vec[ix]->state and entry pointer. 
+ */ + rwlock_t rwlock; + struct ib_gid_table_entry **data_vec; + /* bit field, each bit indicates the index of default GID */ + u32 default_gid_indices; }; static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) @@ -135,6 +146,19 @@ bool rdma_is_zero_gid(const union ib_gid *gid) } EXPORT_SYMBOL(rdma_is_zero_gid); +/** is_gid_index_default - Check if a given index belongs to + * reserved default GIDs or not. + * @table: GID table pointer + * @index: Index to check in GID table + * Returns true if index is one of the reserved default GID index otherwise + * returns false. + */ +static bool is_gid_index_default(const struct ib_gid_table *table, + unsigned int index) +{ + return index < 32 && (BIT(index) & table->default_gid_indices); +} + int ib_cache_gid_parse_type_str(const char *buf) { unsigned int i; @@ -164,26 +188,136 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port) return device->cache.ports[port - rdma_start_port(device)].gid; } -static void del_roce_gid(struct ib_device *device, u8 port_num, - struct ib_gid_table *table, int ix) +static bool is_gid_entry_free(const struct ib_gid_table_entry *entry) +{ + return !entry; +} + +static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry) +{ + return entry && entry->state == GID_TABLE_ENTRY_VALID; +} + +static void schedule_free_gid(struct kref *kref) { + struct ib_gid_table_entry *entry = + container_of(kref, struct ib_gid_table_entry, kref); + + queue_work(ib_wq, &entry->del_work); +} + +static void free_gid_entry_locked(struct ib_gid_table_entry *entry) +{ + struct ib_device *device = entry->attr.device; + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table = rdma_gid_table(device, port_num); + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - device->name, port_num, ix, - table->data_vec[ix].gid.raw); + device->name, port_num, entry->attr.index, + entry->attr.gid.raw); + + if (rdma_cap_roce_gid_table(device, port_num) && + 
entry->state != GID_TABLE_ENTRY_INVALID) + device->del_gid(&entry->attr, &entry->context); + + write_lock_irq(&table->rwlock); - if (rdma_cap_roce_gid_table(device, port_num)) - device->del_gid(&table->data_vec[ix].attr, - &table->data_vec[ix].context); - dev_put(table->data_vec[ix].attr.ndev); + /* + * The only way to avoid overwriting NULL in table is + * by comparing if it is same entry in table or not! + * If new entry in table is added by the time we free here, + * don't overwrite the table entry. + */ + if (entry == table->data_vec[entry->attr.index]) + table->data_vec[entry->attr.index] = NULL; + /* Now this index is ready to be allocated */ + write_unlock_irq(&table->rwlock); + + if (entry->attr.ndev) + dev_put(entry->attr.ndev); + kfree(entry); } -static int add_roce_gid(struct ib_gid_table *table, - const union ib_gid *gid, - const struct ib_gid_attr *attr) +static void free_gid_entry(struct kref *kref) +{ + struct ib_gid_table_entry *entry = + container_of(kref, struct ib_gid_table_entry, kref); + + free_gid_entry_locked(entry); +} + +/** + * free_gid_work - Release reference to the GID entry + * @work: Work structure to refer to GID entry which needs to be + * deleted. + * + * free_gid_work() frees the entry from the HCA's hardware table + * if provider supports it. It releases reference to netdevice. 
+ */ +static void free_gid_work(struct work_struct *work) +{ + struct ib_gid_table_entry *entry = + container_of(work, struct ib_gid_table_entry, del_work); + struct ib_device *device = entry->attr.device; + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table = rdma_gid_table(device, port_num); + + mutex_lock(&table->lock); + free_gid_entry_locked(entry); + mutex_unlock(&table->lock); +} + +static struct ib_gid_table_entry * +alloc_gid_entry(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry; - int ix = attr->index; - int ret = 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + kref_init(&entry->kref); + memcpy(&entry->attr, attr, sizeof(*attr)); + if (entry->attr.ndev) + dev_hold(entry->attr.ndev); + INIT_WORK(&entry->del_work, free_gid_work); + entry->state = GID_TABLE_ENTRY_INVALID; + return entry; +} + +static void store_gid_entry(struct ib_gid_table *table, + struct ib_gid_table_entry *entry) +{ + entry->state = GID_TABLE_ENTRY_VALID; + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + entry->attr.device->name, entry->attr.port_num, + entry->attr.index, entry->attr.gid.raw); + + lockdep_assert_held(&table->lock); + write_lock_irq(&table->rwlock); + table->data_vec[entry->attr.index] = entry; + write_unlock_irq(&table->rwlock); +} + +static void get_gid_entry(struct ib_gid_table_entry *entry) +{ + kref_get(&entry->kref); +} + +static void put_gid_entry(struct ib_gid_table_entry *entry) +{ + kref_put(&entry->kref, schedule_free_gid); +} + +static void put_gid_entry_locked(struct ib_gid_table_entry *entry) +{ + kref_put(&entry->kref, free_gid_entry); +} + +static int add_roce_gid(struct ib_gid_table_entry *entry) +{ + const struct ib_gid_attr *attr = &entry->attr; + int ret; if (!attr->ndev) { pr_err("%s NULL netdev device=%s port=%d index=%d\n", @@ -191,38 +325,22 @@ static int add_roce_gid(struct ib_gid_table *table, attr->index); return -EINVAL; } - - entry = &table->data_vec[ix]; - 
if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) { - WARN(1, "GID table corruption device=%s port=%d index=%d\n", - attr->device->name, attr->port_num, - attr->index); - return -EINVAL; - } - if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { - ret = attr->device->add_gid(gid, attr, &entry->context); + ret = attr->device->add_gid(attr, &entry->context); if (ret) { pr_err("%s GID add failed device=%s port=%d index=%d\n", __func__, attr->device->name, attr->port_num, attr->index); - goto add_err; + return ret; } } - dev_hold(attr->ndev); - -add_err: - if (!ret) - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - attr->device->name, attr->port_num, ix, gid->raw); - return ret; + return 0; } /** * add_modify_gid - Add or modify GID table entry * * @table: GID table in which GID to be added or modified - * @gid: GID content * @attr: Attributes of the GID * * Returns 0 on success or appropriate error code. It accepts zero @@ -230,34 +348,42 @@ add_err: * GID. However such zero GIDs are not added to the cache. */ static int add_modify_gid(struct ib_gid_table *table, - const union ib_gid *gid, const struct ib_gid_attr *attr) { - int ret; + struct ib_gid_table_entry *entry; + int ret = 0; + + /* + * Invalidate any old entry in the table to make it safe to write to + * this index. + */ + if (is_gid_entry_valid(table->data_vec[attr->index])) + put_gid_entry(table->data_vec[attr->index]); + + /* + * Some HCA's report multiple GID entries with only one valid GID, and + * leave other unused entries as the zero GID. Convert zero GIDs to + * empty table entries instead of storing them. 
+ */ + if (rdma_is_zero_gid(&attr->gid)) + return 0; + + entry = alloc_gid_entry(attr); + if (!entry) + return -ENOMEM; if (rdma_protocol_roce(attr->device, attr->port_num)) { - ret = add_roce_gid(table, gid, attr); + ret = add_roce_gid(entry); if (ret) - return ret; - } else { - /* - * Some HCA's report multiple GID entries with only one - * valid GID, but remaining as zero GID. - * So ignore such behavior for IB link layer and don't - * fail the call, but don't add such entry to GID cache. - */ - if (rdma_is_zero_gid(gid)) - return 0; + goto done; } - lockdep_assert_held(&table->lock); - memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid)); - memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr)); - - write_lock_irq(&table->rwlock); - table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID; - write_unlock_irq(&table->rwlock); + store_gid_entry(table, entry); return 0; + +done: + put_gid_entry(entry); + return ret; } /** @@ -272,16 +398,25 @@ static int add_modify_gid(struct ib_gid_table *table, static void del_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix) { + struct ib_gid_table_entry *entry; + lockdep_assert_held(&table->lock); + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + ib_dev->name, port, ix, + table->data_vec[ix]->attr.gid.raw); + write_lock_irq(&table->rwlock); - table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; + entry = table->data_vec[ix]; + entry->state = GID_TABLE_ENTRY_PENDING_DEL; + /* + * For non RoCE protocol, GID entry slot is ready to use. 
+ */ + if (!rdma_protocol_roce(ib_dev, port)) + table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); - if (rdma_protocol_roce(ib_dev, port)) - del_roce_gid(ib_dev, port, table, ix); - memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid)); - memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr)); - table->data_vec[ix].context = NULL; + put_gid_entry_locked(entry); } /* rwlock should be read locked, or lock should be held */ @@ -294,8 +429,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, int empty = pempty ? -1 : 0; while (i < table->sz && (found < 0 || empty < 0)) { - struct ib_gid_table_entry *data = &table->data_vec[i]; - struct ib_gid_attr *attr = &data->attr; + struct ib_gid_table_entry *data = table->data_vec[i]; + struct ib_gid_attr *attr; int curr_index = i; i++; @@ -306,9 +441,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, * so lookup free slot only if requested. */ if (pempty && empty < 0) { - if (data->props & GID_TABLE_ENTRY_INVALID && - (default_gid == - !!(data->props & GID_TABLE_ENTRY_DEFAULT))) { + if (is_gid_entry_free(data) && + default_gid == + is_gid_index_default(table, curr_index)) { /* * Found an invalid (free) entry; allocate it. * If default GID is requested, then our @@ -323,22 +458,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, /* * Additionally find_gid() is used to find valid entry during - * lookup operation, where validity needs to be checked. So - * find the empty entry first to continue to search for a free - * slot and ignore its INVALID flag. + * lookup operation; so ignore the entries which are marked as + * pending for removal and the entries which are marked as + * invalid. 
*/ - if (data->props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(data)) continue; if (found >= 0) continue; + attr = &data->attr; if (mask & GID_ATTR_FIND_MASK_GID_TYPE && attr->gid_type != val->gid_type) continue; if (mask & GID_ATTR_FIND_MASK_GID && - memcmp(gid, &data->gid, sizeof(*gid))) + memcmp(gid, &data->attr.gid, sizeof(*gid))) continue; if (mask & GID_ATTR_FIND_MASK_NETDEV && @@ -346,8 +482,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, continue; if (mask & GID_ATTR_FIND_MASK_DEFAULT && - !!(data->props & GID_TABLE_ENTRY_DEFAULT) != - default_gid) + is_gid_index_default(table, curr_index) != default_gid) continue; found = curr_index; @@ -396,7 +531,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, attr->device = ib_dev; attr->index = empty; attr->port_num = port; - ret = add_modify_gid(table, gid, attr); + attr->gid = *gid; + ret = add_modify_gid(table, attr); if (!ret) dispatch_gid_change_event(ib_dev, port); @@ -492,7 +628,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (ix = 0; ix < table->sz; ix++) { - if (table->data_vec[ix].attr.ndev == ndev) { + if (is_gid_entry_valid(table->data_vec[ix]) && + table->data_vec[ix]->attr.ndev == ndev) { del_gid(ib_dev, port, table, ix); deleted = true; } @@ -506,103 +643,37 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, return 0; } -static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, - union ib_gid *gid, struct ib_gid_attr *attr) -{ - struct ib_gid_table *table; - - table = rdma_gid_table(ib_dev, port); - - if (index < 0 || index >= table->sz) - return -EINVAL; - - if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) - return -EINVAL; - - memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); - if (attr) { - memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); - if (attr->ndev) - dev_hold(attr->ndev); - } - - return 0; -} - -static int 
_ib_cache_gid_table_find(struct ib_device *ib_dev, - const union ib_gid *gid, - const struct ib_gid_attr *val, - unsigned long mask, - u8 *port, u16 *index) -{ - struct ib_gid_table *table; - u8 p; - int local_index; - unsigned long flags; - - for (p = 0; p < ib_dev->phys_port_cnt; p++) { - table = ib_dev->cache.ports[p].gid; - read_lock_irqsave(&table->rwlock, flags); - local_index = find_gid(table, gid, val, false, mask, NULL); - if (local_index >= 0) { - if (index) - *index = local_index; - if (port) - *port = p + rdma_start_port(ib_dev); - read_unlock_irqrestore(&table->rwlock, flags); - return 0; - } - read_unlock_irqrestore(&table->rwlock, flags); - } - - return -ENOENT; -} - -static int ib_cache_gid_find(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, u8 *port, - u16 *index) -{ - unsigned long mask = GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE; - struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; - - if (ndev) - mask |= GID_ATTR_FIND_MASK_NETDEV; - - return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, - mask, port, index); -} - /** - * ib_find_cached_gid_by_port - Returns the GID table index where a specified - * GID value occurs. It searches for the specified GID value in the local - * software cache. + * rdma_find_gid_by_port - Returns the GID entry attributes when it finds + * a valid GID entry for given search parameters. It searches for the specified + * GID value in the local software cache. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @port_num: The port number of the device where the GID value should be * searched. - * @ndev: In RoCE, the net device of the device. Null means ignore. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. + * @ndev: In RoCE, the net device of the device. NULL means ignore. 
+ * + * Returns sgid attributes if the GID is found with valid reference or + * returns ERR_PTR for the error. + * The caller must invoke rdma_put_gid_attr() to release the reference. */ -int ib_find_cached_gid_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - u8 port, struct net_device *ndev, - u16 *index) +const struct ib_gid_attr * +rdma_find_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + enum ib_gid_type gid_type, + u8 port, struct net_device *ndev) { int local_index; struct ib_gid_table *table; unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; + const struct ib_gid_attr *attr; unsigned long flags; if (!rdma_is_port_valid(ib_dev, port)) - return -ENOENT; + return ERR_PTR(-ENOENT); table = rdma_gid_table(ib_dev, port); @@ -612,89 +683,73 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, read_lock_irqsave(&table->rwlock, flags); local_index = find_gid(table, gid, &val, false, mask, NULL); if (local_index >= 0) { - if (index) - *index = local_index; + get_gid_entry(table->data_vec[local_index]); + attr = &table->data_vec[local_index]->attr; read_unlock_irqrestore(&table->rwlock, flags); - return 0; + return attr; } read_unlock_irqrestore(&table->rwlock, flags); - return -ENOENT; + return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL(ib_find_cached_gid_by_port); +EXPORT_SYMBOL(rdma_find_gid_by_port); /** - * ib_cache_gid_find_by_filter - Returns the GID table index where a specified - * GID value occurs + * rdma_find_gid_by_filter - Returns the GID table attribute where a + * specified GID value occurs * @device: The device to query. * @gid: The GID value to search for. - * @port_num: The port number of the device where the GID value could be + * @port: The port number of the device where the GID value could be * searched. * @filter: The filter function is executed on any matching GID in the table. 
* If the filter function returns true, the corresponding index is returned, * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. * - * ib_cache_gid_find_by_filter() searches for the specified GID value + * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. - * This function is only supported on RoCE ports. * */ -static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, - const union ib_gid *gid, - u8 port, - bool (*filter)(const union ib_gid *, - const struct ib_gid_attr *, - void *), - void *context, - u16 *index) +const struct ib_gid_attr *rdma_find_gid_by_filter( + struct ib_device *ib_dev, const union ib_gid *gid, u8 port, + bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, + void *), + void *context) { + const struct ib_gid_attr *res = ERR_PTR(-ENOENT); struct ib_gid_table *table; - unsigned int i; unsigned long flags; - bool found = false; - + unsigned int i; - if (!rdma_is_port_valid(ib_dev, port) || - !rdma_protocol_roce(ib_dev, port)) - return -EPROTONOSUPPORT; + if (!rdma_is_port_valid(ib_dev, port)) + return ERR_PTR(-EINVAL); table = rdma_gid_table(ib_dev, port); read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { - struct ib_gid_attr attr; + struct ib_gid_table_entry *entry = table->data_vec[i]; - if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(entry)) continue; - if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) + if (memcmp(gid, &entry->attr.gid, sizeof(*gid))) continue; - memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); - - if (filter(gid, &attr, context)) { - found = true; - if (index) - *index = i; + if 
(filter(gid, &entry->attr, context)) { + get_gid_entry(entry); + res = &entry->attr; break; } } read_unlock_irqrestore(&table->rwlock, flags); - - if (!found) - return -ENOENT; - return 0; + return res; } static struct ib_gid_table *alloc_gid_table(int sz) { - struct ib_gid_table *table = - kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); - int i; + struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return NULL; @@ -707,12 +762,6 @@ static struct ib_gid_table *alloc_gid_table(int sz) table->sz = sz; rwlock_init(&table->rwlock); - - /* Mark all entries as invalid so that allocator can allocate - * one of the invalid (free) entry. - */ - for (i = 0; i < sz; i++) - table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID; return table; err_free_table: @@ -720,12 +769,30 @@ err_free_table: return NULL; } -static void release_gid_table(struct ib_gid_table *table) +static void release_gid_table(struct ib_device *device, u8 port, + struct ib_gid_table *table) { - if (table) { - kfree(table->data_vec); - kfree(table); + bool leak = false; + int i; + + if (!table) + return; + + for (i = 0; i < table->sz; i++) { + if (is_gid_entry_free(table->data_vec[i])) + continue; + if (kref_read(&table->data_vec[i]->kref) > 1) { + pr_err("GID entry ref leak for %s (index %d) ref=%d\n", + device->name, i, + kref_read(&table->data_vec[i]->kref)); + leak = true; + } } + if (leak) + return; + + kfree(table->data_vec); + kfree(table); } static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, @@ -739,7 +806,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (!rdma_is_zero_gid(&table->data_vec[i].gid)) { + if (is_gid_entry_valid(table->data_vec[i])) { del_gid(ib_dev, port, table, i); deleted = true; } @@ -757,12 +824,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, { union ib_gid gid = { }; struct ib_gid_attr gid_attr; - struct ib_gid_table 
*table; unsigned int gid_type; unsigned long mask; - table = rdma_gid_table(ib_dev, port); - mask = GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT | GID_ATTR_FIND_MASK_NETDEV; @@ -792,19 +856,12 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port, unsigned int i; unsigned long roce_gid_type_mask; unsigned int num_default_gids; - unsigned int current_gid = 0; roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); num_default_gids = hweight_long(roce_gid_type_mask); - for (i = 0; i < num_default_gids && i < table->sz; i++) { - struct ib_gid_table_entry *entry = &table->data_vec[i]; - - entry->props |= GID_TABLE_ENTRY_DEFAULT; - current_gid = find_next_bit(&roce_gid_type_mask, - BITS_PER_LONG, - current_gid); - entry->attr.gid_type = current_gid++; - } + /* Reserve starting indices for default GIDs */ + for (i = 0; i < num_default_gids && i < table->sz; i++) + table->default_gid_indices |= BIT(i); } @@ -815,7 +872,7 @@ static void gid_table_release_one(struct ib_device *ib_dev) for (port = 0; port < ib_dev->phys_port_cnt; port++) { table = ib_dev->cache.ports[port].gid; - release_gid_table(table); + release_gid_table(ib_dev, port, table); ib_dev->cache.ports[port].gid = NULL; } } @@ -869,69 +926,94 @@ static int gid_table_setup_one(struct ib_device *ib_dev) return err; } -int ib_get_cached_gid(struct ib_device *device, - u8 port_num, - int index, - union ib_gid *gid, - struct ib_gid_attr *gid_attr) +/** + * rdma_query_gid - Read the GID content from the GID software cache + * @device: Device to query the GID + * @port_num: Port number of the device + * @index: Index of the GID table entry to read + * @gid: Pointer to GID where to store the entry's GID + * + * rdma_query_gid() only reads the GID entry content for requested device, + * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't + * hold any reference to the GID table entry in the HCA or software cache. 
+ * + * Returns 0 on success or appropriate error code. + * + */ +int rdma_query_gid(struct ib_device *device, u8 port_num, + int index, union ib_gid *gid) { - int res; - unsigned long flags; struct ib_gid_table *table; + unsigned long flags; + int res = -EINVAL; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); - res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); - read_unlock_irqrestore(&table->rwlock, flags); + if (index < 0 || index >= table->sz || + !is_gid_entry_valid(table->data_vec[index])) + goto done; + + memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); + res = 0; + +done: + read_unlock_irqrestore(&table->rwlock, flags); return res; } -EXPORT_SYMBOL(ib_get_cached_gid); +EXPORT_SYMBOL(rdma_query_gid); /** - * ib_find_cached_gid - Returns the port number and GID table index where - * a specified GID value occurs. + * rdma_find_gid - Returns SGID attributes if the matching GID is found. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @ndev: In RoCE, the net device of the device. NULL means ignore. - * @port_num: The port number of the device where the GID value was found. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. * - * ib_find_cached_gid() searches for the specified GID value in - * the local software cache. + * rdma_find_gid() searches for the specified GID value in the software cache. + * + * Returns GID attributes if a valid GID is found or returns ERR_PTR for the + * error. The caller must invoke rdma_put_gid_attr() to release the reference. 
+ * */ -int ib_find_cached_gid(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, - u8 *port_num, - u16 *index) -{ - return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); -} -EXPORT_SYMBOL(ib_find_cached_gid); - -int ib_find_gid_by_filter(struct ib_device *device, - const union ib_gid *gid, - u8 port_num, - bool (*filter)(const union ib_gid *gid, - const struct ib_gid_attr *, - void *), - void *context, u16 *index) +const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, + const union ib_gid *gid, + enum ib_gid_type gid_type, + struct net_device *ndev) { - /* Only RoCE GID table supports filter function */ - if (!rdma_protocol_roce(device, port_num) && filter) - return -EPROTONOSUPPORT; + unsigned long mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE; + struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; + u8 p; + + if (ndev) + mask |= GID_ATTR_FIND_MASK_NETDEV; + + for (p = 0; p < device->phys_port_cnt; p++) { + struct ib_gid_table *table; + unsigned long flags; + int index; + + table = device->cache.ports[p].gid; + read_lock_irqsave(&table->rwlock, flags); + index = find_gid(table, gid, &gid_attr_val, false, mask, NULL); + if (index >= 0) { + const struct ib_gid_attr *attr; + + get_gid_entry(table->data_vec[index]); + attr = &table->data_vec[index]->attr; + read_unlock_irqrestore(&table->rwlock, flags); + return attr; + } + read_unlock_irqrestore(&table->rwlock, flags); + } - return ib_cache_gid_find_by_filter(device, gid, - port_num, filter, - context, index); + return ERR_PTR(-ENOENT); } +EXPORT_SYMBOL(rdma_find_gid); int ib_get_cached_pkey(struct ib_device *device, u8 port_num, @@ -1089,12 +1171,92 @@ int ib_get_cached_port_state(struct ib_device *device, } EXPORT_SYMBOL(ib_get_cached_port_state); +/** + * rdma_get_gid_attr - Returns GID attributes for a port of a device + * at a requested gid_index, if a valid GID entry exists. 
+ * @device: The device to query. + * @port_num: The port number on the device where the GID value + * is to be queried. + * @index: Index of the GID table entry whose attributes are to + * be queried. + * + * rdma_get_gid_attr() acquires reference count of gid attributes from the + * cached GID table. Caller must invoke rdma_put_gid_attr() to release + * reference to gid attribute regardless of link layer. + * + * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error + * code. + */ +const struct ib_gid_attr * +rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index) +{ + const struct ib_gid_attr *attr = ERR_PTR(-EINVAL); + struct ib_gid_table *table; + unsigned long flags; + + if (!rdma_is_port_valid(device, port_num)) + return ERR_PTR(-EINVAL); + + table = rdma_gid_table(device, port_num); + if (index < 0 || index >= table->sz) + return ERR_PTR(-EINVAL); + + read_lock_irqsave(&table->rwlock, flags); + if (!is_gid_entry_valid(table->data_vec[index])) + goto done; + + get_gid_entry(table->data_vec[index]); + attr = &table->data_vec[index]->attr; +done: + read_unlock_irqrestore(&table->rwlock, flags); + return attr; +} +EXPORT_SYMBOL(rdma_get_gid_attr); + +/** + * rdma_put_gid_attr - Release reference to the GID attribute + * @attr: Pointer to the GID attribute whose reference + * needs to be released. + * + * rdma_put_gid_attr() must be used to release reference whose + * reference is acquired using rdma_get_gid_attr() or any APIs + * which returns a pointer to the ib_gid_attr regardless of link layer + * of IB or RoCE. + * + */ +void rdma_put_gid_attr(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + + put_gid_entry(entry); +} +EXPORT_SYMBOL(rdma_put_gid_attr); + +/** + * rdma_hold_gid_attr - Get reference to existing GID attribute + * + * @attr: Pointer to the GID attribute whose reference + * needs to be taken. 
+ * + * Increase the reference count to a GID attribute to keep it from being + * freed. Callers are required to already be holding a reference to attribute. + * + */ +void rdma_hold_gid_attr(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + + get_gid_entry(entry); +} +EXPORT_SYMBOL(rdma_hold_gid_attr); + static int config_non_roce_gid_cache(struct ib_device *device, u8 port, int gid_tbl_len) { struct ib_gid_attr gid_attr = {}; struct ib_gid_table *table; - union ib_gid gid; int ret = 0; int i; @@ -1106,14 +1268,14 @@ static int config_non_roce_gid_cache(struct ib_device *device, for (i = 0; i < gid_tbl_len; ++i) { if (!device->query_gid) continue; - ret = device->query_gid(device, port, i, &gid); + ret = device->query_gid(device, port, i, &gid_attr.gid); if (ret) { pr_warn("query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } gid_attr.index = i; - add_modify_gid(table, &gid, &gid_attr); + add_modify_gid(table, &gid_attr); } err: mutex_unlock(&table->lock); @@ -1128,13 +1290,10 @@ static void ib_cache_update(struct ib_device *device, struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; int i; int ret; - struct ib_gid_table *table; if (!rdma_is_port_valid(device, port)) return; - table = rdma_gid_table(device, port); - tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return; @@ -1296,4 +1455,9 @@ void ib_cache_cleanup_one(struct ib_device *device) ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); + + /* + * Flush the wq second time for any pending GID delete work. 
+ */ + flush_workqueue(ib_wq); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 27a7b0a2e27a..4724cb09b69d 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, if (ret) return ret; - memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } @@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, return ret; } -static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path) +static struct cm_port * +get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) { struct cm_device *cm_dev; struct cm_port *port = NULL; unsigned long flags; - u8 p; - struct net_device *ndev = ib_get_ndev_from_path(path); - - read_lock_irqsave(&cm.device_lock, flags); - list_for_each_entry(cm_dev, &cm.device_list, list) { - if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - sa_conv_pathrec_to_gid_type(path), - ndev, &p, NULL)) { - port = cm_dev->port[p - 1]; - break; + + if (attr) { + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + if (cm_dev->ib_device == attr->device) { + port = cm_dev->port[attr->port_num - 1]; + break; + } + } + read_unlock_irqrestore(&cm.device_lock, flags); + } else { + /* SGID attribute can be NULL in following + * conditions. 
+ * (a) Alternative path + * (b) IB link layer without GRH + * (c) LAP send messages + */ + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + attr = rdma_find_gid(cm_dev->ib_device, + &path->sgid, + sa_conv_pathrec_to_gid_type(path), + NULL); + if (!IS_ERR(attr)) { + port = cm_dev->port[attr->port_num - 1]; + break; + } } + read_unlock_irqrestore(&cm.device_lock, flags); + if (port) + rdma_put_gid_attr(attr); } - read_unlock_irqrestore(&cm.device_lock, flags); - - if (ndev) - dev_put(ndev); return port; } -static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, +static int cm_init_av_by_path(struct sa_path_rec *path, + const struct ib_gid_attr *sgid_attr, + struct cm_av *av, struct cm_id_private *cm_id_priv) { struct rdma_ah_attr new_ah_attr; @@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, struct cm_port *port; int ret; - port = get_cm_port_from_path(path); + port = get_cm_port_from_path(path, sgid_attr); if (!port) return -EINVAL; cm_dev = port->cm_dev; @@ -554,22 +573,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, /* * av->ah_attr might be initialized based on wc or during - * request processing time. So initialize a new ah_attr on stack. + * request processing time which might have reference to sgid_attr. + * So initialize a new ah_attr on stack. * If initialization fails, old ah_attr is used for sending any * responses. If initialization is successful, than new ah_attr - * is used by overwriting the old one. + * is used by overwriting the old one. So that right ah_attr + * can be used to return an error response. 
*/ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path, - &new_ah_attr); + &new_ah_attr, sgid_attr); if (ret) return ret; av->timeout = path->packet_life_time + 1; ret = add_cm_id_to_port_list(cm_id_priv, av, port); - if (ret) + if (ret) { + rdma_destroy_ah_attr(&new_ah_attr); return ret; - memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + } + rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } @@ -1091,6 +1114,9 @@ retest: wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); + + rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); + rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); kfree(cm_id_priv->private_data); kfree(cm_id_priv); } @@ -1230,14 +1256,12 @@ new_id: } EXPORT_SYMBOL(ib_cm_insert_listen); -static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, - enum cm_msg_sequence msg_seq) +static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) { u64 hi_tid, low_tid; hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | - (msg_seq << 30)); + low_tid = (u64)cm_id_priv->id.local_id; return cpu_to_be64(hi_tid | low_tid); } @@ -1265,7 +1289,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, pri_path->opa.slid); cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); + cm_form_tid(cm_id_priv)); req_msg->local_comm_id = cm_id_priv->id.local_id; req_msg->service_id = param->service_id; @@ -1413,12 +1437,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, + ret = cm_init_av_by_path(param->primary_path, + param->ppath_sgid_attr, &cm_id_priv->av, cm_id_priv); if (ret) goto error1; if (param->alternate_path) { - ret = cm_init_av_by_path(param->alternate_path, + ret = cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; @@ -1646,7 +1671,7 
@@ static void cm_opa_to_ib_sgid(struct cm_work *work, (ib_is_opa_gid(&path->sgid))) { union ib_gid sgid; - if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) { + if (rdma_query_gid(dev, port_num, 0, &sgid)) { dev_warn(&dev->dev, "Error updating sgid in CM request\n"); return; @@ -1914,9 +1939,8 @@ static int cm_req_handler(struct cm_work *work) struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; - union ib_gid gid; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh; + const struct ib_gid_attr *gid_attr; int ret; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1961,24 +1985,13 @@ static int cm_req_handler(struct cm_work *work) if (cm_req_has_alt_path(req_msg)) memset(&work->path[1], 0, sizeof(work->path[1])); grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); - ret = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, - grh->sgid_index, - &gid, &gid_attr); - if (ret) { - ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0); - goto rejected; - } + gid_attr = grh->sgid_attr; - if (gid_attr.ndev) { + if (gid_attr && gid_attr->ndev) { work->path[0].rec_type = - sa_conv_gid_to_pathrec_type(gid_attr.gid_type); - sa_path_set_ifindex(&work->path[0], - gid_attr.ndev->ifindex); - sa_path_set_ndev(&work->path[0], - dev_net(gid_attr.ndev)); - dev_put(gid_attr.ndev); + sa_conv_gid_to_pathrec_type(gid_attr->gid_type); } else { + /* If no GID attribute or ndev is null, it is not RoCE. 
*/ cm_path_set_rec_type(work->port->cm_dev->ib_device, work->port->port_num, &work->path[0], @@ -1992,15 +2005,14 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; - ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, + ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av, cm_id_priv); if (ret) { int err; - err = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, 0, - &work->path[0].sgid, - NULL); + err = rdma_query_gid(work->port->cm_dev->ib_device, + work->port->port_num, 0, + &work->path[0].sgid); if (err) ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, NULL, 0, NULL, 0); @@ -2012,8 +2024,8 @@ static int cm_req_handler(struct cm_work *work) goto rejected; } if (cm_req_has_alt_path(req_msg)) { - ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, - cm_id_priv); + ret = cm_init_av_by_path(&work->path[1], NULL, + &cm_id_priv->alt_av, cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, @@ -2451,7 +2463,7 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, u8 private_data_len) { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); + cm_form_tid(cm_id_priv)); dreq_msg->local_comm_id = cm_id_priv->id.local_id; dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); @@ -3082,7 +3094,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, alternate_path->opa.slid); cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); + cm_form_tid(cm_id_priv)); lap_msg->local_comm_id = cm_id_priv->id.local_id; lap_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); @@ -3136,7 +3148,7 @@ int ib_send_cm_lap(struct ib_cm_id 
*cm_id, goto out; } - ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, + ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto out; @@ -3279,7 +3291,7 @@ static int cm_lap_handler(struct cm_work *work) if (ret) goto unlock; - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, + cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; @@ -3458,7 +3470,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, struct ib_cm_sidr_req_param *param) { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); + cm_form_tid(cm_id_priv)); sidr_req_msg->request_id = cm_id_priv->id.local_id; sidr_req_msg->pkey = param->path->pkey; sidr_req_msg->service_id = param->service_id; @@ -3481,7 +3493,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); + ret = cm_init_av_by_path(param->path, param->sgid_attr, + &cm_id_priv->av, + cm_id_priv); if (ret) goto out; @@ -3665,7 +3679,8 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_sidr_rep); -static void cm_format_sidr_rep_event(struct cm_work *work) +static void cm_format_sidr_rep_event(struct cm_work *work, + const struct cm_id_private *cm_id_priv) { struct cm_sidr_rep_msg *sidr_rep_msg; struct ib_cm_sidr_rep_event_param *param; @@ -3678,6 +3693,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work) param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); param->info = &sidr_rep_msg->info; param->info_len = sidr_rep_msg->info_length; + param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &sidr_rep_msg->private_data; } @@ -3701,7 +3717,7 @@ static int cm_sidr_rep_handler(struct 
cm_work *work) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); - cm_format_sidr_rep_event(work); + cm_format_sidr_rep_event(work, cm_id_priv); cm_process_work(cm_id_priv, work); return 0; out: diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 8b76f0ef965e..476d4309576d 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,13 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -enum cm_msg_sequence { - CM_MSG_SEQUENCE_REQ, - CM_MSG_SEQUENCE_LAP, - CM_MSG_SEQUENCE_DREQ, - CM_MSG_SEQUENCE_SIDR -}; - struct cm_req_msg { struct ib_mad_hdr hdr; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index bff10ab141b0..f2bf997b62cd 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -366,7 +366,6 @@ struct cma_multicast { void *context; struct sockaddr_storage addr; struct kref mcref; - bool igmp_joined; u8 join_state; }; @@ -603,46 +602,54 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a return ret; } -static inline int cma_validate_port(struct ib_device *device, u8 port, - enum ib_gid_type gid_type, - union ib_gid *gid, - struct rdma_id_private *id_priv) +static const struct ib_gid_attr * +cma_validate_port(struct ib_device *device, u8 port, + enum ib_gid_type gid_type, + union ib_gid *gid, + struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int bound_if_index = dev_addr->bound_dev_if; + const struct ib_gid_attr *sgid_attr; int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; - int ret = -ENODEV; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) - return ret; + return ERR_PTR(-ENODEV); if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) - return ret; + return ERR_PTR(-ENODEV); if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, 
port)) { ndev = dev_get_by_index(dev_addr->net, bound_if_index); if (!ndev) - return ret; + return ERR_PTR(-ENODEV); } else { gid_type = IB_GID_TYPE_IB; } - ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, - ndev, NULL); - + sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev); if (ndev) dev_put(ndev); + return sgid_attr; +} - return ret; +static void cma_bind_sgid_attr(struct rdma_id_private *id_priv, + const struct ib_gid_attr *sgid_attr) +{ + WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr); + id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr; } static int cma_acquire_dev(struct rdma_id_private *id_priv, struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + const struct ib_gid_attr *sgid_attr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid, *gidp; + enum ib_gid_type gid_type; + enum ib_gid_type default_type; int ret = -ENODEV; u8 port; @@ -662,14 +669,15 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, port = listen_id_priv->id.port_num; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - listen_id_priv->gid_type, gidp, - id_priv); - if (!ret) { + gid_type = rdma_protocol_ib(cma_dev->device, port) ? + IB_GID_TYPE_IB : + listen_id_priv->gid_type; + sgid_attr = cma_validate_port(cma_dev->device, port, + gid_type, gidp, id_priv); + if (!IS_ERR(sgid_attr)) { id_priv->id.port_num = port; + cma_bind_sgid_attr(id_priv, sgid_attr); + ret = 0; goto out; } } @@ -683,14 +691,16 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? 
- IB_GID_TYPE_IB : - cma_dev->default_gid_type[port - 1], - gidp, id_priv); - if (!ret) { + default_type = cma_dev->default_gid_type[port - 1]; + gid_type = + rdma_protocol_ib(cma_dev->device, port) ? + IB_GID_TYPE_IB : default_type; + sgid_attr = cma_validate_port(cma_dev->device, port, + gid_type, gidp, id_priv); + if (!IS_ERR(sgid_attr)) { id_priv->id.port_num = port; + cma_bind_sgid_attr(id_priv, sgid_attr); + ret = 0; goto out; } } @@ -732,8 +742,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) continue; - for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, - &gid, NULL); + for (i = 0; !rdma_query_gid(cur_dev->device, + p, i, &gid); i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; @@ -1629,6 +1639,21 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_unlock(&lock); } +static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + if (ndev) { + cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); + dev_put(ndev); + } + kref_put(&mc->mcref, release_mc); +} + static void cma_leave_mc_groups(struct rdma_id_private *id_priv) { struct cma_multicast *mc; @@ -1642,22 +1667,7 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(&init_net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - } - kref_put(&mc->mcref, release_mc); + cma_leave_roce_mc_group(id_priv, mc); } } } @@ 
-1699,6 +1709,10 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); + + if (id_priv->id.route.addr.dev_addr.sgid_attr) + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); + put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } @@ -2561,8 +2575,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); route->path_rec->roce.route_resolved = true; - sa_path_set_ndev(route->path_rec, addr->dev_addr.net); - sa_path_set_ifindex(route->path_rec, ndev->ifindex); sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); return ndev; } @@ -2791,7 +2803,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) p = 1; port_found: - ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); + ret = rdma_query_gid(cma_dev->device, p, 0, &gid); if (ret) goto out; @@ -3488,7 +3500,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, ib_init_ah_attr_from_path(id_priv->id.device, id_priv->id.port_num, id_priv->id.route.path_rec, - &event.param.ud.ah_attr); + &event.param.ud.ah_attr, + rep->sgid_attr); event.param.ud.qp_num = rep->qpn; event.param.ud.qkey = rep->qkey; event.event = RDMA_CM_EVENT_ESTABLISHED; @@ -3501,6 +3514,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, } ret = id_priv->id.event_handler(&id_priv->id, &event); + + rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.ib = NULL; @@ -3557,6 +3572,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, id_priv->cm_id.ib = id; req.path = id_priv->id.route.path_rec; + req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -3618,6 +3634,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; + req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; + /* Alternate path SGID attribute currently unsupported */ req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; @@ -3981,6 +3999,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) event.event = RDMA_CM_EVENT_MULTICAST_ERROR; ret = id_priv->id.event_handler(&id_priv->id, &event); + + rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); @@ -4168,8 +4188,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (!send_only) { err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); - if (!err) - mc->igmp_joined = true; } } } else { @@ -4221,26 +4239,29 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, memcpy(&mc->addr, addr, rdma_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; - mc->igmp_joined = false; mc->join_state = join_state; - spin_lock(&id_priv->lock); - list_add(&mc->list, &id_priv->mc_list); - spin_unlock(&id_priv->lock); if (rdma_protocol_roce(id->device, id->port_num)) { kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); - } else if (rdma_cap_ib_mcast(id->device, id->port_num)) + if (ret) + goto out_err; + } else if (rdma_cap_ib_mcast(id->device, id->port_num)) { ret = 
cma_join_ib_multicast(id_priv, mc); - else + if (ret) + goto out_err; + } else { ret = -ENOSYS; - - if (ret) { - spin_lock_irq(&id_priv->lock); - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); - kfree(mc); + goto out_err; } + + spin_lock(&id_priv->lock); + list_add(&mc->list, &id_priv->mc_list); + spin_unlock(&id_priv->lock); + + return 0; +out_err: + kfree(mc); return ret; } EXPORT_SYMBOL(rdma_join_multicast); @@ -4268,23 +4289,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else if (rdma_protocol_roce(id->device, id->port_num)) { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id->route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - mc->igmp_joined = false; - } - kref_put(&mc->mcref, release_mc); + cma_leave_roce_mc_group(id_priv, mc); } return; } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 6fa4c59dc7a7..b8144f194777 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -862,25 +862,6 @@ int ib_query_port(struct ib_device *device, EXPORT_SYMBOL(ib_query_port); /** - * ib_query_gid - Get GID table entry - * @device:Device to query - * @port_num:Port number to query - * @index:GID table index to query - * @gid:Returned GID - * @attr: Returned GID attributes related to this GID index (only in RoCE). - * NULL means ignore. - * - * ib_query_gid() fetches the specified GID table entry from the cache. 
- */ -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid, - struct ib_gid_attr *attr) -{ - return ib_get_cached_gid(device, port_num, index, gid, attr); -} -EXPORT_SYMBOL(ib_query_gid); - -/** * ib_enum_roce_netdev - enumerate all RoCE ports * @ib_dev : IB device we want to query * @filter: Should we call the callback? @@ -1057,7 +1038,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, continue; for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { - ret = ib_query_gid(device, port, i, &tmp_gid, NULL); + ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index f742ae7a768b..34e9b2768324 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -38,6 +38,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/dma-mapping.h> +#include <linux/idr.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/security.h> @@ -58,8 +59,13 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); +/* + * The mlx4 driver uses the top byte to distinguish which virtual function + * generated the MAD, so we must avoid using it. + */ +#define AGENT_ID_LIMIT (1 << 24) +static DEFINE_IDR(ib_mad_clients); static struct list_head ib_mad_port_list; -static atomic_t ib_mad_client_id = ATOMIC_INIT(0); /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); @@ -190,6 +196,8 @@ EXPORT_SYMBOL(ib_response_mad); /* * ib_register_mad_agent - Register to send/receive MADs + * + * Context: Process context. 
*/ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, u8 port_num, @@ -210,7 +218,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, struct ib_mad_mgmt_vendor_class *vendor_class; struct ib_mad_mgmt_method_table *method; int ret2, qpn; - unsigned long flags; u8 mgmt_class, vclass; /* Validate parameters */ @@ -376,13 +383,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error4; } - spin_lock_irqsave(&port_priv->reg_lock, flags); - mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id); + idr_preload(GFP_KERNEL); + idr_lock(&ib_mad_clients); + ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0, + AGENT_ID_LIMIT, GFP_ATOMIC); + idr_unlock(&ib_mad_clients); + idr_preload_end(); + + if (ret2 < 0) { + ret = ERR_PTR(ret2); + goto error5; + } + mad_agent_priv->agent.hi_tid = ret2; /* * Make sure MAD registration (if supplied) * is non overlapping with any existing ones */ + spin_lock_irq(&port_priv->reg_lock); if (mad_reg_req) { mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); if (!is_vendor_class(mgmt_class)) { @@ -393,7 +411,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (method) { if (method_in_use(&method, mad_reg_req)) - goto error5; + goto error6; } } ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, @@ -409,24 +427,25 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (is_vendor_method_in_use( vendor_class, mad_reg_req)) - goto error5; + goto error6; } } ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); } if (ret2) { ret = ERR_PTR(ret2); - goto error5; + goto error6; } } - - /* Add mad agent into port's agent list */ - list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); return &mad_agent_priv->agent; +error6: + spin_unlock_irq(&port_priv->reg_lock); + idr_lock(&ib_mad_clients); + 
idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + idr_unlock(&ib_mad_clients); error5: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: kfree(reg_req); @@ -575,7 +594,6 @@ static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; - unsigned long flags; /* Note that we could still be handling received MADs */ @@ -587,10 +605,12 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) port_priv = mad_agent_priv->qp_info->port_priv; cancel_delayed_work(&mad_agent_priv->timed_work); - spin_lock_irqsave(&port_priv->reg_lock, flags); + spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); - list_del(&mad_agent_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); + idr_lock(&ib_mad_clients); + idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + idr_unlock(&ib_mad_clients); flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); @@ -601,7 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) ib_mad_agent_security_cleanup(&mad_agent_priv->agent); kfree(mad_agent_priv->reg_req); - kfree(mad_agent_priv); + kfree_rcu(mad_agent_priv, rcu); } static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) @@ -625,6 +645,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) /* * ib_unregister_mad_agent - Unregisters a client from using MAD services + * + * Context: Process context. 
*/ void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { @@ -1720,22 +1742,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv, struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; - spin_lock_irqsave(&port_priv->reg_lock, flags); if (ib_response_mad(mad_hdr)) { u32 hi_tid; - struct ib_mad_agent_private *entry; /* * Routing is based on high 32 bits of transaction ID * of MAD. */ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, agent_list) { - if (entry->agent.hi_tid == hi_tid) { - mad_agent = entry; - break; - } - } + rcu_read_lock(); + mad_agent = idr_find(&ib_mad_clients, hi_tid); + if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) + mad_agent = NULL; + rcu_read_unlock(); } else { struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; @@ -1744,6 +1763,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, const struct ib_vendor_mad *vendor_mad; int index; + spin_lock_irqsave(&port_priv->reg_lock, flags); /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI @@ -1783,20 +1803,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv, ~IB_MGMT_METHOD_RESP]; } } + if (mad_agent) + atomic_inc(&mad_agent->refcount); +out: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); } - if (mad_agent) { - if (mad_agent->agent.recv_handler) - atomic_inc(&mad_agent->refcount); - else { - dev_notice(&port_priv->device->dev, - "No receive handler for client %p on port %d\n", - &mad_agent->agent, port_priv->port_num); - mad_agent = NULL; - } + if (mad_agent && !mad_agent->agent.recv_handler) { + dev_notice(&port_priv->device->dev, + "No receive handler for client %p on port %d\n", + &mad_agent->agent, port_priv->port_num); + deref_mad_agent(mad_agent); + mad_agent = NULL; } -out: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); return mad_agent; } @@ -1896,8 +1915,8 @@ static inline int rcv_has_same_gid(const struct 
ib_mad_agent_private *mad_agent_ const struct ib_global_route *grh = rdma_ah_read_grh(&attr); - if (ib_get_cached_gid(device, port_num, - grh->sgid_index, &sgid, NULL)) + if (rdma_query_gid(device, port_num, + grh->sgid_index, &sgid)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); @@ -3159,7 +3178,6 @@ static int ib_mad_port_open(struct ib_device *device, port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); - INIT_LIST_HEAD(&port_priv->agent_list); init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); @@ -3338,6 +3356,9 @@ int ib_mad_init(void) INIT_LIST_HEAD(&ib_mad_port_list); + /* Client ID 0 is used for snoop-only clients */ + idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL); + if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 28669f6419e1..d84ae1671898 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -89,7 +89,6 @@ struct ib_rmpp_segment { }; struct ib_mad_agent_private { - struct list_head agent_list; struct ib_mad_agent agent; struct ib_mad_reg_req *reg_req; struct ib_mad_qp_info *qp_info; @@ -105,7 +104,10 @@ struct ib_mad_agent_private { struct list_head rmpp_list; atomic_t refcount; - struct completion comp; + union { + struct completion comp; + struct rcu_head rcu; + }; }; struct ib_mad_snoop_private { @@ -203,7 +205,6 @@ struct ib_mad_port_private { spinlock_t reg_lock; struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; - struct list_head agent_list; struct workqueue_struct *wq; struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; }; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 6c48f4193dda..d50ff70bb24b 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -716,14 +716,28 
@@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_sa_get_mcmember_rec); +/** + * ib_init_ah_from_mcmember - Initialize AH attribute from multicast + * member record and gid of the device. + * @device: RDMA device + * @port_num: Port of the rdma device to consider + * @ndev: Optional netdevice, applicable only for RoCE + * @gid_type: GID type to consider + * @ah_attr: AH attribute to fillup on successful completion + * + * ib_init_ah_from_mcmember() initializes AH attribute based on multicast + * member record and other device properties. On success the caller is + * responsible to call rdma_destroy_ah_attr on the ah_attr. Returns 0 on + * success or appropriate error code. + * + */ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr) { - int ret; - u16 gid_index; + const struct ib_gid_attr *sgid_attr; /* GID table is not based on the netdevice for IB link layer, * so ignore ndev during search. 
@@ -733,26 +747,22 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, else if (!rdma_protocol_roce(device, port_num)) return -EINVAL; - ret = ib_find_cached_gid_by_port(device, &rec->port_gid, - gid_type, port_num, - ndev, - &gid_index); - if (ret) - return ret; + sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid, + gid_type, port_num, ndev); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); - memset(ah_attr, 0, sizeof *ah_attr); + memset(ah_attr, 0, sizeof(*ah_attr)); ah_attr->type = rdma_ah_find_type(device, port_num); rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid)); rdma_ah_set_sl(ah_attr, rec->sl); rdma_ah_set_port_num(ah_attr, port_num); rdma_ah_set_static_rate(ah_attr, rec->rate); - - rdma_ah_set_grh(ah_attr, &rec->mgid, - be32_to_cpu(rec->flow_label), - (u8)gid_index, - rec->hop_limit, - rec->traffic_class); + rdma_move_grh_sgid_attr(ah_attr, &rec->mgid, + be32_to_cpu(rec->flow_label), + rec->hop_limit, rec->traffic_class, + sgid_attr); return 0; } EXPORT_SYMBOL(ib_init_ah_from_mcmember); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 340c7bea45ab..0385ab438320 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -237,15 +237,15 @@ static int fill_port_info(struct sk_buff *msg, if (ret) return ret; - BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); - if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, - (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD)) - return -EMSGSIZE; - if (rdma_protocol_ib(device, port) && - nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, - attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) - return -EMSGSIZE; if (rdma_protocol_ib(device, port)) { + BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, + (u64)attr.port_cap_flags, + RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, + attr.subnet_prefix, 
RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index a6e904973ba8..847c6a2f1346 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -52,10 +52,10 @@ int uverbs_ns_idx(u16 *id, unsigned int ns_count) return ret; } -const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, +const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, uint16_t object) { - const struct uverbs_root_spec *object_hash = ibdev->specs_root; + const struct uverbs_root_spec *object_hash = ufile->device->specs_root; const struct uverbs_object_spec_hash *objects; int ret = uverbs_ns_idx(&object, object_hash->num_buckets); @@ -128,7 +128,29 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; } -static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, +/* + * Does both rdma_lookup_get_uobject() and rdma_remove_commit_uobject(), then + * returns success_res on success (negative errno on failure). For use by + * callers that do not need the uobj. 
+ */ +int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, + struct ib_uverbs_file *ufile, int success_res) +{ + struct ib_uobject *uobj; + int ret; + + uobj = rdma_lookup_get_uobject(type, ufile, id, true); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + + ret = rdma_remove_commit_uobject(uobj); + if (ret) + return ret; + + return success_res; +} + +static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, const struct uverbs_obj_type *type) { struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL); @@ -139,7 +161,8 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, * user_handle should be filled by the handler, * The object is added to the list in the commit stage. */ - uobj->context = context; + uobj->ufile = ufile; + uobj->context = ufile->ucontext; uobj->type = type; /* * Allocated objects start out as write locked to deny any other @@ -157,19 +180,19 @@ static int idr_add_uobj(struct ib_uobject *uobj) int ret; idr_preload(GFP_KERNEL); - spin_lock(&uobj->context->ufile->idr_lock); + spin_lock(&uobj->ufile->idr_lock); /* * We start with allocating an idr pointing to NULL. This represents an * object which isn't initialized yet. We'll replace it later on with * the real object once we commit. */ - ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0, + ret = idr_alloc(&uobj->ufile->idr, NULL, 0, min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); if (ret >= 0) uobj->id = ret; - spin_unlock(&uobj->context->ufile->idr_lock); + spin_unlock(&uobj->ufile->idr_lock); idr_preload_end(); return ret < 0 ? 
ret : 0; @@ -181,21 +204,21 @@ static int idr_add_uobj(struct ib_uobject *uobj) */ static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) { - spin_lock(&uobj->context->ufile->idr_lock); - idr_remove(&uobj->context->ufile->idr, uobj->id); - spin_unlock(&uobj->context->ufile->idr_lock); + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); } /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ -static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) +static struct ib_uobject * +lookup_get_idr_uobject(const struct uverbs_obj_type *type, + struct ib_uverbs_file *ufile, int id, bool exclusive) { struct ib_uobject *uobj; rcu_read_lock(); /* object won't be released as we're protected in rcu */ - uobj = idr_find(&ucontext->ufile->idr, id); + uobj = idr_find(&ufile->idr, id); if (!uobj) { uobj = ERR_PTR(-ENOENT); goto free; @@ -216,7 +239,7 @@ free: } static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, + struct ib_uverbs_file *ufile, int id, bool exclusive) { struct file *f; @@ -247,13 +270,13 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty } struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) + struct ib_uverbs_file *ufile, int id, + bool exclusive) { struct ib_uobject *uobj; int ret; - uobj = type->type_class->lookup_get(type, ucontext, id, exclusive); + uobj = type->type_class->lookup_get(type, ufile, id, exclusive); if (IS_ERR(uobj)) return uobj; @@ -264,7 +287,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, ret = uverbs_try_lock_object(uobj, exclusive); if (ret) { - WARN(ucontext->cleanup_reason, + WARN(uobj->ufile->cleanup_reason, "ib_uverbs: Trying to lookup_get while 
cleanup context\n"); goto free; } @@ -277,12 +300,12 @@ free: } static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) + struct ib_uverbs_file *ufile) { int ret; struct ib_uobject *uobj; - uobj = alloc_uobj(ucontext, type); + uobj = alloc_uobj(ufile, type); if (IS_ERR(uobj)) return uobj; @@ -290,7 +313,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type * if (ret) goto uobj_put; - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device, + ret = ib_rdmacg_try_charge(&uobj->cg_obj, ufile->ucontext->device, RDMACG_RESOURCE_HCA_OBJECT); if (ret) goto idr_remove; @@ -305,29 +328,27 @@ uobj_put: } static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) + struct ib_uverbs_file *ufile) { const struct uverbs_obj_fd_type *fd_type = container_of(type, struct uverbs_obj_fd_type, type); int new_fd; struct ib_uobject *uobj; - struct ib_uobject_file *uobj_file; struct file *filp; new_fd = get_unused_fd_flags(O_CLOEXEC); if (new_fd < 0) return ERR_PTR(new_fd); - uobj = alloc_uobj(ucontext, type); + uobj = alloc_uobj(ufile, type); if (IS_ERR(uobj)) { put_unused_fd(new_fd); return uobj; } - uobj_file = container_of(uobj, struct ib_uobject_file, uobj); filp = anon_inode_getfile(fd_type->name, fd_type->fops, - uobj_file, + uobj, fd_type->flags); if (IS_ERR(filp)) { put_unused_fd(new_fd); @@ -335,19 +356,19 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t return (void *)filp; } - uobj_file->uobj.id = new_fd; - uobj_file->uobj.object = filp; - uobj_file->ufile = ucontext->ufile; + uobj->id = new_fd; + uobj->object = filp; + uobj->ufile = ufile; INIT_LIST_HEAD(&uobj->list); - kref_get(&uobj_file->ufile->ref); + kref_get(&ufile->ref); return uobj; } struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) + struct ib_uverbs_file *ufile) { - 
return type->type_class->alloc_begin(type, ucontext); + return type->type_class->alloc_begin(type, ufile); } static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, @@ -360,9 +381,10 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, /* * We can only fail gracefully if the user requested to destroy the - * object. In the rest of the cases, just remove whatever you can. + * object or when a retry may be called upon an error. + * In the rest of the cases, just remove whatever you can. */ - if (why == RDMA_REMOVE_DESTROY && ret) + if (ib_is_destroy_retryable(ret, why, uobj)) return ret; ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, @@ -374,10 +396,8 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); struct file *filp = uobj->object; - int id = uobj_file->uobj.id; + int id = uobj->id; /* Unsuccessful NEW */ fput(filp); @@ -389,11 +409,9 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of(uobj->type, struct uverbs_obj_fd_type, type); - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); - int ret = fd_type->context_closed(uobj_file, why); + int ret = fd_type->context_closed(uobj, why); - if (why == RDMA_REMOVE_DESTROY && ret) + if (ib_is_destroy_retryable(ret, why, uobj)) return ret; if (why == RDMA_REMOVE_DURING_CLEANUP) { @@ -401,7 +419,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, return ret; } - uobj_file->uobj.context = NULL; + uobj->context = NULL; return ret; } @@ -418,18 +436,18 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, enum rdma_remove_reason why) { + struct 
ib_uverbs_file *ufile = uobj->ufile; int ret; - struct ib_ucontext *ucontext = uobj->context; ret = uobj->type->type_class->remove_commit(uobj, why); - if (ret && why == RDMA_REMOVE_DESTROY) { + if (ib_is_destroy_retryable(ret, why, uobj)) { /* We couldn't remove the object, so just unlock the uobject */ atomic_set(&uobj->usecnt, 0); uobj->type->type_class->lookup_put(uobj, true); } else { - mutex_lock(&ucontext->uobjects_lock); + mutex_lock(&ufile->uobjects_lock); list_del(&uobj->list); - mutex_unlock(&ucontext->uobjects_lock); + mutex_unlock(&ufile->uobjects_lock); /* put the ref we took when we created the object */ uverbs_uobject_put(uobj); } @@ -441,19 +459,19 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) { int ret; - struct ib_ucontext *ucontext = uobj->context; + struct ib_uverbs_file *ufile = uobj->ufile; /* put the ref count we took at lookup_get */ uverbs_uobject_put(uobj); /* Cleanup is running. Calling this should have been impossible */ - if (!down_read_trylock(&ucontext->cleanup_rwsem)) { + if (!down_read_trylock(&ufile->cleanup_rwsem)) { WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } assert_uverbs_usecnt(uobj, true); ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); - up_read(&ucontext->cleanup_rwsem); + up_read(&ufile->cleanup_rwsem); return ret; } @@ -473,10 +491,10 @@ static const struct uverbs_obj_type null_obj_type = { int rdma_explicit_destroy(struct ib_uobject *uobject) { int ret; - struct ib_ucontext *ucontext = uobject->context; + struct ib_uverbs_file *ufile = uobject->ufile; /* Cleanup is running. 
Calling this should have been impossible */ - if (!down_read_trylock(&ucontext->cleanup_rwsem)) { + if (!down_read_trylock(&ufile->cleanup_rwsem)) { WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } @@ -489,38 +507,36 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) uobject->type = &null_obj_type; out: - up_read(&ucontext->cleanup_rwsem); + up_read(&ufile->cleanup_rwsem); return ret; } static void alloc_commit_idr_uobject(struct ib_uobject *uobj) { - spin_lock(&uobj->context->ufile->idr_lock); + spin_lock(&uobj->ufile->idr_lock); /* * We already allocated this IDR with a NULL object, so * this shouldn't fail. */ - WARN_ON(idr_replace(&uobj->context->ufile->idr, - uobj, uobj->id)); - spin_unlock(&uobj->context->ufile->idr_lock); + WARN_ON(idr_replace(&uobj->ufile->idr, uobj, uobj->id)); + spin_unlock(&uobj->ufile->idr_lock); } static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); - - fd_install(uobj_file->uobj.id, uobj->object); + fd_install(uobj->id, uobj->object); /* This shouldn't be used anymore. Use the file object instead */ - uobj_file->uobj.id = 0; + uobj->id = 0; /* Get another reference as we export this to the fops */ - uverbs_uobject_get(&uobj_file->uobj); + uverbs_uobject_get(uobj); } int rdma_alloc_commit_uobject(struct ib_uobject *uobj) { + struct ib_uverbs_file *ufile = uobj->ufile; + /* Cleanup is running. 
Calling this should have been impossible */ - if (!down_read_trylock(&uobj->context->cleanup_rwsem)) { + if (!down_read_trylock(&ufile->cleanup_rwsem)) { int ret; WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); @@ -536,12 +552,12 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) assert_uverbs_usecnt(uobj, true); atomic_set(&uobj->usecnt, 0); - mutex_lock(&uobj->context->uobjects_lock); - list_add(&uobj->list, &uobj->context->uobjects); - mutex_unlock(&uobj->context->uobjects_lock); + mutex_lock(&ufile->uobjects_lock); + list_add(&uobj->list, &ufile->uobjects); + mutex_unlock(&ufile->uobjects_lock); uobj->type->type_class->alloc_commit(uobj); - up_read(&uobj->context->cleanup_rwsem); + up_read(&ufile->cleanup_rwsem); return 0; } @@ -611,23 +627,22 @@ const struct uverbs_obj_type_class uverbs_idr_class = { */ .needs_kfree_rcu = true, }; +EXPORT_SYMBOL(uverbs_idr_class); -static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) +static void _uverbs_close_fd(struct ib_uobject *uobj) { - struct ib_ucontext *ucontext; - struct ib_uverbs_file *ufile = uobj_file->ufile; + struct ib_uverbs_file *ufile = uobj->ufile; int ret; - mutex_lock(&uobj_file->ufile->cleanup_mutex); + mutex_lock(&ufile->cleanup_mutex); /* uobject was either already cleaned up or is cleaned up right now anyway */ - if (!uobj_file->uobj.context || - !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem)) + if (!uobj->context || + !down_read_trylock(&ufile->cleanup_rwsem)) goto unlock; - ucontext = uobj_file->uobj.context; - ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE); - up_read(&ucontext->cleanup_rwsem); + ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_CLOSE); + up_read(&ufile->cleanup_rwsem); if (ret) pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); unlock: @@ -636,78 +651,87 @@ unlock: void uverbs_close_fd(struct file *f) { - struct ib_uobject_file *uobj_file = f->private_data; - struct kref 
*uverbs_file_ref = &uobj_file->ufile->ref; + struct ib_uobject *uobj = f->private_data; + struct kref *uverbs_file_ref = &uobj->ufile->ref; - _uverbs_close_fd(uobj_file); - uverbs_uobject_put(&uobj_file->uobj); + _uverbs_close_fd(uobj); + uverbs_uobject_put(uobj); kref_put(uverbs_file_ref, ib_uverbs_release_file); } -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) +static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - enum rdma_remove_reason reason = device_removed ? - RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; - unsigned int cur_order = 0; + struct ib_uobject *obj, *next_obj; + int ret = -EINVAL; + int err = 0; - ucontext->cleanup_reason = reason; /* - * Waits for all remove_commit and alloc_commit to finish. Logically, We - * want to hold this forever as the context is going to be destroyed, - * but we'll release it since it causes a "held lock freed" BUG message. + * This shouldn't run while executing other commands on this + * context. Thus, the only thing we should take care of is + * releasing a FD while traversing this list. The FD could be + * closed and released from the _release fop of this FD. + * In order to mitigate this, we add a lock. + * We take and release the lock per traversal in order to let + * other threads (which might still use the FDs) chance to run. */ - down_write(&ucontext->cleanup_rwsem); - - while (!list_empty(&ucontext->uobjects)) { - struct ib_uobject *obj, *next_obj; - unsigned int next_order = UINT_MAX; - + mutex_lock(&ufile->uobjects_lock); + ufile->cleanup_reason = reason; + list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { /* - * This shouldn't run while executing other commands on this - * context. Thus, the only thing we should take care of is - * releasing a FD while traversing this list. The FD could be - * closed and released from the _release fop of this FD. - * In order to mitigate this, we add a lock. 
- * We take and release the lock per order traversal in order - * to let other threads (which might still use the FDs) chance - * to run. + * if we hit this WARN_ON, that means we are + * racing with a lookup_get. */ - mutex_lock(&ucontext->uobjects_lock); - list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, - list) { - if (obj->type->destroy_order == cur_order) { - int ret; - - /* - * if we hit this WARN_ON, that means we are - * racing with a lookup_get. - */ - WARN_ON(uverbs_try_lock_object(obj, true)); - ret = obj->type->type_class->remove_commit(obj, - reason); - list_del(&obj->list); - if (ret) - pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n", - obj->id, cur_order); - /* put the ref we took when we created the object */ - uverbs_uobject_put(obj); - } else { - next_order = min(next_order, - obj->type->destroy_order); - } + WARN_ON(uverbs_try_lock_object(obj, true)); + err = obj->type->type_class->remove_commit(obj, reason); + + if (ib_is_destroy_retryable(err, reason, obj)) { + pr_debug("ib_uverbs: failed to remove uobject id %d err %d\n", + obj->id, err); + atomic_set(&obj->usecnt, 0); + continue; } - mutex_unlock(&ucontext->uobjects_lock); - cur_order = next_order; + + if (err) + pr_err("ib_uverbs: unable to remove uobject id %d err %d\n", + obj->id, err); + + list_del(&obj->list); + /* put the ref we took when we created the object */ + uverbs_uobject_put(obj); + ret = 0; } - up_write(&ucontext->cleanup_rwsem); + mutex_unlock(&ufile->uobjects_lock); + return ret; } -void uverbs_initialize_ucontext(struct ib_ucontext *ucontext) +void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) { - ucontext->cleanup_reason = 0; - mutex_init(&ucontext->uobjects_lock); - INIT_LIST_HEAD(&ucontext->uobjects); - init_rwsem(&ucontext->cleanup_rwsem); + enum rdma_remove_reason reason = device_removed ? + RDMA_REMOVE_DRIVER_REMOVE : + RDMA_REMOVE_CLOSE; + + /* + * Waits for all remove_commit and alloc_commit to finish. 
Logically, We + * want to hold this forever as the context is going to be destroyed, + * but we'll release it since it causes a "held lock freed" BUG message. + */ + down_write(&ufile->cleanup_rwsem); + ufile->ucontext->cleanup_retryable = true; + while (!list_empty(&ufile->uobjects)) + if (__uverbs_cleanup_ufile(ufile, reason)) { + /* + * No entry was cleaned-up successfully during this + * iteration + */ + break; + } + + ufile->ucontext->cleanup_retryable = false; + if (!list_empty(&ufile->uobjects)) + __uverbs_cleanup_ufile(ufile, reason); + + up_write(&ufile->cleanup_rwsem); } const struct uverbs_obj_type_class uverbs_fd_class = { @@ -719,20 +743,21 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .remove_commit = remove_commit_fd_uobject, .needs_kfree_rcu = false, }; +EXPORT_SYMBOL(uverbs_fd_class); -struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, - struct ib_ucontext *ucontext, - enum uverbs_obj_access access, - int id) +struct ib_uobject * +uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, + struct ib_uverbs_file *ufile, + enum uverbs_obj_access access, int id) { switch (access) { case UVERBS_ACCESS_READ: - return rdma_lookup_get_uobject(type_attrs, ucontext, id, false); + return rdma_lookup_get_uobject(type_attrs, ufile, id, false); case UVERBS_ACCESS_DESTROY: case UVERBS_ACCESS_WRITE: - return rdma_lookup_get_uobject(type_attrs, ucontext, id, true); + return rdma_lookup_get_uobject(type_attrs, ufile, id, true); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(type_attrs, ucontext); + return rdma_alloc_begin_uobject(type_attrs, ufile); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); @@ -777,43 +802,3 @@ int uverbs_finalize_object(struct ib_uobject *uobj, return ret; } - -int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, - struct uverbs_attr_spec_hash * const *spec_hash, - size_t num, - bool commit) -{ - unsigned int i; - int ret = 0; - - for (i = 0; i 
< num; i++) { - struct uverbs_attr_bundle_hash *curr_bundle = - &attrs_bundle->hash[i]; - const struct uverbs_attr_spec_hash *curr_spec_bucket = - spec_hash[i]; - unsigned int j; - - for (j = 0; j < curr_bundle->num_attrs; j++) { - struct uverbs_attr *attr; - const struct uverbs_attr_spec *spec; - - if (!uverbs_attr_is_valid_in_hash(curr_bundle, j)) - continue; - - attr = &curr_bundle->attrs[j]; - spec = &curr_spec_bucket->attrs[j]; - - if (spec->type == UVERBS_ATTR_TYPE_IDR || - spec->type == UVERBS_ATTR_TYPE_FD) { - int current_ret; - - current_ret = uverbs_finalize_object(attr->obj_attr.uobject, - spec->obj.access, - commit); - if (!ret) - ret = current_ret; - } - } - } - return ret; -} diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 1efcf93238dd..1bba60e960c1 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -44,19 +44,12 @@ #include <linux/mutex.h> int uverbs_ns_idx(u16 *id, unsigned int ns_count); -const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, +const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, uint16_t object); const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, uint16_t method); -/* - * These functions initialize the context and cleanups its uobjects. - * The context has a list of objects which is protected by a mutex - * on the context. initialize_ucontext should be called when we create - * a context. - * cleanup_ucontext removes all uobjects from the context and puts them. 
- */ -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed); -void uverbs_initialize_ucontext(struct ib_ucontext *ucontext); + +void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -82,7 +75,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject); void uverbs_close_fd(struct file *f); /* - * Get an ib_uobject that corresponds to the given id from ucontext, assuming + * Get an ib_uobject that corresponds to the given id from ufile, assuming * the object is from the given type. Lock it to the required access when * applicable. * This function could create (access == NEW), destroy (access == DESTROY) @@ -90,13 +83,11 @@ void uverbs_close_fd(struct file *f); * The action will be finalized only when uverbs_finalize_object or * uverbs_finalize_objects are called. */ -struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, - struct ib_ucontext *ucontext, - enum uverbs_obj_access access, - int id); -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, - bool commit); +struct ib_uobject * +uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, + struct ib_uverbs_file *ufile, + enum uverbs_obj_access access, int id); + /* * Note that certain finalize stages could return a status: * (a) alloc_commit could return a failure if the object is committed at the @@ -112,9 +103,8 @@ int uverbs_finalize_object(struct ib_uobject *uobj, * function. For example, this could happen when we couldn't destroy an * object. 
*/ -int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, - struct uverbs_attr_spec_hash * const *spec_hash, - size_t num, - bool commit); +int uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, + bool commit); #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index c8963e91f92a..474d65297afc 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, } ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); - if (ret < nents) { + if (ret < 0 || ret < nents) { ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); return -EINVAL; } @@ -325,7 +325,7 @@ out_unmap_sg: EXPORT_SYMBOL(rdma_rw_ctx_init); /** - * rdma_rw_ctx_signature init - initialize a RW context with signature offload + * rdma_rw_ctx_signature_init - initialize a RW context with signature offload * @ctx: context to initialize * @qp: queue pair to operate on * @port_num: port num to which the connection is bound diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a61ec7e33613..7b794a14d6e8 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1227,20 +1227,10 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num) return src_path_mask; } -static int -roce_resolve_route_from_path(struct ib_device *device, u8 port_num, - struct sa_path_rec *rec) +static int roce_resolve_route_from_path(struct sa_path_rec *rec, + const struct ib_gid_attr *attr) { - struct net_device *resolved_dev; - struct net_device *ndev; - struct net_device *idev; - struct rdma_dev_addr dev_addr = { - .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ? - sa_path_get_ifindex(rec) : 0), - .net = sa_path_get_ndev(rec) ? 
- sa_path_get_ndev(rec) : - &init_net - }; + struct rdma_dev_addr dev_addr = {}; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; @@ -1250,9 +1240,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, if (rec->roce.route_resolved) return 0; + if (!attr || !attr->ndev) + return -EINVAL; - if (!device->get_netdev) - return -EOPNOTSUPP; + dev_addr.bound_dev_if = attr->ndev->ifindex; + /* TODO: Use net from the ib_gid_attr once it is added to it, + * until then, limit itself to init_net. + */ + dev_addr.net = &init_net; rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); @@ -1268,60 +1263,52 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) return -EINVAL; - idev = device->get_netdev(device, port_num); - if (!idev) - return -ENODEV; - - resolved_dev = dev_get_by_index(dev_addr.net, - dev_addr.bound_dev_if); - if (!resolved_dev) { - ret = -ENODEV; - goto done; - } - ndev = ib_get_ndev_from_path(rec); - rcu_read_lock(); - if ((ndev && ndev != resolved_dev) || - (resolved_dev != idev && - !rdma_is_upper_dev_rcu(idev, resolved_dev))) - ret = -EHOSTUNREACH; - rcu_read_unlock(); - dev_put(resolved_dev); - if (ndev) - dev_put(ndev); -done: - dev_put(idev); - if (!ret) - rec->roce.route_resolved = true; - return ret; + rec->roce.route_resolved = true; + return 0; } static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *gid_attr) { enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); - struct net_device *ndev; - u16 gid_index; - int ret; - ndev = ib_get_ndev_from_path(rec); - ret = ib_find_cached_gid_by_port(device, &rec->sgid, type, - port_num, ndev, &gid_index); - if (ndev) - dev_put(ndev); - if (ret) - return ret; + if (!gid_attr) { + gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type, + 
port_num, NULL); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + } else + rdma_hold_gid_attr(gid_attr); - rdma_ah_set_grh(ah_attr, &rec->dgid, - be32_to_cpu(rec->flow_label), - gid_index, rec->hop_limit, - rec->traffic_class); + rdma_move_grh_sgid_attr(ah_attr, &rec->dgid, + be32_to_cpu(rec->flow_label), + rec->hop_limit, rec->traffic_class, + gid_attr); return 0; } +/** + * ib_init_ah_attr_from_path - Initialize address handle attributes based on + * an SA path record. + * @device: Device associated with ah attributes initialization. + * @port_num: Port on the specified device. + * @rec: path record entry to use for ah attributes initialization. + * @ah_attr: address handle attributes to initialize from path record. + * @gid_attr: SGID attribute to consider during initialization. + * + * When ib_init_ah_attr_from_path() returns success, + * (a) for IB link layer it optionally contains a reference to SGID attribute + * when GRH is present for IB link layer. + * (b) for RoCE link layer it contains a reference to SGID attribute. + * User must invoke rdma_destroy_ah_attr() to release reference to SGID + * attributes which are initialized using ib_init_ah_attr_from_path(). 
+ */ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *gid_attr) { int ret = 0; @@ -1332,7 +1319,7 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, rdma_ah_set_static_rate(ah_attr, rec->rate); if (sa_path_is_roce(rec)) { - ret = roce_resolve_route_from_path(device, port_num, rec); + ret = roce_resolve_route_from_path(rec, gid_attr); if (ret) return ret; @@ -1349,7 +1336,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, } if (rec->hop_limit > 0 || sa_path_is_roce(rec)) - ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr); + ret = init_ah_attr_grh_fields(device, port_num, + rec, ah_attr, gid_attr); return ret; } EXPORT_SYMBOL(ib_init_ah_attr_from_path); @@ -1557,8 +1545,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ARRAY_SIZE(path_rec_table), mad->data, &rec); rec.rec_type = SA_PATH_REC_TYPE_IB; - sa_path_set_ndev(&rec, NULL); - sa_path_set_ifindex(&rec, 0); sa_path_set_dmac_zero(&rec); if (query->conv_pr) { @@ -2290,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work) struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct rdma_ah_attr ah_attr; + bool grh_required; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { pr_warn("Couldn't query port\n"); @@ -2314,16 +2301,27 @@ static void update_sm_ah(struct work_struct *work) rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid); rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); - if (port_attr.grh_required) { - if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) { - rdma_ah_set_make_grd(&ah_attr, true); - } else { - rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); - rdma_ah_set_subnet_prefix(&ah_attr, - cpu_to_be64(port_attr.subnet_prefix)); - rdma_ah_set_interface_id(&ah_attr, - cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); - } + + grh_required = 
rdma_is_grh_required(port->agent->device, + port->port_num); + + /* + * The OPA sm_lid of 0xFFFF needs special handling so that it can be + * differentiated from a permissive LID of 0xFFFF. We set the + * grh_required flag here so the SA can program the DGID in the + * address handle appropriately + */ + if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA && + (grh_required || + port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))) + rdma_ah_set_make_grd(&ah_attr, true); + + if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) { + rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); + rdma_ah_set_subnet_prefix(&ah_attr, + cpu_to_be64(port_attr.subnet_prefix)); + rdma_ah_set_interface_id(&ah_attr, + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); } new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 31c7efaf8e7a..7fd14ead7b37 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -42,6 +42,7 @@ #include <rdma/ib_mad.h> #include <rdma/ib_pma.h> +#include <rdma/ib_cache.h> struct ib_port; @@ -346,7 +347,7 @@ static struct attribute *port_default_attrs[] = { NULL }; -static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) +static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { if (!gid_attr->ndev) return -EINVAL; @@ -354,33 +355,26 @@ static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) return sprintf(buf, "%s\n", gid_attr->ndev->name); } -static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf) +static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); } -static ssize_t _show_port_gid_attr(struct ib_port *p, - struct port_attribute *attr, - char *buf, - size_t (*print)(struct ib_gid_attr *gid_attr, - char *buf)) +static ssize_t _show_port_gid_attr( + struct ib_port *p, struct port_attribute *attr, char *buf, + size_t 
(*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - union ib_gid gid; - struct ib_gid_attr gid_attr = {}; + const struct ib_gid_attr *gid_attr; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, - &gid_attr); - if (ret) - goto err; + gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); - ret = print(&gid_attr, buf); - -err: - if (gid_attr.ndev) - dev_put(gid_attr.ndev); + ret = print(gid_attr, buf); + rdma_put_gid_attr(gid_attr); return ret; } @@ -389,26 +383,28 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - union ib_gid *pgid; - union ib_gid gid; + const struct ib_gid_attr *gid_attr; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); + gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); + if (IS_ERR(gid_attr)) { + const union ib_gid zgid = {}; + + /* If reading GID fails, it is likely due to GID entry being + * empty (invalid) or reserved GID in the table. User space + * expects to read GID table entries as long as it given index + * is within GID table size. Administrative/debugging tool + * fails to query rest of the GID entries if it hits error + * while querying a GID of the given index. To avoid user + * space throwing such error on fail to read gid, return zero + * GID as before. This maintains backward compatibility. + */ + return sprintf(buf, "%pI6\n", zgid.raw); + } - /* If reading GID fails, it is likely due to GID entry being empty - * (invalid) or reserved GID in the table. - * User space expects to read GID table entries as long as it given - * index is within GID table size. 
- * Administrative/debugging tool fails to query rest of the GID entries - * if it hits error while querying a GID of the given index. - * To avoid user space throwing such error on fail to read gid, return - * zero GID as before. This maintains backward compatibility. - */ - if (ret) - pgid = &zgid; - else - pgid = &gid; - return sprintf(buf, "%pI6\n", pgid->raw); + ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); + rdma_put_gid_attr(gid_attr); + return ret; } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 54ab6335c48d..a41792dbae1f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -84,7 +84,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, struct ib_umem *umem; struct page **page_list; struct vm_area_struct **vma_list; - unsigned long locked; unsigned long lock_limit; unsigned long cur_base; unsigned long npages; @@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, int i; unsigned long dma_attrs = 0; struct scatterlist *sg, *sg_list_start; - int need_release = 0; unsigned int gup_flags = FOLL_WRITE; if (dmasync) @@ -121,10 +119,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (access & IB_ACCESS_ON_DEMAND) { ret = ib_umem_odp_get(context, umem, access); - if (ret) { - kfree(umem); - return ERR_PTR(ret); - } + if (ret) + goto umem_kfree; return umem; } @@ -135,8 +131,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { - kfree(umem); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto umem_kfree; } /* @@ -149,41 +145,43 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, npages = ib_umem_num_pages(umem); - down_write(&current->mm->mmap_sem); - - locked = npages + current->mm->pinned_vm; lock_limit =
rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + down_write(&current->mm->mmap_sem); + current->mm->pinned_vm += npages; + if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { + up_write(&current->mm->mmap_sem); ret = -ENOMEM; - goto out; + goto vma; } + up_write(&current->mm->mmap_sem); cur_base = addr & PAGE_MASK; if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; - goto out; + goto vma; } ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); if (ret) - goto out; + goto vma; if (!umem->writable) gup_flags |= FOLL_FORCE; - need_release = 1; sg_list_start = umem->sg_head.sgl; + down_read(&current->mm->mmap_sem); while (npages) { ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); - - if (ret < 0) - goto out; + if (ret < 0) { + up_read(&current->mm->mmap_sem); + goto umem_release; + } umem->npages += ret; cur_base += ret * PAGE_SIZE; @@ -199,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, /* preparing for next loop */ sg_list_start = sg; } + up_read(&current->mm->mmap_sem); umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, @@ -206,27 +205,28 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, DMA_BIDIRECTIONAL, dma_attrs); - if (umem->nmap <= 0) { + if (!umem->nmap) { ret = -ENOMEM; - goto out; + goto umem_release; } ret = 0; + goto out; -out: - if (ret < 0) { - if (need_release) - __ib_umem_release(context->device, umem, 0); - kfree(umem); - } else - current->mm->pinned_vm = locked; - +umem_release: + __ib_umem_release(context->device, umem, 0); +vma: + down_write(&current->mm->mmap_sem); + current->mm->pinned_vm -= ib_umem_num_pages(umem); up_write(&current->mm->mmap_sem); +out: if (vma_list) free_page((unsigned long) vma_list); free_page((unsigned long) page_list); - - return ret < 0 ? ERR_PTR(ret) : umem; +umem_kfree: + if (ret) + kfree(umem); + return ret ? 
ERR_PTR(ret) : umem; } EXPORT_SYMBOL(ib_umem_get); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index bb98c9e4a7fd..c34a6852d691 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.traffic_class = grh->traffic_class; memcpy(packet->mad.hdr.gid, &grh->dgid, 16); packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label); + rdma_destroy_ah_attr(&ah_attr); } if (queue_packet(file, agent, packet)) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index c0d40fc3a53a..d0a1a54275e5 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -130,7 +130,7 @@ struct ib_uverbs_async_event_file { }; struct ib_uverbs_completion_event_file { - struct ib_uobject_file uobj_file; + struct ib_uobject uobj; struct ib_uverbs_event_queue ev_queue; }; @@ -145,6 +145,14 @@ struct ib_uverbs_file { struct list_head list; int is_closed; + /* locking the uobjects_list */ + struct mutex uobjects_lock; + struct list_head uobjects; + + /* protects cleanup process from other actions */ + struct rw_semaphore cleanup_rwsem; + enum rdma_remove_reason cleanup_reason; + struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; @@ -196,7 +204,6 @@ struct ib_uwq_object { struct ib_ucq_object { struct ib_uobject uobject; - struct ib_uverbs_file *uverbs_file; struct list_head comp_list; struct list_head async_list; u32 comp_events_reported; @@ -230,7 +237,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd, +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum 
rdma_remove_reason why); int uverbs_dealloc_mw(struct ib_mw *mw); @@ -238,12 +245,7 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); -extern const struct uverbs_attr_def uverbs_uhw_compat_in; -extern const struct uverbs_attr_def uverbs_uhw_compat_out; long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -int uverbs_destroy_def_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs); struct ib_uverbs_flow_spec { union { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3e90b6a1d9d2..bd6eefaecbd6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -48,11 +48,10 @@ #include "core_priv.h" static struct ib_uverbs_completion_event_file * -ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) +ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile) { struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL, - fd, context); - struct ib_uobject_file *uobj_file; + fd, ufile); if (IS_ERR(uobj)) return (void *)uobj; @@ -60,9 +59,8 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) uverbs_uobject_get(uobj); uobj_put_read(uobj); - uobj_file = container_of(uobj, struct ib_uobject_file, uobj); - return container_of(uobj_file, struct ib_uverbs_completion_event_file, - uobj_file); + return container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); } ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, @@ -110,12 +108,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->cg_obj = cg_obj; /* ufile is required when some objects are released */ ucontext->ufile = file; - uverbs_initialize_ucontext(ucontext); rcu_read_lock(); ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); rcu_read_unlock(); ucontext->closing = 0; + 
ucontext->cleanup_retryable = false; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING ucontext->umem_tree = RB_ROOT_CACHED; @@ -189,7 +187,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); - resp->max_sge = attr->max_sge; + resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; resp->max_cqe = attr->max_cqe; @@ -243,6 +241,27 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return in_len; } +/* + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the + * PortInfo CapabilityMask, but was extended with unique bits. + */ +static u32 make_port_cap_flags(const struct ib_port_attr *attr) +{ + u32 res; + + /* All IBA CapabilityMask bits are passed through here, except bit 26, + * which is overridden with IP_BASED_GIDS. This is due to a historical + * mistake in the implementation of IP_BASED_GIDS. Otherwise all other + * bits match the IBA definition across all kernel versions. 
+ */ + res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; + + if (attr->ip_gids) + res |= IB_UVERBS_PCF_IP_BASED_GIDS; + + return res; +} + ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, @@ -269,12 +288,15 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.max_mtu = attr.max_mtu; resp.active_mtu = attr.active_mtu; resp.gid_tbl_len = attr.gid_tbl_len; - resp.port_cap_flags = attr.port_cap_flags; + resp.port_cap_flags = make_port_cap_flags(&attr); resp.max_msg_sz = attr.max_msg_sz; resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; + if (rdma_is_grh_required(ib_dev, cmd.port_num)) + resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); @@ -322,7 +344,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_PD, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -366,20 +388,12 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; - struct ib_uobject *uobj; - int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file, + in_len); } struct xrcd_table_entry { @@ -517,8 +531,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, - file->ucontext); + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file); if 
(IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_tree_mutex_unlock; @@ -596,27 +609,21 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_close_xrcd cmd; - struct ib_uobject *uobj; - int ret = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file, + in_len); } -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why) { struct inode *inode; int ret; + struct ib_uverbs_device *dev = uobject->context->ufile->device; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) @@ -624,9 +631,12 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, ret = ib_dealloc_xrcd(xrcd); - if (why == RDMA_REMOVE_DESTROY && ret) + if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); - else if (inode) + return ret; + } + + if (inode) xrcd_table_delete(dev, inode); return ret; @@ -663,11 +673,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MR, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err_free; @@ -759,8 +769,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -778,7 +787,8 @@ ssize_t 
ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + file); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -824,20 +834,12 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_dereg_mr cmd; - struct ib_uobject *uobj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file, + in_len); } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, @@ -859,11 +861,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MW, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err_free; @@ -916,19 +918,12 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_dealloc_mw cmd; - struct ib_uobject *uobj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file, + in_len); } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, @@ -947,14 +942,14 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return 
-EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); resp.fd = uobj->id; ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, - uobj_file.uobj); + uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { @@ -992,14 +987,12 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, - file->ucontext); + obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file); if (IS_ERR(obj)) return obj; if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, - file->ucontext); + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file); if (IS_ERR(ev_file)) { ret = PTR_ERR(ev_file); goto err; @@ -1007,7 +1000,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, } obj->uobject.user_handle = cmd->user_handle; - obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); @@ -1181,7 +1173,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1246,7 +1238,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1293,7 +1285,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, 
sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1320,8 +1312,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -1379,8 +1370,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, - file->ucontext); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file); if (IS_ERR(obj)) return PTR_ERR(obj); obj->uxrcd = NULL; @@ -1390,9 +1380,9 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, - cmd->rwq_ind_tbl_handle, - file->ucontext); + ind_tbl = uobj_get_obj_read(rwq_ind_table, + UVERBS_OBJECT_RWQ_IND_TBL, + cmd->rwq_ind_tbl_handle, file); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1418,7 +1408,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, - file->ucontext); + file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; @@ -1437,8 +1427,8 @@ static int create_qp(struct ib_uverbs_file *file, cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle, - file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, + cmd->srq_handle, file); if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; goto err_put; @@ -1447,8 +1437,9 @@ static 
int create_qp(struct ib_uverbs_file *file, if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle, - file->ucontext); + rcq = uobj_get_obj_read( + cq, UVERBS_OBJECT_CQ, + cmd->recv_cq_handle, file); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1458,11 +1449,12 @@ static int create_qp(struct ib_uverbs_file *file, } if (has_sq) - scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, - file->ucontext); + scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->send_cq_handle, file); if (!ind_tbl) rcq = rcq ?: scq; - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, + file); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1759,13 +1751,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, - file->ucontext); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file); if (IS_ERR(obj)) return PTR_ERR(obj); - xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, - file->ucontext); + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err_put; @@ -1867,7 +1857,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) { ret = -EINVAL; goto out; @@ -1968,11 +1958,11 @@ static int modify_qp(struct ib_uverbs_file *file, struct ib_qp *qp; int ret; - attr = kmalloc(sizeof *attr, GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, 
cmd->base.qp_handle, file); if (!qp) { ret = -EINVAL; goto out; @@ -2127,8 +2117,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -2193,7 +2182,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) goto out; @@ -2229,8 +2218,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah, - file->ucontext); + ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, + user_wr->wr.ud.ah, file); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2464,7 +2453,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) goto out; @@ -2513,7 +2502,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) goto out; @@ -2552,7 +2541,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; struct ib_ah *ah; - struct rdma_ah_attr attr; + struct rdma_ah_attr attr = {}; int ret; struct ib_udata udata; @@ -2570,11 +2559,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext); + uobj = 
uobj_alloc(UVERBS_OBJECT_AH, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err; @@ -2636,19 +2625,12 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; - struct ib_uobject *uobj; - int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file, + in_len); } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, @@ -2665,7 +2647,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) return -EINVAL; @@ -2716,7 +2698,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) return -EINVAL; @@ -2761,29 +2743,27 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) resources = kzalloc(sizeof(*resources), GFP_KERNEL); if (!resources) - goto err_res; + return NULL; + + if (!num_specs) + goto out; resources->counters = kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); - - if (!resources->counters) - goto err_cnt; - resources->collection = kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL); - if (!resources->collection) - goto err_collection; + if 
(!resources->counters || !resources->collection) + goto err; +out: resources->max = num_specs; - return resources; -err_collection: +err: kfree(resources->counters); -err_cnt: kfree(resources); -err_res: + return NULL; } @@ -2826,7 +2806,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res, uflow_res->num++; } -static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, +static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -2855,7 +2835,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, ib_spec->action.act = uobj_get_obj_read(flow_action, UVERBS_OBJECT_FLOW_ACTION, kern_spec->action.handle, - ucontext); + ufile); if (!ib_spec->action.act) return -EINVAL; ib_spec->action.size = @@ -2873,7 +2853,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, uobj_get_obj_read(counters, UVERBS_OBJECT_COUNTERS, kern_spec->flow_count.handle, - ucontext); + ufile); if (!ib_spec->flow_count.counters) return -EINVAL; ib_spec->flow_count.size = @@ -3042,9 +3022,6 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, void *kern_spec_mask; void *kern_spec_val; - if (kern_spec->reserved) - return -EINVAL; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); kern_spec_val = (void *)kern_spec + @@ -3057,7 +3034,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, kern_filter_sz, ib_spec); } -static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, +static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -3066,7 +3043,7 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, return -EINVAL; if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) - return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec, + 
return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec, uflow_res); else return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); @@ -3109,18 +3086,17 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, - file->ucontext); + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file); if (IS_ERR(obj)) return PTR_ERR(obj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3224,8 +3200,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3275,7 +3250,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (!wq) return -EINVAL; @@ -3369,8 +3344,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs], - file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, + wqs_handles[num_read_wqs], file); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3379,7 +3354,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = 
uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto put_wqs; @@ -3445,7 +3420,6 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, struct ib_udata *uhw) { struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - struct ib_uobject *uobj; int ret; size_t required_cmd_sz; @@ -3466,12 +3440,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - return uobj_remove_commit(uobj); + return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL, + cmd.ind_tbl_handle, file, 0); } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, @@ -3547,18 +3517,23 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto err_free_attr; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) { err = -EINVAL; goto err_uobj; } + if (!qp->device->create_flow) { + err = -EOPNOTSUPP; + goto err_put; + } + flow_attr = kzalloc(struct_size(flow_attr, flows, cmd.flow_attr.num_of_specs), GFP_KERNEL); if (!flow_attr) { @@ -3584,8 +3559,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && cmd.flow_attr.size >= ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { - err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec, - uflow_res); + err = kern_spec_to_ib_spec(file, kern_spec, ib_spec, uflow_res); if (err) goto err_free; @@ -3631,7 +3605,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, 
kfree(kern_flow_attr); return 0; err_copy: - ib_destroy_flow(flow_id); + if (!qp->device->destroy_flow(flow_id)) + atomic_dec(&qp->usecnt); err_free: ib_uverbs_flow_resources_free(uflow_res); err_free_flow_attr: @@ -3652,7 +3627,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, struct ib_udata *uhw) { struct ib_uverbs_destroy_flow cmd; - struct ib_uobject *uobj; int ret; if (ucore->inlen < sizeof(cmd)) @@ -3665,13 +3639,8 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret; + return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file, + 0); } static int __uverbs_create_xsrq(struct ib_uverbs_file *file, @@ -3687,8 +3656,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_srq_init_attr attr; int ret; - obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, - file->ucontext); + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3697,7 +3665,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, - file->ucontext); + file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; @@ -3714,15 +3682,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, } if (ib_srq_has_cq(cmd->srq_type)) { - attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle, - file->ucontext); + attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->cq_handle, file); if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file); if (!pd) { ret = -EINVAL; goto err_put_cq; @@ -3894,7 +3862,7 @@ 
ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) return -EINVAL; @@ -3925,7 +3893,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) return -EINVAL; @@ -3962,8 +3930,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -4141,7 +4108,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (cmd.attr_mask > IB_CQ_MODERATE) return -EOPNOTSUPP; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8d32c4ae368c..d3bf82cfaa2b 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -46,8 +46,7 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } -static int uverbs_process_attr(struct ib_device *ibdev, - struct ib_ucontext *ucontext, +static int uverbs_process_attr(struct ib_uverbs_file *ufile, const struct ib_uverbs_attr *uattr, u16 attr_id, const struct uverbs_attr_spec_hash *attr_spec_bucket, @@ -78,13 +77,13 @@ static int uverbs_process_attr(struct ib_device *ibdev, switch (spec->type) { case UVERBS_ATTR_TYPE_ENUM_IN: - if 
(uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems) + if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems) return -EOPNOTSUPP; if (uattr->attr_data.enum_data.reserved) return -EINVAL; - val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id]; + val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id]; /* Currently we only support PTR_IN based enums */ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN) @@ -98,25 +97,42 @@ static int uverbs_process_attr(struct ib_device *ibdev, * longer struct will fail here if used with an old kernel and * non-zero content, making ABI compat/discovery simpler. */ - if (uattr->len > val_spec->ptr.len && - val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO && - !uverbs_is_attr_cleared(uattr, val_spec->ptr.len)) + if (uattr->len > val_spec->u.ptr.len && + val_spec->zero_trailing && + !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len)) return -EOPNOTSUPP; /* fall through */ case UVERBS_ATTR_TYPE_PTR_OUT: - if (uattr->len < val_spec->ptr.min_len || - (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) && - uattr->len > val_spec->ptr.len)) + if (uattr->len < val_spec->u.ptr.min_len || + (!val_spec->zero_trailing && + uattr->len > val_spec->u.ptr.len)) return -EINVAL; if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN && uattr->attr_data.reserved) return -EINVAL; - e->ptr_attr.data = uattr->data; e->ptr_attr.len = uattr->len; e->ptr_attr.flags = uattr->flags; + + if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { + void *p; + + p = kvmalloc(uattr->len, GFP_KERNEL); + if (!p) + return -ENOMEM; + + e->ptr_attr.ptr = p; + + if (copy_from_user(p, u64_to_user_ptr(uattr->data), + uattr->len)) { + kvfree(p); + return -EFAULT; + } + } else { + e->ptr_attr.data = uattr->data; + } break; case UVERBS_ATTR_TYPE_IDR: @@ -127,26 +143,25 @@ static int uverbs_process_attr(struct ib_device *ibdev, if (uattr->attr_data.reserved) return -EINVAL; - if (uattr->len != 0 || !ucontext || uattr->data > 
INT_MAX) + if (uattr->len != 0 || !ufile->ucontext || + uattr->data > INT_MAX) return -EINVAL; o_attr = &e->obj_attr; - object = uverbs_get_object(ibdev, spec->obj.obj_type); + object = uverbs_get_object(ufile, spec->u.obj.obj_type); if (!object) return -EINVAL; - o_attr->type = object->type_attrs; - o_attr->id = (int)uattr->data; - o_attr->uobject = uverbs_get_uobject_from_context( - o_attr->type, - ucontext, - spec->obj.access, - o_attr->id); + o_attr->uobject = uverbs_get_uobject_from_file( + object->type_attrs, + ufile, + spec->u.obj.access, + (int)uattr->data); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); - if (spec->obj.access == UVERBS_ACCESS_NEW) { + if (spec->u.obj.access == UVERBS_ACCESS_NEW) { u64 id = o_attr->uobject->id; /* Copy the allocated id to the user-space */ @@ -167,8 +182,53 @@ static int uverbs_process_attr(struct ib_device *ibdev, return 0; } -static int uverbs_uattrs_process(struct ib_device *ibdev, - struct ib_ucontext *ucontext, +static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, + struct uverbs_attr_spec_hash *const *spec_hash, + size_t num, bool commit) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < num; i++) { + struct uverbs_attr_bundle_hash *curr_bundle = + &attrs_bundle->hash[i]; + const struct uverbs_attr_spec_hash *curr_spec_bucket = + spec_hash[i]; + unsigned int j; + + if (!curr_spec_bucket) + continue; + + for (j = 0; j < curr_bundle->num_attrs; j++) { + struct uverbs_attr *attr; + const struct uverbs_attr_spec *spec; + + if (!uverbs_attr_is_valid_in_hash(curr_bundle, j)) + continue; + + attr = &curr_bundle->attrs[j]; + spec = &curr_spec_bucket->attrs[j]; + + if (spec->type == UVERBS_ATTR_TYPE_IDR || + spec->type == UVERBS_ATTR_TYPE_FD) { + int current_ret; + + current_ret = uverbs_finalize_object( + attr->obj_attr.uobject, + spec->u.obj.access, commit); + if (!ret) + ret = current_ret; + } else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN && + spec->alloc_and_copy && + 
!uverbs_attr_ptr_is_inline(attr)) { + kvfree(attr->ptr_attr.ptr); + } + } + } + return ret; +} + +static int uverbs_uattrs_process(struct ib_uverbs_file *ufile, const struct ib_uverbs_attr *uattrs, size_t num_uattrs, const struct uverbs_method_spec *method, @@ -185,12 +245,12 @@ static int uverbs_uattrs_process(struct ib_device *ibdev, struct uverbs_attr_spec_hash *attr_spec_bucket; ret = uverbs_ns_idx(&attr_id, method->num_buckets); - if (ret < 0) { + if (ret < 0 || !method->attr_buckets[ret]) { if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); + uverbs_finalize_attrs(attr_bundle, + method->attr_buckets, + num_given_buckets, + false); return ret; } continue; @@ -204,14 +264,14 @@ static int uverbs_uattrs_process(struct ib_device *ibdev, num_given_buckets = ret + 1; attr_spec_bucket = method->attr_buckets[ret]; - ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id, - attr_spec_bucket, &attr_bundle->hash[ret], - uattr_ptr++); + ret = uverbs_process_attr(ufile, uattr, attr_id, + attr_spec_bucket, + &attr_bundle->hash[ret], uattr_ptr++); if (ret) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); + uverbs_finalize_attrs(attr_bundle, + method->attr_buckets, + num_given_buckets, + false); return ret; } } @@ -228,6 +288,9 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met struct uverbs_attr_spec_hash *attr_spec_bucket = method_spec->attr_buckets[i]; + if (!attr_spec_bucket) + continue; + if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, attr_bundle->hash[i].valid_bitmap, attr_spec_bucket->num_attrs)) @@ -258,9 +321,8 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, int finalize_ret; int num_given_buckets; - num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, - num_uattrs, method_spec, - attr_bundle, uattr_ptr); + num_given_buckets = 
uverbs_uattrs_process( + ufile, uattrs, num_uattrs, method_spec, attr_bundle, uattr_ptr); if (num_given_buckets <= 0) return -EINVAL; @@ -271,10 +333,10 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, ret = method_spec->handler(ibdev, ufile, attr_bundle); cleanup: - finalize_ret = uverbs_finalize_objects(attr_bundle, - method_spec->attr_buckets, - attr_bundle->num_buckets, - !ret); + finalize_ret = uverbs_finalize_attrs(attr_bundle, + method_spec->attr_buckets, + attr_bundle->num_buckets, + !ret); return ret ? ret : finalize_ret; } @@ -301,7 +363,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, if (hdr->driver_id != ib_dev->driver_id) return -EINVAL; - object_spec = uverbs_get_object(ib_dev, hdr->object_id); + object_spec = uverbs_get_object(file, hdr->object_id); if (!object_spec) return -EPROTONOSUPPORT; @@ -341,7 +403,12 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, * filled at a later stage (uverbs_process_attr) */ for (i = 0; i < method_spec->num_buckets; i++) { - unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; + unsigned int curr_num_attrs; + + if (!method_spec->attr_buckets[i]) + continue; + + curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr; curr_attr += curr_num_attrs; diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index 6ceb672c4d46..f81aa888ce5c 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -367,25 +367,25 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me memcpy(attr, &attr_defs[0]->attr, sizeof(*attr)); attr_obj_with_special_access = IS_ATTR_OBJECT(attr) && - (attr->obj.access == UVERBS_ACCESS_NEW || - attr->obj.access == UVERBS_ACCESS_DESTROY); + (attr->u.obj.access == UVERBS_ACCESS_NEW || + attr->u.obj.access == UVERBS_ACCESS_DESTROY); num_of_singularities += 
!!attr_obj_with_special_access; if (WARN(num_of_singularities > 1, "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n", min_id) || WARN(attr_obj_with_special_access && - !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY), + !attr->mandatory, "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n", min_id) || WARN(IS_ATTR_OBJECT(attr) && - attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, + attr->zero_trailing, "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n", min_id)) { res = -EINVAL; goto free; } - if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY) + if (attr->mandatory) set_bit(min_id, hash->mandatory_attrs_bitmask); min_id++; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 3ae2339dd27a..8425718bebbd 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -138,6 +138,12 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) +{ + return ufile->ucontext; +} +EXPORT_SYMBOL(ib_uverbs_get_ucontext); + int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; @@ -155,6 +161,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj) container_of(kobj, struct ib_uverbs_device, kobj); cleanup_srcu_struct(&dev->disassociate_srcu); + uverbs_free_spec_tree(dev->specs_root); kfree(dev); } @@ -184,7 +191,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file, } spin_unlock_irq(&ev_file->ev_queue.lock); - uverbs_uobject_put(&ev_file->uobj_file.uobj); + uverbs_uobject_put(&ev_file->uobj); } spin_lock_irq(&file->async_file->ev_queue.lock); @@ -220,12 +227,13 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, } } -static int ib_uverbs_cleanup_ucontext(struct 
ib_uverbs_file *file, - struct ib_ucontext *context, - bool device_removed) +static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file, + bool device_removed) { + struct ib_ucontext *context = file->ucontext; + context->closing = 1; - uverbs_cleanup_ucontext(context, device_removed); + uverbs_cleanup_ufile(file, device_removed); put_pid(context->tgid); ib_rdmacg_uncharge(&context->cg_obj, context->device, @@ -338,7 +346,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, filp->private_data; return ib_uverbs_event_read(&comp_ev_file->ev_queue, - comp_ev_file->uobj_file.ufile, filp, + comp_ev_file->uobj.ufile, filp, buf, count, pos, sizeof(struct ib_uverbs_comp_event_desc)); } @@ -420,7 +428,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) { - struct ib_uverbs_completion_event_file *file = filp->private_data; + struct ib_uobject *uobj = filp->private_data; + struct ib_uverbs_completion_event_file *file = container_of( + uobj, struct ib_uverbs_completion_event_file, uobj); struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&file->ev_queue.lock); @@ -528,7 +538,7 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, + ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } @@ -875,12 +885,14 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file->device = dev; spin_lock_init(&file->idr_lock); idr_init(&file->idr); - file->ucontext = NULL; - file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); mutex_init(&file->cleanup_mutex); + mutex_init(&file->uobjects_lock); + INIT_LIST_HEAD(&file->uobjects); + 
init_rwsem(&file->cleanup_rwsem); + filp->private_data = file; kobject_get(&dev->kobj); list_add_tail(&file->list, &dev->uverbs_file_list); @@ -907,7 +919,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) mutex_lock(&file->cleanup_mutex); if (file->ucontext) { - ib_uverbs_cleanup_ucontext(file, file->ucontext, false); + ib_uverbs_cleanup_ufile(file, false); file->ucontext = NULL; } mutex_unlock(&file->cleanup_mutex); @@ -1061,7 +1073,7 @@ static void ib_uverbs_add_one(struct ib_device *device) if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; - if (!device->specs_root) { + if (!device->driver_specs_root) { const struct uverbs_object_tree_def *default_root[] = { uverbs_default_get_objects()}; @@ -1069,8 +1081,13 @@ static void ib_uverbs_add_one(struct ib_device *device) default_root); if (IS_ERR(uverbs_dev->specs_root)) goto err_class; - - device->specs_root = uverbs_dev->specs_root; + } else { + uverbs_dev->specs_root = device->driver_specs_root; + /* + * Take responsibility to free the specs allocated by the + * driver. 
+ */ + device->driver_specs_root = NULL; } ib_set_client_data(device, &uverbs_client, uverbs_dev); @@ -1160,7 +1177,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, mutex_unlock(&file->cleanup_mutex); /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ucontext + * ib_uverbs_cleanup_ufile */ if (ucontext) { /* We must release the mutex before going ahead and @@ -1172,7 +1189,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, ib_uverbs_event_handler(&file->event_handler, &event); ib_uverbs_disassociate_ucontext(ucontext); mutex_lock(&file->cleanup_mutex); - ib_uverbs_cleanup_ucontext(file, ucontext, true); + ib_uverbs_cleanup_ufile(file, true); mutex_unlock(&file->cleanup_mutex); } @@ -1235,10 +1252,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); - if (uverbs_dev->specs_root) { - uverbs_free_spec_tree(uverbs_dev->specs_root); - device->specs_root = NULL; - } kobject_put(&uverbs_dev->kobj); } diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index bb372b4713a4..b8d715c68ca4 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst, /* TODO: No need to set this */ sa_path_set_dmac_zero(dst); - sa_path_set_ndev(dst, NULL); - sa_path_set_ifindex(dst, 0); } EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index b570acbd94af..718c8430d364 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -48,14 +48,17 @@ static int uverbs_free_ah(struct ib_uobject *uobject, static int uverbs_free_flow(struct ib_uobject *uobject, enum rdma_remove_reason why) { - int 
ret; struct ib_flow *flow = (struct ib_flow *)uobject->object; struct ib_uflow_object *uflow = container_of(uobject, struct ib_uflow_object, uobject); + struct ib_qp *qp = flow->qp; + int ret; - ret = ib_destroy_flow(flow); - if (!ret) + ret = qp->device->destroy_flow(flow); + if (!ret) { + atomic_dec(&qp->usecnt); ib_uverbs_flow_resources_free(uflow->resources); + } return ret; } @@ -74,6 +77,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject, container_of(uobject, struct ib_uqp_object, uevent.uobject); int ret; + /* + * If this is a user triggered destroy then do not allow destruction + * until the user cleans up all the mcast bindings. Unlike in other + * places we forcibly clean up the mcast attachments for !DESTROY + * because the mcast attaches are not uobjects and will not be + * destroyed by anything else during cleanup processing. + */ if (why == RDMA_REMOVE_DESTROY) { if (!list_empty(&uqp->mcast_list)) return -EBUSY; @@ -82,7 +92,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp(qp); - if (ret && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(ret, why, uobject)) return ret; if (uqp->uxrcd) @@ -100,8 +110,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, int ret; ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret || why != RDMA_REMOVE_DESTROY) - kfree(ind_tbl); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + kfree(ind_tbl); return ret; } @@ -114,8 +126,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq(wq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); return ret; } @@ -129,8 +143,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int ret; ret = ib_destroy_srq(srq); - - if (ret && why == RDMA_REMOVE_DESTROY) + if
(ib_is_destroy_retryable(ret, why, uobject)) return ret; if (srq_type == IB_SRQT_XRC) { @@ -152,12 +165,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; + ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); + if (ret) + return ret; + mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); - if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt)) - ret = -EBUSY; - else - ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device, - xrcd, why); + ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why); mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); return ret; @@ -167,20 +180,22 @@ static int uverbs_free_pd(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_pd *pd = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); + if (ret) + return ret; ib_dealloc_pd((struct ib_pd *)uobject->object); return 0; } -static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file, +static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, enum rdma_remove_reason why) { struct ib_uverbs_completion_event_file *comp_event_file = - container_of(uobj_file, struct ib_uverbs_completion_event_file, - uobj_file); + container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; spin_lock_irq(&event_queue->lock); @@ -200,18 +215,7 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev, { return 0; } - -/* - * This spec is used in order to pass information to the hardware driver in a - * legacy way. Every verb that could get driver specific data should get this - * spec. 
- */ -const struct uverbs_attr_def uverbs_uhw_compat_in = - UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); -const struct uverbs_attr_def uverbs_uhw_compat_out = - UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); +EXPORT_SYMBOL(uverbs_destroy_def_handler); void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) { @@ -245,65 +249,68 @@ void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) } } -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL, - &UVERBS_TYPE_ALLOC_FD(0, - sizeof(struct ib_uverbs_completion_event_file), - uverbs_hot_unplug_completion_event_file, - &uverbs_event_fops, - "[infinibandevent]", O_RDONLY)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), + uverbs_hot_unplug_completion_event_file, + &uverbs_event_fops, + "[infinibandevent]", + O_RDONLY)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, - uverbs_free_qp)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_QP, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, - uverbs_free_srq)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_SRQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), + uverbs_free_srq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), - 0, uverbs_free_flow)); +DECLARE_UVERBS_NAMED_OBJECT( + 
UVERBS_OBJECT_FLOW, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), + uverbs_free_flow)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, - uverbs_free_wq)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_WQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, - uverbs_free_xrcd)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_XRCD, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), + uverbs_free_xrcd)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, - /* 2 is used in order to free the PD after MRs */ - &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL); - -static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, - &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), - &UVERBS_OBJECT(UVERBS_OBJECT_PD), - &UVERBS_OBJECT(UVERBS_OBJECT_MR), - &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), - &UVERBS_OBJECT(UVERBS_OBJECT_CQ), - &UVERBS_OBJECT(UVERBS_OBJECT_QP), - &UVERBS_OBJECT(UVERBS_OBJECT_AH), - &UVERBS_OBJECT(UVERBS_OBJECT_MW), - &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), - &UVERBS_OBJECT(UVERBS_OBJECT_WQ), - &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), - &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), - &UVERBS_OBJECT(UVERBS_OBJECT_DM), - &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd)); + +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE); + +DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, + &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), + &UVERBS_OBJECT(UVERBS_OBJECT_PD), + &UVERBS_OBJECT(UVERBS_OBJECT_MR), + &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), + 
&UVERBS_OBJECT(UVERBS_OBJECT_CQ), + &UVERBS_OBJECT(UVERBS_OBJECT_QP), + &UVERBS_OBJECT(UVERBS_OBJECT_AH), + &UVERBS_OBJECT(UVERBS_OBJECT_MW), + &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), + &UVERBS_OBJECT(UVERBS_OBJECT_WQ), + &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), + &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), + &UVERBS_OBJECT(UVERBS_OBJECT_DM), + &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 03b182a684a6..dfe59ad721f6 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -38,10 +38,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_counters *counters = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && - atomic_read(&counters->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); + if (ret) + return ret; return counters->device->destroy_counters(counters); } @@ -123,35 +124,35 @@ err_read: return ret; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, - &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - 
&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, - UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, - UVERBS_ATTR_TYPE(__u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_COUNTERS_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_READ, + UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_MIN_SIZE(0), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, + UVERBS_ATTR_TYPE(__u32), + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), + UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); - diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 3d293d01afea..5a6154345fa0 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -44,12 +44,16 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq(cq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ? 
- container_of(ev_queue, - struct ib_uverbs_completion_event_file, - ev_queue) : NULL, - ucq); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_ucq( + uobject->context->ufile, + ev_queue ? container_of(ev_queue, + struct ib_uverbs_completion_event_file, + ev_queue) : + NULL, + ucq); return ret; } @@ -57,7 +61,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct ib_ucontext *ucontext = file->ucontext; struct ib_ucq_object *obj; struct ib_udata uhw; int ret; @@ -67,7 +70,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_uobject *ev_file_uobj; - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) + if (!ib_dev->create_cq || !ib_dev->destroy_cq) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.comp_vector, attrs, @@ -90,11 +93,11 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, if (!IS_ERR(ev_file_uobj)) { ev_file = container_of(ev_file_uobj, struct ib_uverbs_completion_event_file, - uobj_file.uobj); + uobj); uverbs_uobject_get(ev_file_uobj); } - if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { + if (attr.comp_vector >= file->device->num_comp_vectors) { ret = -EINVAL; goto err_event_file; } @@ -102,7 +105,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, obj = container_of(uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), typeof(*obj), uobject); - obj->uverbs_file = ucontext->ufile; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); @@ -111,7 +113,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, /* Temporary, only until drivers get the new uverbs_attr_bundle */ create_udata(attrs, &uhw); - cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); + cq 
= ib_dev->create_cq(ib_dev, &attr, file->ucontext, &uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; @@ -143,25 +145,30 @@ err_event_file: return ret; }; -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, - UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_ACCESS_READ), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &uverbs_uhw_compat_in, &uverbs_uhw_compat_out); + UA_MANDATORY), + UVERBS_ATTR_UHW()); static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, struct ib_uverbs_file *file, @@ -178,9 +185,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct 
ib_device *ib_dev, obj = container_of(uobj, struct ib_ucq_object, uobject); - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) - return -EOPNOTSUPP; - ret = rdma_explicit_destroy(uobj); if (ret) return ret; @@ -192,20 +196,22 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, sizeof(resp)); } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, - UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, - uverbs_free_cq), +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, + UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_CQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), + #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) - &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) + &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) #endif - ); - +); diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 8b681575b615..9e148e322523 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -37,9 +37,11 @@ static int uverbs_free_dm(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_dm *dm = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt)) - return -EBUSY; + ret = 
ib_destroy_usecnt(&dm->usecnt, why, uobject); + if (ret) + return ret; return dm->device->dealloc_dm(dm); } @@ -48,7 +50,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct ib_ucontext *ucontext = file->ucontext; struct ib_dm_alloc_attr attr = {}; struct ib_uobject *uobj; struct ib_dm *dm; @@ -69,7 +70,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject; - dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs); + dm = ib_dev->alloc_dm(ib_dev, file->ucontext, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); @@ -83,26 +84,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, return 0; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, - &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, - UVERBS_OBJECT_DM, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_DM_FREE, + UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, + UVERBS_OBJECT_DM, + 
UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, - /* 1 is used in order to free the DM after MRs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm), + UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index a7be51cf2e42..c753a34cd984 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_flow_action *action = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && - atomic_read(&action->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&action->usecnt, why, uobject); + if (ret) + return ret; return action->device->destroy_flow_action(action); } @@ -320,7 +321,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return ret; /* No need to check as this attribute is marked as MANDATORY */ - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); + uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); if (IS_ERR(action)) return PTR_ERR(action); @@ -350,7 +352,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device if (ret) return ret; - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); + uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); action = uobj->object; if (action->type != IB_FLOW_ACTION_ESP) @@ -363,73 +366,84 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { 
[IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), - .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } }, + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT( + struct ib_uverbs_flow_action_esp_keymat_aes_gcm, + aes_key), }, }; static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - /* No need to specify any data */ - .len = 0, - } } + .type = UVERBS_ATTR_TYPE_PTR_IN, + /* No need to specify any data */ + UVERBS_ATTR_SIZE(0, 0), }, [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), - .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } } + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, + size), }, }; -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); - -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, 
UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_WRITE, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); - +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), + 
UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, + UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_FLOW_ACTION_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 68f7cadf088f..c1b9124d611e 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -115,33 +115,37 @@ err_dereg: return ret; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, - UVERBS_ATTR_TYPE(u64), - 
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_MR_REG, + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR, - /* 1 is used in order to free the MR after all the MWs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr), - &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_MR, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 0b56828c1319..b6ceb6fd6a67 100644 --- 
a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -326,6 +326,153 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ +/** + * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination. + * @dest: Pointer to destination ah_attr. Contents of the destination + * pointer are assumed to be invalid and attributes are overwritten. + * @src: Pointer to source ah_attr. + */ +void rdma_copy_ah_attr(struct rdma_ah_attr *dest, + const struct rdma_ah_attr *src) +{ + *dest = *src; + if (dest->grh.sgid_attr) + rdma_hold_gid_attr(dest->grh.sgid_attr); +} +EXPORT_SYMBOL(rdma_copy_ah_attr); + +/** + * rdma_replace_ah_attr - Replace valid ah_attr with a new one. + * @old: Pointer to existing ah_attr which needs to be replaced. + * old is assumed to be valid or zero'd + * @new: Pointer to the new ah_attr. + * + * rdma_replace_ah_attr() first releases any reference in the old ah_attr if + * the old ah_attr is valid; after that it copies the new attribute and holds + * the reference to the replaced ah_attr. + */ +void rdma_replace_ah_attr(struct rdma_ah_attr *old, + const struct rdma_ah_attr *new) +{ + rdma_destroy_ah_attr(old); + *old = *new; + if (old->grh.sgid_attr) + rdma_hold_gid_attr(old->grh.sgid_attr); +} +EXPORT_SYMBOL(rdma_replace_ah_attr); + +/** + * rdma_move_ah_attr - Move ah_attr pointed by source to destination. + * @dest: Pointer to destination ah_attr to copy to. + * dest is assumed to be valid or zero'd + * @src: Pointer to the new ah_attr. + * + * rdma_move_ah_attr() first releases any reference in the destination ah_attr + * if it is valid. This also transfers ownership of internal references from + * src to dest, making src invalid in the process. No new reference of the src + * ah_attr is taken. 
+ */ +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src) +{ + rdma_destroy_ah_attr(dest); + *dest = *src; + src->grh.sgid_attr = NULL; +} +EXPORT_SYMBOL(rdma_move_ah_attr); + +/* + * Validate that the rdma_ah_attr is valid for the device before passing it + * off to the driver. + */ +static int rdma_check_ah_attr(struct ib_device *device, + struct rdma_ah_attr *ah_attr) +{ + if (!rdma_is_port_valid(device, ah_attr->port_num)) + return -EINVAL; + + if ((rdma_is_grh_required(device, ah_attr->port_num) || + ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) && + !(ah_attr->ah_flags & IB_AH_GRH)) + return -EINVAL; + + if (ah_attr->grh.sgid_attr) { + /* + * Make sure the passed sgid_attr is consistent with the + * parameters + */ + if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index || + ah_attr->grh.sgid_attr->port_num != ah_attr->port_num) + return -EINVAL; + } + return 0; +} + +/* + * If the ah requires a GRH then ensure that sgid_attr pointer is filled in. + * On success the caller is responsible to call rdma_unfill_sgid_attr(). 
+ */ +static int rdma_fill_sgid_attr(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr **old_sgid_attr) +{ + const struct ib_gid_attr *sgid_attr; + struct ib_global_route *grh; + int ret; + + *old_sgid_attr = ah_attr->grh.sgid_attr; + + ret = rdma_check_ah_attr(device, ah_attr); + if (ret) + return ret; + + if (!(ah_attr->ah_flags & IB_AH_GRH)) + return 0; + + grh = rdma_ah_retrieve_grh(ah_attr); + if (grh->sgid_attr) + return 0; + + sgid_attr = + rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); + + /* Move ownership of the kref into the ah_attr */ + grh->sgid_attr = sgid_attr; + return 0; +} + +static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *old_sgid_attr) +{ + /* + * Fill didn't change anything, the caller retains ownership of + * whatever it passed + */ + if (ah_attr->grh.sgid_attr == old_sgid_attr) + return; + + /* + * Otherwise, we need to undo what rdma_fill_sgid_attr did so the caller + * doesn't see any change in the rdma_ah_attr. If we get here + * old_sgid_attr is NULL. + */ + rdma_destroy_ah_attr(ah_attr); +} + +static const struct ib_gid_attr * +rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *old_attr) +{ + if (old_attr) + rdma_put_gid_attr(old_attr); + if (ah_attr->ah_flags & IB_AH_GRH) { + rdma_hold_gid_attr(ah_attr->grh.sgid_attr); + return ah_attr->grh.sgid_attr; + } + return NULL; +} + static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) @@ -339,15 +486,38 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->pd = pd; ah->uobject = NULL; ah->type = ah_attr->type; + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + atomic_inc(&pd->usecnt); } return ah; } +/** + * rdma_create_ah - Creates an address handle for the + * given address vector. + * @pd: The protection domain associated with the address handle. 
+ * @ah_attr: The attributes of the address vector. + * + * Returns the address handle, or an ERR_PTR() error if sgid_attr setup fails. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) { - return _rdma_create_ah(pd, ah_attr, NULL); + const struct ib_gid_attr *old_sgid_attr; + struct ib_ah *ah; + int ret; + + ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); + if (ret) + return ERR_PTR(ret); + + ah = _rdma_create_ah(pd, ah_attr, NULL); + + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ah; } EXPORT_SYMBOL(rdma_create_ah); @@ -368,15 +538,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) { + const struct ib_gid_attr *old_sgid_attr; + struct ib_ah *ah; int err; + err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); + if (err) + return ERR_PTR(err); + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { err = ib_resolve_eth_dmac(pd->device, ah_attr); - if (err) - return ERR_PTR(err); + if (err) { + ah = ERR_PTR(err); + goto out; + } } - return _rdma_create_ah(pd, ah_attr, udata); + ah = _rdma_create_ah(pd, ah_attr, udata); + +out: + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ah; } EXPORT_SYMBOL(rdma_create_user_ah); @@ -455,16 +637,16 @@ static bool find_gid_index(const union ib_gid *gid, return true; } -static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, - u16 vlan_id, const union ib_gid *sgid, - enum ib_gid_type gid_type, - u16 *gid_index) +static const struct ib_gid_attr * +get_sgid_attr_from_eth(struct ib_device *device, u8 port_num, + u16 vlan_id, const union ib_gid *sgid, + enum ib_gid_type gid_type) { struct find_gid_index_context context = {.vlan_id = vlan_id, .gid_type = gid_type}; - return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, - &context, gid_index); + return 
rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index, + &context); } int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, @@ -508,39 +690,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); static int ib_resolve_unicast_gid_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { - struct ib_gid_attr sgid_attr; - struct ib_global_route *grh; + struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr); + const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int hop_limit = 0xff; - union ib_gid sgid; - int ret; - - grh = rdma_ah_retrieve_grh(ah_attr); - - ret = ib_query_gid(device, - rdma_ah_get_port_num(ah_attr), - grh->sgid_index, - &sgid, &sgid_attr); - if (ret || !sgid_attr.ndev) { - if (!ret) - ret = -ENXIO; - return ret; - } + int ret = 0; /* If destination is link local and source GID is RoCEv1, * IP stack is not used. */ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) && - sgid_attr.gid_type == IB_GID_TYPE_ROCE) { + sgid_attr->gid_type == IB_GID_TYPE_ROCE) { rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, ah_attr->roce.dmac); - goto done; + return ret; } - ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, + ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, ah_attr->roce.dmac, - sgid_attr.ndev, &hop_limit); -done: - dev_put(sgid_attr.ndev); + sgid_attr->ndev, &hop_limit); grh->hop_limit = hop_limit; return ret; @@ -555,16 +722,18 @@ done: * as sgid and, sgid is used as dgid because sgid contains destinations * GID whom to respond to. * + * On success the caller is responsible to call rdma_destroy_ah_attr on the + * attr. 
*/ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr) { u32 flow_class; - u16 gid_index; int ret; enum rdma_network_type net_type = RDMA_NETWORK_IB; enum ib_gid_type gid_type = IB_GID_TYPE_IB; + const struct ib_gid_attr *sgid_attr; int hoplimit = 0xff; union ib_gid dgid; union ib_gid sgid; @@ -595,72 +764,141 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; - ret = get_sgid_index_from_eth(device, port_num, - vlan_id, &dgid, - gid_type, &gid_index); - if (ret) - return ret; + sgid_attr = get_sgid_attr_from_eth(device, port_num, + vlan_id, &dgid, + gid_type); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); flow_class = be32_to_cpu(grh->version_tclass_flow); - rdma_ah_set_grh(ah_attr, &sgid, - flow_class & 0xFFFFF, - (u8)gid_index, hoplimit, - (flow_class >> 20) & 0xFF); - return ib_resolve_unicast_gid_dmac(device, ah_attr); + rdma_move_grh_sgid_attr(ah_attr, + &sgid, + flow_class & 0xFFFFF, + hoplimit, + (flow_class >> 20) & 0xFF, + sgid_attr); + + ret = ib_resolve_unicast_gid_dmac(device, ah_attr); + if (ret) + rdma_destroy_ah_attr(ah_attr); + + return ret; } else { rdma_ah_set_dlid(ah_attr, wc->slid); rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); - if (wc->wc_flags & IB_WC_GRH) { - if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { - ret = ib_find_cached_gid_by_port(device, &dgid, - IB_GID_TYPE_IB, - port_num, NULL, - &gid_index); - if (ret) - return ret; - } else { - gid_index = 0; - } + if ((wc->wc_flags & IB_WC_GRH) == 0) + return 0; - flow_class = be32_to_cpu(grh->version_tclass_flow); - rdma_ah_set_grh(ah_attr, &sgid, + if (dgid.global.interface_id != + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { + sgid_attr = rdma_find_gid_by_port( + device, &dgid, IB_GID_TYPE_IB, port_num, NULL); + } else + sgid_attr = rdma_get_gid_attr(device, port_num, 0); + + if 
(IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); + flow_class = be32_to_cpu(grh->version_tclass_flow); + rdma_move_grh_sgid_attr(ah_attr, + &sgid, flow_class & 0xFFFFF, - (u8)gid_index, hoplimit, - (flow_class >> 20) & 0xFF); - } + hoplimit, + (flow_class >> 20) & 0xFF, + sgid_attr); + return 0; } } EXPORT_SYMBOL(ib_init_ah_attr_from_wc); +/** + * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership + * of the reference + * + * @attr: Pointer to AH attribute structure + * @dgid: Destination GID + * @flow_label: Flow label + * @hop_limit: Hop limit + * @traffic_class: traffic class + * @sgid_attr: Pointer to SGID attribute + * + * This takes ownership of the sgid_attr reference. The caller must ensure + * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after + * calling this function. + */ +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, + u32 flow_label, u8 hop_limit, u8 traffic_class, + const struct ib_gid_attr *sgid_attr) +{ + rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit, + traffic_class); + attr->grh.sgid_attr = sgid_attr; +} +EXPORT_SYMBOL(rdma_move_grh_sgid_attr); + +/** + * rdma_destroy_ah_attr - Release reference to SGID attribute of + * ah attribute. + * @ah_attr: Pointer to ah attribute + * + * Release reference to the SGID attribute of the ah attribute if it is + * non NULL. It is safe to call this multiple times, and safe to call it on + * a zero initialized ah_attr. 
+ */ +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr) +{ + if (ah_attr->grh.sgid_attr) { + rdma_put_gid_attr(ah_attr->grh.sgid_attr); + ah_attr->grh.sgid_attr = NULL; + } +} +EXPORT_SYMBOL(rdma_destroy_ah_attr); + struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num) { struct rdma_ah_attr ah_attr; + struct ib_ah *ah; int ret; ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr); if (ret) return ERR_PTR(ret); - return rdma_create_ah(pd, &ah_attr); + ah = rdma_create_ah(pd, &ah_attr); + + rdma_destroy_ah_attr(&ah_attr); + return ah; } EXPORT_SYMBOL(ib_create_ah_from_wc); int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + const struct ib_gid_attr *old_sgid_attr; + int ret; + if (ah->type != ah_attr->type) return -EINVAL; - return ah->device->modify_ah ? + ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr); + if (ret) + return ret; + + ret = ah->device->modify_ah ? ah->device->modify_ah(ah, ah_attr) : -EOPNOTSUPP; + + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr); + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ret; } EXPORT_SYMBOL(rdma_modify_ah); int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + ah_attr->grh.sgid_attr = NULL; + return ah->device->query_ah ? 
ah->device->query_ah(ah, ah_attr) : -EOPNOTSUPP; @@ -669,13 +907,17 @@ EXPORT_SYMBOL(rdma_query_ah); int rdma_destroy_ah(struct ib_ah *ah) { + const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; int ret; pd = ah->pd; ret = ah->device->destroy_ah(ah); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (sgid_attr) + rdma_put_gid_attr(sgid_attr); + } return ret; } @@ -1290,16 +1532,19 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); +/** + * ib_resolve_eth_dmac - Resolve destination mac address + * @device: Device to consider + * @ah_attr: address handle attribute which describes the + * source and destination parameters + * ib_resolve_eth_dmac() resolves destination mac address and L3 hop limit. It + * returns 0 on success or appropriate error code. It initializes the + * necessary ah_attr fields when call is successful. + */ static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { - int ret = 0; - struct ib_global_route *grh; - - if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr))) - return -EINVAL; - - grh = rdma_ah_retrieve_grh(ah_attr); + int ret = 0; if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) { @@ -1317,6 +1562,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device, return ret; } +static bool is_qp_type_connected(const struct ib_qp *qp) +{ + return (qp->qp_type == IB_QPT_UC || + qp->qp_type == IB_QPT_RC || + qp->qp_type == IB_QPT_XRC_INI || + qp->qp_type == IB_QPT_XRC_TGT); +} + /** * IB core internal function to perform QP attributes modification. */ @@ -1324,8 +1577,53 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { u8 port = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; + const struct ib_gid_attr *old_sgid_attr_av; + const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; + if (attr_mask & IB_QP_AV) { + ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, + &old_sgid_attr_av); + if (ret) + return ret; + } + if (attr_mask & IB_QP_ALT_PATH) { + /* + * FIXME: This does not track the migration state, so if the + * user loads a new alternate path after the HW has migrated + * from primary->alternate we will keep the wrong + * references. This is OK for IB because the reference + * counting does not serve any functional purpose. + */ + ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr, + &old_sgid_attr_alt_av); + if (ret) + goto out_av; + + /* + * Today the core code can only handle alternate paths and APM + * for IB. Ban them in roce mode. + */ + if (!(rdma_protocol_ib(qp->device, + attr->alt_ah_attr.port_num) && + rdma_protocol_ib(qp->device, port))) { + ret = -EINVAL; + goto out; + } + } + + /* + * If the user provided the qp_attr then we have to resolve it. 
Kernel + * users have to provide already resolved rdma_ah_attr's + */ + if (udata && (attr_mask & IB_QP_AV) && + attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + goto out; + } + if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n", @@ -1341,20 +1639,27 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } ret = ib_security_modify_qp(qp, attr, attr_mask, udata); - if (!ret && (attr_mask & IB_QP_PORT)) - qp->port = attr->port_num; + if (ret) + goto out; + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; + if (attr_mask & IB_QP_AV) + qp->av_sgid_attr = + rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr); + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_path_sgid_attr = rdma_update_sgid_attr( + &attr->alt_ah_attr, qp->alt_path_sgid_attr); + +out: + if (attr_mask & IB_QP_ALT_PATH) + rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); +out_av: + if (attr_mask & IB_QP_AV) + rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); return ret; } -static bool is_qp_type_connected(const struct ib_qp *qp) -{ - return (qp->qp_type == IB_QPT_UC || - qp->qp_type == IB_QPT_RC || - qp->qp_type == IB_QPT_XRC_INI || - qp->qp_type == IB_QPT_XRC_TGT); -} - /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. * @ib_qp: The QP to modify. 
@@ -1369,17 +1674,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp) int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { - struct ib_qp *qp = ib_qp->real_qp; - int ret; - - if (attr_mask & IB_QP_AV && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - return ret; - } - return _ib_modify_qp(qp, attr, attr_mask, udata); + return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata); } EXPORT_SYMBOL(ib_modify_qp_with_udata); @@ -1451,6 +1746,9 @@ int ib_query_qp(struct ib_qp *qp, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { + qp_attr->ah_attr.grh.sgid_attr = NULL; + qp_attr->alt_ah_attr.grh.sgid_attr = NULL; + return qp->device->query_qp ? qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -EOPNOTSUPP; @@ -1509,6 +1807,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) int ib_destroy_qp(struct ib_qp *qp) { + const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; + const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; @@ -1539,6 +1839,10 @@ int ib_destroy_qp(struct ib_qp *qp) rdma_restrack_del(&qp->res); ret = qp->device->destroy_qp(qp); if (!ret) { + if (alt_path_sgid_attr) + rdma_put_gid_attr(alt_path_sgid_attr); + if (av_sgid_attr) + rdma_put_gid_attr(av_sgid_attr); if (pd) atomic_dec(&pd->usecnt); if (scq) @@ -1975,35 +2279,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) } EXPORT_SYMBOL(ib_destroy_rwq_ind_table); -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain) -{ - struct ib_flow *flow_id; - if (!qp->device->create_flow) - return ERR_PTR(-EOPNOTSUPP); - - flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL); - if (!IS_ERR(flow_id)) { - atomic_inc(&qp->usecnt); - flow_id->qp = qp; - } - 
return flow_id; -} -EXPORT_SYMBOL(ib_create_flow); - -int ib_destroy_flow(struct ib_flow *flow_id) -{ - int err; - struct ib_qp *qp = flow_id->qp; - - err = qp->device->destroy_flow(flow_id); - if (!err) - atomic_dec(&qp->usecnt); - return err; -} -EXPORT_SYMBOL(ib_destroy_flow); - int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index a76e206704d4..dd800d153aa2 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -166,7 +166,8 @@ int bnxt_re_query_device(struct ib_device *ibdev, | IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_MEM_MGT_EXTENSIONS; - ib_attr->max_sge = dev_attr->max_qp_sges; + ib_attr->max_send_sge = dev_attr->max_qp_sges; + ib_attr->max_recv_sge = dev_attr->max_qp_sges; ib_attr->max_sge_rd = dev_attr->max_qp_sges; ib_attr->max_cq = dev_attr->max_cq; ib_attr->max_cqe = dev_attr->max_cq_wqes; @@ -243,8 +244,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, port_attr->gid_tbl_len = dev_attr->max_sgid; port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | - IB_PORT_IP_BASED_GIDS; + IB_PORT_VENDOR_CLASS_SUP; + port_attr->ip_gids = true; port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW; port_attr->bad_pkey_cntr = 0; @@ -364,8 +365,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) return rc; } -int bnxt_re_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) +int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context) { int rc; u32 tbl_idx = 0; @@ -377,7 +377,7 @@ int bnxt_re_add_gid(const union ib_gid *gid, if ((attr->ndev) && is_vlan_dev(attr->ndev)) vlan_id = vlan_dev_vlan_id(attr->ndev); - rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid, + rc = bnxt_qplib_add_sgid(sgid_tbl, (struct 
bnxt_qplib_gid *)&attr->gid, rdev->qplib_res.netdev->dev_addr, vlan_id, true, &tbl_idx); if (rc == -EALREADY) { @@ -673,8 +673,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, int rc; u8 nw_type; - struct ib_gid_attr sgid_attr; - if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); return ERR_PTR(-EINVAL); @@ -705,20 +703,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, grh->dgid.raw) && !rdma_link_local_addr((struct in6_addr *) grh->dgid.raw)) { - union ib_gid sgid; + const struct ib_gid_attr *sgid_attr; - rc = ib_get_cached_gid(&rdev->ibdev, 1, - grh->sgid_index, &sgid, - &sgid_attr); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to query gid at index %d", - grh->sgid_index); - goto fail; - } - dev_put(sgid_attr.ndev); + sgid_attr = grh->sgid_attr; /* Get network header type for this GID */ - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4; @@ -1599,9 +1588,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; enum ib_qp_state curr_qp_state, new_qp_state; int rc, entries; - int status; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; unsigned int flags; u8 nw_type; @@ -1668,6 +1654,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, if (qp_attr_mask & IB_QP_AV) { const struct ib_global_route *grh = rdma_ah_read_grh(&qp_attr->ah_attr); + const struct ib_gid_attr *sgid_attr; qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID | CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL | @@ -1691,29 +1678,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, ether_addr_copy(qp->qplib_qp.ah.dmac, qp_attr->ah_attr.roce.dmac); - status = ib_get_cached_gid(&rdev->ibdev, 1, - grh->sgid_index, - &sgid, &sgid_attr); - 
if (!status) { - memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr, - ETH_ALEN); - dev_put(sgid_attr.ndev); - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, - &sgid); - switch (nw_type) { - case RDMA_NETWORK_IPV4: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; - break; - case RDMA_NETWORK_IPV6: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; - break; - default: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; - break; - } + sgid_attr = qp_attr->ah_attr.grh.sgid_attr; + memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr, + ETH_ALEN); + nw_type = rdma_gid_attr_network_type(sgid_attr); + switch (nw_type) { + case RDMA_NETWORK_IPV4: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; + break; + case RDMA_NETWORK_IPV6: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; + break; + default: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; + break; } } @@ -1899,15 +1880,13 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, struct bnxt_qplib_swqe *wqe, int payload_size) { - struct ib_device *ibdev = &qp->rdev->ibdev; struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, ib_ah); struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah; + const struct ib_gid_attr *sgid_attr = ah->ib_ah.sgid_attr; struct bnxt_qplib_sge sge; - union ib_gid sgid; u8 nw_type; u16 ether_type; - struct ib_gid_attr sgid_attr; union ib_gid dgid; bool is_eth = false; bool is_vlan = false; @@ -1920,22 +1899,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); - rc = ib_get_cached_gid(ibdev, 1, - qplib_ah->host_sgid_index, &sgid, - &sgid_attr); - if (rc) { - dev_err(rdev_to_dev(qp->rdev), - "Failed to query gid at index %d", - qplib_ah->host_sgid_index); - return rc; - } - if (sgid_attr.ndev) { - if (is_vlan_dev(sgid_attr.ndev)) - vlan_id = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); - } + if 
(is_vlan_dev(sgid_attr->ndev)) + vlan_id = vlan_dev_vlan_id(sgid_attr->ndev); /* Get network header type for this GID */ - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: nw_type = BNXT_RE_ROCEV2_IPV4_PACKET; @@ -1948,9 +1915,9 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, break; } memcpy(&dgid.raw, &qplib_ah->dgid, 16); - is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; + is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { - if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) { ip_version = 4; ether_type = ETH_P_IP; } else { @@ -1983,9 +1950,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, } if (is_grh || (ip_version == 6)) { - memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid)); + memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid_attr->gid.raw, + sizeof(sgid_attr->gid)); memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data, - sizeof(sgid)); + sizeof(sgid_attr->gid)); qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit; } @@ -1995,7 +1963,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, qp->qp1_hdr.ip4.frag_off = htons(IP_DF); qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit; - memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4); + memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4); memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4); qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr); } @@ -2441,7 +2409,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr, default: break; } - /* Fall thru to build the wqe */ + /* fall through */ case IB_WR_SEND_WITH_INV: rc = bnxt_re_build_send_wqe(qp, wr, &wqe); break; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 5c6414cad4af..bd04d40d897a 100644 --- 
a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -158,8 +158,7 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str); int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey); int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context); -int bnxt_re_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context); +int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context); int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 50d8f1fc98d5..e426b990c1dd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2354,7 +2354,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, srq = qp->srq; if (!srq) return -EINVAL; - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); dev_err(&cq->hwq.pdev->dev, @@ -2369,7 +2369,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); dev_err(&cq->hwq.pdev->dev, @@ -2437,7 +2437,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, if (!srq) return -EINVAL; - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); dev_err(&cq->hwq.pdev->dev, @@ -2452,7 +2452,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { 
dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); dev_err(&cq->hwq.pdev->dev, @@ -2546,7 +2546,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, "QPLIB: FP: SRQ used but not defined??"); return -EINVAL; } - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 "); dev_err(&cq->hwq.pdev->dev, @@ -2561,7 +2561,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); dev_err(&cq->hwq.pdev->dev, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 2f3f32eaa1d5..4097f3fa25c5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -197,7 +197,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, struct bnxt_qplib_gid *gid) { - if (index > sgid_tbl->max) { + if (index >= sgid_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded SGID table max (%d)", index, sgid_tbl->max); @@ -402,7 +402,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, *pkey = 0xFFFF; return 0; } - if (index > pkey_tbl->max) { + if (index >= pkey_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded PKEY table max (%d)", index, pkey_tbl->max); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index 0a8542c20804..a098c0140580 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -32,38 +32,16 @@ #include "iwch_provider.h" #include "iwch.h" -/* - * Get one cq entry from cxio and map it to openib. 
- * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) +static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct iwch_qp *qhp, struct ib_wc *wc) { - struct iwch_qp *qhp = NULL; - struct t3_cqe cqe, *rd_cqe; - struct t3_wq *wq; + struct t3_wq *wq = qhp ? &qhp->wq : NULL; + struct t3_cqe cqe; u32 credit = 0; u8 cqe_flushed; u64 cookie; int ret = 1; - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); if (t3a_device(chp->rhp) && credit) { @@ -79,7 +57,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, ret = 1; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(cqe); wc->wc_flags = 0; @@ -182,8 +160,38 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from cxio and map it to openib. 
+ * + * Returns: + * 0 EMPTY; + * 1 cqe returned + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct ib_wc *wc) +{ + struct iwch_qp *qhp; + struct t3_cqe *rd_cqe; + int ret; + + rd_cqe = cxio_next_cqe(&chp->cq); + + if (!rd_cqe) + return 0; + + qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); spin_unlock(&qhp->lock); + } else { + ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); + } return ret; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index be097c6723c0..68bc2f9a532f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1103,7 +1103,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->max_mr_size = dev->attr.max_mr_size; props->max_qp = dev->attr.max_qps; props->max_qp_wr = dev->attr.max_wrs; - props->max_sge = dev->attr.max_sge_per_wr; + props->max_send_sge = dev->attr.max_sge_per_wr; + props->max_recv_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0912fa026327..77243f7e17d5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3444,9 +3444,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } insert_handle(dev, &dev->stid_idr, ep, ep->stid); - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) err = create_server4(dev, ep); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 2be2e1ac1b5f..a055f9f08e76 100644 --- 
a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, int user = (uctx != &rdev->uctx); int ret; struct sk_buff *skb; + struct c4iw_ucontext *ucontext = NULL; + + if (user) + ucontext = container_of(uctx, struct c4iw_ucontext, uctx); cq->cqid = c4iw_get_cqid(rdev, uctx); if (!cq->cqid) { @@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, dma_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, cq->memsize); + if (user && ucontext->is_32b_cqe) { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + (sizeof(*cq->queue) / 2)))->qp_err; + } else { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + sizeof(*cq->queue)))->qp_err; + } + /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + sizeof *res; @@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_IQPCIECH_V(2) | FW_RI_RES_WR_IQINTCNTTHRESH_V(0) | FW_RI_RES_WR_IQO_F | - FW_RI_RES_WR_IQESIZE_V(1)); + ((user && ucontext->is_32b_cqe) ? + FW_RI_RES_WR_IQESIZE_V(1) : + FW_RI_RES_WR_IQESIZE_V(2))); res->u.cq.iqsize = cpu_to_be16(cq->size); res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); @@ -668,43 +684,22 @@ skip_cqe: return ret; } -/* - * Get one cq entry from c4iw and map it to openib. - * - * Returns: - * 0 cqe returned - * -ENODATA EMPTY; - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, + struct ib_wc *wc) { - struct c4iw_qp *qhp = NULL; - struct t4_cqe uninitialized_var(cqe), *rd_cqe; - struct t4_wq *wq; + struct t4_cqe uninitialized_var(cqe); + struct t4_wq *wq = qhp ? 
&qhp->wq : NULL; u32 credit = 0; u8 cqe_flushed; u64 cookie = 0; int ret; - ret = t4_next_cqe(&chp->cq, &rd_cqe); - - if (ret) - return ret; - - qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); if (ret) goto out; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(&cqe); wc->wc_flags = 0; @@ -819,8 +814,37 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from c4iw and map it to openib. + * + * Returns: + * 0 cqe returned + * -ENODATA EMPTY; + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +{ + struct c4iw_qp *qhp = NULL; + struct t4_cqe *rd_cqe; + int ret; + + ret = t4_next_cqe(&chp->cq, &rd_cqe); + + if (ret) + return ret; + + qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + ret = __c4iw_poll_cq_one(chp, qhp, wc); spin_unlock(&qhp->lock); + } else { + ret = __c4iw_poll_cq_one(chp, NULL, wc); + } return ret; } @@ -876,6 +900,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; + struct c4iw_create_cq ucmd; struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; int ret, wr_len; @@ -891,9 +916,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (vector >= rhp->rdev.lldi.nciq) return ERR_PTR(-EINVAL); + if (ib_context) { + ucontext = to_c4iw_ucontext(ib_context); + if (udata->inlen < sizeof(ucmd)) + ucontext->is_32b_cqe = 1; + } + chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); + chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!chp->wr_waitp) { ret = -ENOMEM; @@ -908,9 +940,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device 
*ibdev, goto err_free_wr_wait; } - if (ib_context) - ucontext = to_c4iw_ucontext(ib_context); - /* account for the status page. */ entries++; @@ -934,13 +963,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (hwentries < 64) hwentries = 64; - memsize = hwentries * sizeof *chp->cq.queue; + memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ? + (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue)); /* * memsize must be a multiple of the page size if its a user cq. */ if (ucontext) memsize = roundup(memsize, PAGE_SIZE); + chp->cq.size = hwentries; chp->cq.memsize = memsize; chp->cq.vector = vector; @@ -971,6 +1002,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (!mm2) goto err_free_mm; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; @@ -980,9 +1012,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ucontext->key += PAGE_SIZE; uresp.gts_key = ucontext->key; ucontext->key += PAGE_SIZE; + /* communicate to the userspace that + * kernel driver supports 64B CQE + */ + uresp.flags |= C4IW_64B_CQE; + spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, - sizeof(uresp) - sizeof(uresp.reserved)); + ucontext->is_32b_cqe ? 
+ sizeof(uresp) - sizeof(uresp.flags) : + sizeof(uresp)); if (ret) goto err_free_mm2; diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 3e9d8b277ab9..8741d23168f3 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - pr_debug("%016llx %016llx %016llx %016llx\n", + pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n", be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]), - be64_to_cpu(p[3])); + be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]), + be64_to_cpu(p[6]), be64_to_cpu(p[7])); /* * Ingress WRITE and READ_RESP errors provide diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 870649ff049c..8866bf992316 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -566,6 +566,7 @@ struct c4iw_ucontext { spinlock_t mmap_lock; struct list_head mmaps; struct kref kref; + bool is_32b_cqe; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 1feade8bb4b3..61b8bdb9423d 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -343,7 +343,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->max_mr_size = T4_MAX_MR_SIZE; props->max_qp = dev->rdev.lldi.vr->qp.size / 2; props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; - props->max_sge = T4_MAX_RECV_SGE; + props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE); + props->max_recv_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; props->max_qp_rd_atom = 
min(dev->rdev.lldi.max_ordird_qp, diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 8369c7c8de83..838a7dee48bd 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -179,9 +179,20 @@ struct t4_cqe { __be32 wrid_hi; __be32 wrid_low; } gen; + struct { + __be32 stag; + __be32 msn; + __be32 reserved; + __be32 abs_rqe_idx; + } srcqe; + struct { + __be64 imm_data; + } imm_data_rcqe; + u64 drain_cookie; + __be64 flits[3]; } u; - __be64 reserved; + __be64 reserved[3]; __be64 bits_type_ts; }; @@ -565,6 +576,7 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + u8 *qp_errp; unsigned long flags; }; @@ -698,12 +710,12 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe) static inline int t4_cq_in_error(struct t4_cq *cq) { - return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err; + return *cq->qp_errp; } static inline void t4_set_cq_in_error(struct t4_cq *cq) { - ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; + *cq->qp_errp = 1; } #endif diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 6deb101cdd43..2c19bf772451 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8143,8 +8143,15 @@ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source) } } -/* +/** + * is_rcv_avail_int() - User receive context available IRQ handler + * @dd: valid dd + * @source: logical IRQ source (offset from IS_RCVAVAIL_START) + * * RX block receive available interrupt. Source is < 160. + * + * This is the general interrupt handler for user (PSM) receive contexts, + * and can only be used for non-threaded IRQs. 
*/ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) { @@ -8154,12 +8161,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { - /* Check for non-user contexts, including vnic */ - if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic) - rcd->do_interrupt(rcd, 0); - else - handle_user_interrupt(rcd); - + handle_user_interrupt(rcd); hfi1_rcd_put(rcd); return; /* OK */ } @@ -8173,8 +8175,14 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) err_detail, source); } -/* +/** + * is_rcv_urgent_int() - User receive context urgent IRQ handler + * @dd: valid dd + * @source: logical IRQ source (offset from IS_RCVURGENT_START) + * * RX block receive urgent interrupt. Source is < 160. + * + * NOTE: kernel receive contexts specifically do NOT enable this IRQ. */ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) { @@ -8184,11 +8192,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { - /* only pay attention to user urgent interrupts */ - if (source >= dd->first_dyn_alloc_ctxt && - !rcd->is_vnic) - handle_user_interrupt(rcd); - + handle_user_interrupt(rcd); hfi1_rcd_put(rcd); return; /* OK */ } @@ -8260,9 +8264,14 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source) dd_dev_err(dd, "invalid interrupt source %u\n", source); } -/* - * General interrupt handler. This is able to correctly handle - * all interrupts in case INTx is used. +/** + * general_interrupt() - General interrupt handler + * @irq: MSIx IRQ vector + * @data: hfi1 devdata + * + * This is able to correctly handle all non-threaded interrupts. Receive + * context DATA IRQs are threaded and are not supported by this handler. 
+ * */ static irqreturn_t general_interrupt(int irq, void *data) { @@ -10130,7 +10139,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << SEND_CTXT_CHECK_SLID_VALUE_SHIFT); - for (i = 0; i < dd->chip_send_contexts; i++) { + for (i = 0; i < chip_send_contexts(dd); i++) { hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x", i, (u32)sreg); write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg); @@ -11857,7 +11866,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, * sequence numbers could land exactly on the same spot. * E.g. a rcd restart before the receive header wrapped. */ - memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size); + memset(rcd->rcvhdrq, 0, rcvhdrq_size(rcd)); /* starting timeout */ rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr; @@ -11952,9 +11961,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; - rcd->rcvctrl = rcvctrl; hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); - write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl); + write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); /* work around sticky RcvCtxtStatus.BlockedRHQFull */ if (did_enable && @@ -12042,7 +12050,7 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) } else if (entry->flags & CNTR_SDMA) { hfi1_cdbg(CNTR, "\t Per SDMA Engine\n"); - for (j = 0; j < dd->chip_sdma_engines; + for (j = 0; j < chip_sdma_engines(dd); j++) { val = entry->rw_cntr(entry, dd, j, @@ -12418,6 +12426,7 @@ static int init_cntrs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; const char *bit_type_32 = ",32"; const int bit_type_32_sz = strlen(bit_type_32); + u32 sdma_engines = chip_sdma_engines(dd); /* set up the stats timer; the add_timer is done at the end */ timer_setup(&dd->synth_stats_timer, update_synth_timer, 0); @@ -12450,7 +12459,7 @@ static int 
init_cntrs(struct hfi1_devdata *dd) } } else if (dev_cntrs[i].flags & CNTR_SDMA) { dev_cntrs[i].offset = dd->ndevcntrs; - for (j = 0; j < dd->chip_sdma_engines; j++) { + for (j = 0; j < sdma_engines; j++) { snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); sz += strlen(name); @@ -12507,7 +12516,7 @@ static int init_cntrs(struct hfi1_devdata *dd) *p++ = '\n'; } } else if (dev_cntrs[i].flags & CNTR_SDMA) { - for (j = 0; j < dd->chip_sdma_engines; j++) { + for (j = 0; j < sdma_engines; j++) { snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); memcpy(p, name, strlen(name)); @@ -13020,9 +13029,9 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0); write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0); write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0); - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0); write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0); @@ -13030,48 +13039,30 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); } -/* Move to pcie.c? */ -static void disable_intx(struct pci_dev *pdev) -{ - pci_intx(pdev, 0); -} - /** * hfi1_clean_up_interrupts() - Free all IRQ resources * @dd: valid device data data structure * - * Free the MSI or INTx IRQs and assoicated PCI resources, - * if they have been allocated. + * Free the MSIx and associated PCI resources, if they have been allocated. 
*/ void hfi1_clean_up_interrupts(struct hfi1_devdata *dd) { int i; + struct hfi1_msix_entry *me = dd->msix_entries; /* remove irqs - must happen before disabling/turning off */ - if (dd->num_msix_entries) { - /* MSI-X */ - struct hfi1_msix_entry *me = dd->msix_entries; - - for (i = 0; i < dd->num_msix_entries; i++, me++) { - if (!me->arg) /* => no irq, no affinity */ - continue; - hfi1_put_irq_affinity(dd, me); - pci_free_irq(dd->pcidev, i, me->arg); - } - - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; - } else { - /* INTx */ - if (dd->requested_intx_irq) { - pci_free_irq(dd->pcidev, 0, dd); - dd->requested_intx_irq = 0; - } - disable_intx(dd->pcidev); + for (i = 0; i < dd->num_msix_entries; i++, me++) { + if (!me->arg) /* => no irq, no affinity */ + continue; + hfi1_put_irq_affinity(dd, me); + pci_free_irq(dd->pcidev, i, me->arg); } + /* clean structures */ + kfree(dd->msix_entries); + dd->msix_entries = NULL; + dd->num_msix_entries = 0; + pci_free_irq_vectors(dd->pcidev); } @@ -13121,20 +13112,6 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd, msix_intr); } -static int request_intx_irq(struct hfi1_devdata *dd) -{ - int ret; - - ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd, - DRIVER_NAME "_%d", dd->unit); - if (ret) - dd_dev_err(dd, "unable to request INTx interrupt, err %d\n", - ret); - else - dd->requested_intx_irq = 1; - return ret; -} - static int request_msix_irqs(struct hfi1_devdata *dd) { int first_general, last_general; @@ -13253,11 +13230,6 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) { int i; - if (!dd->num_msix_entries) { - synchronize_irq(pci_irq_vector(dd->pcidev, 0)); - return; - } - for (i = 0; i < dd->vnic.num_ctxt; i++) { struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; @@ -13346,7 +13318,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) { u32 total; int ret, request; - int 
single_interrupt = 0; /* we expect to have all the interrupts */ /* * Interrupt count: @@ -13363,17 +13334,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) if (request < 0) { ret = request; goto fail; - } else if (request == 0) { - /* using INTx */ - /* dd->num_msix_entries already zero */ - single_interrupt = 1; - dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n"); - } else if (request < total) { - /* using MSI-X, with reduced interrupts */ - dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n", - total, request); - ret = -EINVAL; - goto fail; } else { dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), GFP_KERNEL); @@ -13394,10 +13354,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) /* reset general handler mask, chip MSI-X mappings */ reset_interrupts(dd); - if (single_interrupt) - ret = request_intx_irq(dd); - else - ret = request_msix_irqs(dd); + ret = request_msix_irqs(dd); if (ret) goto fail; @@ -13429,6 +13386,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int qos_rmt_count; int user_rmt_reduced; u32 n_usr_ctxts; + u32 send_contexts = chip_send_contexts(dd); + u32 rcv_contexts = chip_rcv_contexts(dd); /* * Kernel receive contexts: @@ -13450,16 +13409,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * Every kernel receive context needs an ACK send context. 
* one send context is allocated for each VL{0-7} and VL15 */ - if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { + if (num_kernel_contexts > (send_contexts - num_vls - 1)) { dd_dev_err(dd, "Reducing # kernel rcv contexts to: %d, from %lu\n", - (int)(dd->chip_send_contexts - num_vls - 1), + send_contexts - num_vls - 1, num_kernel_contexts); - num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; + num_kernel_contexts = send_contexts - num_vls - 1; } /* Accommodate VNIC contexts if possible */ - if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) { + if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) { dd_dev_err(dd, "No receive contexts available for VNIC\n"); num_vnic_contexts = 0; } @@ -13477,13 +13436,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * Adjust the counts given a global max. */ - if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) { + if (total_contexts + n_usr_ctxts > rcv_contexts) { dd_dev_err(dd, "Reducing # user receive contexts to: %d, from %u\n", - (int)(dd->chip_rcv_contexts - total_contexts), + rcv_contexts - total_contexts, n_usr_ctxts); /* recalculate */ - n_usr_ctxts = dd->chip_rcv_contexts - total_contexts; + n_usr_ctxts = rcv_contexts - total_contexts; } /* each user context requires an entry in the RMT */ @@ -13509,7 +13468,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd->freectxts = n_usr_ctxts; dd_dev_info(dd, "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", - (int)dd->chip_rcv_contexts, + rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, dd->num_vnic_contexts, @@ -13527,7 +13486,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * contexts. 
*/ dd->rcv_entries.group_size = RCV_INCREMENT; - ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size; + ngroups = chip_rcv_array_count(dd) / dd->rcv_entries.group_size; dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts; dd->rcv_entries.nctxt_extra = ngroups - (dd->num_rcv_contexts * dd->rcv_entries.ngroups); @@ -13552,7 +13511,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd_dev_info( dd, "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n", - dd->chip_send_contexts, + send_contexts, dd->num_send_contexts, dd->sc_sizes[SC_KERNEL].count, dd->sc_sizes[SC_ACK].count, @@ -13610,7 +13569,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) write_csr(dd, CCE_INT_MAP + (8 * i), 0); /* SendCtxtCreditReturnAddr */ - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); /* PIO Send buffers */ @@ -13623,7 +13582,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* RcvHdrAddr */ /* RcvHdrTailAddr */ /* RcvTidFlowTable */ - for (i = 0; i < dd->chip_rcv_contexts; i++) { + for (i = 0; i < chip_rcv_contexts(dd); i++) { write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0); write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0); for (j = 0; j < RXE_NUM_TID_FLOWS; j++) @@ -13631,7 +13590,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) } /* RcvArray */ - for (i = 0; i < dd->chip_rcv_array_count; i++) + for (i = 0; i < chip_rcv_array_count(dd); i++) hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0); /* RcvQPMapTable */ @@ -13789,7 +13748,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0); for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++) write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0); - for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++) + for (i = 0; i < chip_send_contexts(dd) / 
NUM_CONTEXTS_PER_SET; i++) write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0); for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++) write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0); @@ -13817,7 +13776,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) /* * TXE Per-Context CSRs */ - for (i = 0; i < dd->chip_send_contexts; i++) { + for (i = 0; i < chip_send_contexts(dd); i++) { write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0); write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); @@ -13835,7 +13794,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) /* * TXE Per-SDMA CSRs */ - for (i = 0; i < dd->chip_sdma_engines; i++) { + for (i = 0; i < chip_sdma_engines(dd); i++) { write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* SEND_DMA_STATUS read-only */ write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0); @@ -13968,7 +13927,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) /* * RXE Kernel and User Per-Context CSRs */ - for (i = 0; i < dd->chip_rcv_contexts; i++) { + for (i = 0; i < chip_rcv_contexts(dd); i++) { /* kernel */ write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0); /* RCV_CTXT_STATUS read-only */ @@ -14084,13 +14043,13 @@ static int init_chip(struct hfi1_devdata *dd) /* disable send contexts and SDMA engines */ write_csr(dd, SEND_CTRL, 0); - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* disable port (turn off RXE inbound traffic) and contexts */ write_csr(dd, RCV_CTRL, 0); - for (i = 0; i < dd->chip_rcv_contexts; i++) + for (i = 0; i < chip_rcv_contexts(dd); i++) write_csr(dd, RCV_CTXT_CTRL, 0); /* mask all interrupt sources */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) @@ -14709,9 +14668,9 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull); /* enable all per-context and 
per-SDMA engine errors */ - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull); /* set the local CU to AU mapping */ @@ -14979,11 +14938,13 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, "Functional simulator" }; struct pci_dev *parent = pdev->bus->self; + u32 sdma_engines; dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * sizeof(struct hfi1_pportdata)); if (IS_ERR(dd)) goto bail; + sdma_engines = chip_sdma_engines(dd); ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) { int vl; @@ -15081,11 +15042,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* give a reasonable active value, will be set on link up */ dd->pport->link_speed_active = OPA_LINK_SPEED_25G; - dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS); - dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS); - dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES); - dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE); - dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE); /* fix up link widths for emulation _p */ ppd = dd->pport; if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) { @@ -15096,11 +15052,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, OPA_LINK_WIDTH_1X; } /* insure num_vls isn't larger than number of sdma engines */ - if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) { + if (HFI1_CAP_IS_KSET(SDMA) && num_vls > sdma_engines) { dd_dev_err(dd, "num_vls %u too large, using %u VLs\n", - num_vls, dd->chip_sdma_engines); - num_vls = dd->chip_sdma_engines; - ppd->vls_supported = dd->chip_sdma_engines; + num_vls, sdma_engines); + num_vls = sdma_engines; + ppd->vls_supported = sdma_engines; ppd->vls_operational = ppd->vls_supported; } @@ -15216,13 +15172,6 @@ struct hfi1_devdata 
*hfi1_init_dd(struct pci_dev *pdev, */ aspm_init(dd); - dd->rcvhdrsize = DEFAULT_RCVHDRSIZE; - /* - * rcd[0] is guaranteed to be valid by this point. Also, all - * context are using the same value, as per the module parameter. - */ - dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32); - ret = init_pervl_scs(dd); if (ret) goto bail_cleanup; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index fdf389e46e19..36b04d6300e5 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -656,6 +656,36 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt, write_csr(dd, offset0 + (0x1000 * ctxt), value); } +static inline u32 chip_rcv_contexts(struct hfi1_devdata *dd) +{ + return read_csr(dd, RCV_CONTEXTS); +} + +static inline u32 chip_send_contexts(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_CONTEXTS); +} + +static inline u32 chip_sdma_engines(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_DMA_ENGINES); +} + +static inline u32 chip_pio_mem_size(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_PIO_MEM_SIZE); +} + +static inline u32 chip_sdma_mem_size(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_DMA_MEM_SIZE); +} + +static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd) +{ + return read_csr(dd, RCV_ARRAY_CNT); +} + u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, u32 dw_len); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 94dca95db04f..a41f85558312 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -208,25 +208,25 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf, (offset * RCV_BUF_BLOCK_SIZE)); } -static inline void *hfi1_get_header(struct hfi1_devdata *dd, +static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); 
- return (void *)(rhf_addr - dd->rhf_offset + offset); + return (void *)(rhf_addr - rcd->rhf_offset + offset); } -static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, +static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { - return (struct ib_header *)hfi1_get_header(dd, rhf_addr); + return (struct ib_header *)hfi1_get_header(rcd, rhf_addr); } static inline struct hfi1_16b_header - *hfi1_get_16B_header(struct hfi1_devdata *dd, + *hfi1_get_16B_header(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { - return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr); + return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr); } /* @@ -591,13 +591,12 @@ static void __prescan_rxq(struct hfi1_packet *packet) init_ps_mdata(&mdata, packet); while (1) { - struct hfi1_devdata *dd = rcd->dd; struct hfi1_ibport *ibp = rcd_to_iport(rcd); __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + - dd->rhf_offset; + packet->rcd->rhf_offset; struct rvt_qp *qp; struct ib_header *hdr; - struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; int is_ecn = 0; @@ -612,7 +611,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - packet->hdr = hfi1_get_msgheader(dd, rhf_addr); + packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr); hdr = packet->hdr; lnh = ib_get_lnh(hdr); @@ -718,7 +717,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) ret = check_max_packet(packet, thread); packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + - packet->rcd->dd->rhf_offset; + packet->rcd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); return ret; @@ -757,7 +756,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) * crashing down. 
There is no need to eat another * comparison in this performance critical code. */ - packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet); + packet->rcd->rhf_rcv_function_map[packet->etype](packet); packet->numpkt++; /* Set up for the next packet */ @@ -768,7 +767,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) ret = check_max_packet(packet, thread); packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + - packet->rcd->dd->rhf_offset; + packet->rcd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); return ret; @@ -949,12 +948,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, u8 sc = SC15_PACKET; if (etype == RHF_RCV_TYPE_IB) { - struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd, packet->rhf_addr); sc = hfi1_9B_get_sc5(hdr, packet->rhf); } else if (etype == RHF_RCV_TYPE_BYPASS) { struct hfi1_16b_header *hdr = hfi1_get_16B_header( - packet->rcd->dd, + packet->rcd, packet->rhf_addr); sc = hfi1_16B_get_sc(hdr); } @@ -1034,7 +1033,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) packet.rhqoff += packet.rsize; packet.rhf_addr = (__le32 *)rcd->rcvhdrq + packet.rhqoff + - dd->rhf_offset; + rcd->rhf_offset; packet.rhf = rhf_to_cpu(packet.rhf_addr); } else if (skip_pkt) { @@ -1384,7 +1383,7 @@ bail: static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) { packet->hdr = (struct hfi1_ib_message_header *) - hfi1_get_msgheader(packet->rcd->dd, + hfi1_get_msgheader(packet->rcd, packet->rhf_addr); packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; } @@ -1485,7 +1484,7 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) u8 l4; packet->hdr = (struct hfi1_16b_header *) - hfi1_get_16B_header(packet->rcd->dd, + hfi1_get_16B_header(packet->rcd, packet->rhf_addr); l4 = hfi1_16B_get_l4(packet->hdr); if (l4 == OPA_16B_L4_IB_LOCAL) { @@ -1575,7 +1574,7 @@ void handle_eflags(struct 
hfi1_packet *packet) * The following functions are called by the interrupt handler. They are type * specific handlers for each packet type. */ -int process_receive_ib(struct hfi1_packet *packet) +static int process_receive_ib(struct hfi1_packet *packet) { if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; @@ -1607,7 +1606,7 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) return false; } -int process_receive_bypass(struct hfi1_packet *packet) +static int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; @@ -1649,7 +1648,7 @@ int process_receive_bypass(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int process_receive_error(struct hfi1_packet *packet) +static int process_receive_error(struct hfi1_packet *packet) { /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( @@ -1668,7 +1667,7 @@ int process_receive_error(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int kdeth_process_expected(struct hfi1_packet *packet) +static int kdeth_process_expected(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) @@ -1682,7 +1681,7 @@ int kdeth_process_expected(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int kdeth_process_eager(struct hfi1_packet *packet) +static int kdeth_process_eager(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) @@ -1695,7 +1694,7 @@ int kdeth_process_eager(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int process_receive_invalid(struct hfi1_packet *packet) +static int process_receive_invalid(struct hfi1_packet *packet) { dd_dev_err(packet->rcd->dd, "Invalid packet type %d. 
Dropping\n", rhf_rcv_type(packet->rhf)); @@ -1719,9 +1718,8 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) init_ps_mdata(&mdata, &packet); while (1) { - struct hfi1_devdata *dd = rcd->dd; __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + - dd->rhf_offset; + rcd->rhf_offset; struct ib_header *hdr; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn; @@ -1738,7 +1736,7 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) if (etype > RHF_RCV_TYPE_IB) goto next; - packet.hdr = hfi1_get_msgheader(dd, rhf_addr); + packet.hdr = hfi1_get_msgheader(rcd, rhf_addr); hdr = packet.hdr; lnh = be16_to_cpu(hdr->lrh[0]) & 3; @@ -1760,3 +1758,14 @@ next: update_ps_mdata(&mdata, rcd); } } + +const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = { + [RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected, + [RHF_RCV_TYPE_EAGER] = kdeth_process_eager, + [RHF_RCV_TYPE_IB] = process_receive_ib, + [RHF_RCV_TYPE_ERROR] = process_receive_error, + [RHF_RCV_TYPE_BYPASS] = process_receive_bypass, + [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, +}; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 0fc4aa9455c3..1fc75647e47b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -411,7 +411,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) mapio = 1; break; case RCV_HDRQ: - memlen = uctxt->rcvhdrq_size; + memlen = rcvhdrq_size(uctxt); memvirt = uctxt->rcvhdrq; break; case RCV_EGRBUF: { @@ -521,7 +521,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) break; case SUBCTXT_RCV_HDRQ: memaddr = (u64)uctxt->subctxt_rcvhdr_base; - memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt; + memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt; flags |= VM_IO | VM_DONTEXPAND; vmf = 1; break; @@ -985,7 
+985,11 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, * sub contexts. * This has to be done here so the rest of the sub-contexts find the * proper base context. + * NOTE: _set_bit() can be used here because the context creation is + * protected by the mutex (rather than the spin_lock), and will be the + * very first instance of this context. */ + __set_bit(0, uctxt->in_use_ctxts); if (uinfo->subctxt_cnt) init_subctxts(uctxt, uinfo); uctxt->userversion = uinfo->userversion; @@ -1040,7 +1044,7 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) return -ENOMEM; /* We can take the size of the RcvHdr Queue from the master */ - uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size * + uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) * num_subctxts); if (!uctxt->subctxt_rcvhdr_base) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4ab8b5bfbed1..d9470317983f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -169,12 +169,6 @@ extern const struct pci_error_handlers hfi1_pci_err_handler; struct hfi1_opcode_stats_perctx; struct ctxt_eager_bufs { - ssize_t size; /* total size of eager buffers */ - u32 count; /* size of buffers array */ - u32 numbufs; /* number of buffers allocated */ - u32 alloced; /* number of rcvarray entries used */ - u32 rcvtid_size; /* size of each eager rcv tid */ - u32 threshold; /* head update threshold */ struct eager_buffer { void *addr; dma_addr_t dma; @@ -184,6 +178,12 @@ struct ctxt_eager_bufs { void *addr; dma_addr_t dma; } *rcvtids; + u32 size; /* total size of eager buffers */ + u32 rcvtid_size; /* size of each eager rcv tid */ + u16 count; /* size of buffers array */ + u16 numbufs; /* number of buffers allocated */ + u16 alloced; /* number of rcvarray entries used */ + u16 threshold; /* head update threshold */ }; struct exp_tid_set { @@ -191,43 +191,84 @@ struct exp_tid_set { u32 count; }; 
+typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); struct hfi1_ctxtdata { - /* shadow the ctxt's RcvCtrl register */ - u64 rcvctrl; /* rcvhdrq base, needs mmap before useful */ void *rcvhdrq; /* kernel virtual address where hdrqtail is updated */ volatile __le64 *rcvhdrtail_kvaddr; - /* when waiting for rcv or pioavail */ - wait_queue_head_t wait; - /* rcvhdrq size (for freeing) */ - size_t rcvhdrq_size; + /* so functions that need physical port can get it easily */ + struct hfi1_pportdata *ppd; + /* so file ops can get at unit */ + struct hfi1_devdata *dd; + /* this receive context's assigned PIO ACK send context */ + struct send_context *sc; + /* per context recv functions */ + const rhf_rcv_function_ptr *rhf_rcv_function_map; + /* + * The interrupt handler for a particular receive context can vary + * throughout it's lifetime. This is not a lock protected data member so + * it must be updated atomically and the prev and new value must always + * be valid. Worst case is we process an extra interrupt and up to 64 + * packets with the wrong interrupt handler. 
+ */ + int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + /* verbs rx_stats per rcd */ + struct hfi1_opcode_stats_perctx *opstats; + /* clear interrupt mask */ + u64 imask; + /* ctxt rcvhdrq head offset */ + u32 head; /* number of rcvhdrq entries */ u16 rcvhdrq_cnt; + u8 ireg; /* clear interrupt register */ + /* receive packet sequence counter */ + u8 seq_cnt; /* size of each of the rcvhdrq entries */ - u16 rcvhdrqentsize; + u8 rcvhdrqentsize; + /* offset of RHF within receive header entry */ + u8 rhf_offset; + /* dynamic receive available interrupt timeout */ + u8 rcvavail_timeout; + /* Indicates that this is vnic context */ + bool is_vnic; + /* vnic queue index this context is mapped to */ + u8 vnic_q_idx; + /* Is ASPM interrupt supported for this context */ + bool aspm_intr_supported; + /* ASPM state (enabled/disabled) for this context */ + bool aspm_enabled; + /* Is ASPM processing enabled for this context (in intr context) */ + bool aspm_intr_enable; + struct ctxt_eager_bufs egrbufs; + /* QPs waiting for context processing */ + struct list_head qp_wait_list; + /* tid allocation lists */ + struct exp_tid_set tid_group_list; + struct exp_tid_set tid_used_list; + struct exp_tid_set tid_full_list; + + /* Timer for re-enabling ASPM if interrupt activity quiets down */ + struct timer_list aspm_timer; + /* per-context configuration flags */ + unsigned long flags; + /* array of tid_groups */ + struct tid_group *groups; /* mmap of hdrq, must fit in 44 bits */ dma_addr_t rcvhdrq_dma; dma_addr_t rcvhdrqtailaddr_dma; - struct ctxt_eager_bufs egrbufs; - /* this receive context's assigned PIO ACK send context */ - struct send_context *sc; - - /* dynamic receive available interrupt timeout */ - u32 rcvavail_timeout; + /* Last interrupt timestamp */ + ktime_t aspm_ts_last_intr; + /* Last timestamp at which we scheduled a timer for this context */ + ktime_t aspm_ts_timer_sched; + /* Lock to serialize between intr, timer intr and user threads */ + spinlock_t 
aspm_lock; /* Reference count the base context usage */ struct kref kref; - - /* Device context index */ - u16 ctxt; - /* - * non-zero if ctxt can be shared, and defines the maximum number of - * sub-contexts for this device context. - */ - u16 subctxt_cnt; - /* non-zero if ctxt is being shared. */ - u16 subctxt_id; - u8 uuid[16]; + /* numa node of this context */ + int numa_id; + /* associated msix interrupt. */ + s16 msix_intr; /* job key */ u16 jkey; /* number of RcvArray groups for this context. */ @@ -238,87 +279,59 @@ struct hfi1_ctxtdata { u16 expected_count; /* index of first expected TID entry. */ u16 expected_base; - /* array of tid_groups */ - struct tid_group *groups; - - struct exp_tid_set tid_group_list; - struct exp_tid_set tid_used_list; - struct exp_tid_set tid_full_list; + /* Device context index */ + u8 ctxt; - /* lock protecting all Expected TID data of user contexts */ + /* PSM Specific fields */ + /* lock protecting all Expected TID data */ struct mutex exp_mutex; - /* per-context configuration flags */ - unsigned long flags; - /* per-context event flags for fileops/intr communication */ - unsigned long event_flags; - /* total number of polled urgent packets */ - u32 urgent; - /* saved total number of polled urgent packets for poll edge trigger */ - u32 urgent_poll; + /* when waiting for rcv or pioavail */ + wait_queue_head_t wait; + /* uuid from PSM */ + u8 uuid[16]; /* same size as task_struct .comm[], command that opened context */ char comm[TASK_COMM_LEN]; - /* so file ops can get at unit */ - struct hfi1_devdata *dd; - /* so functions that need physical port can get it easily */ - struct hfi1_pportdata *ppd; - /* associated msix interrupt */ - u32 msix_intr; + /* Bitmask of in use context(s) */ + DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); + /* per-context event flags for fileops/intr communication */ + unsigned long event_flags; /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ void *subctxt_uregbase; /* An array 
of pages for the eager receive buffers * N */ void *subctxt_rcvegrbuf; /* An array of pages for the eager header queue entries * N */ void *subctxt_rcvhdr_base; - /* Bitmask of in use context(s) */ - DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); - /* The version of the library which opened this ctxt */ - u32 userversion; + /* total number of polled urgent packets */ + u32 urgent; + /* saved total number of polled urgent packets for poll edge trigger */ + u32 urgent_poll; /* Type of packets or conditions we want to poll for */ u16 poll_type; - /* receive packet sequence counter */ - u8 seq_cnt; - /* ctxt rcvhdrq head offset */ - u32 head; - /* QPs waiting for context processing */ - struct list_head qp_wait_list; - /* interrupt handling */ - u64 imask; /* clear interrupt mask */ - int ireg; /* clear interrupt register */ - int numa_id; /* numa node of this context */ - /* verbs rx_stats per rcd */ - struct hfi1_opcode_stats_perctx *opstats; - - /* Is ASPM interrupt supported for this context */ - bool aspm_intr_supported; - /* ASPM state (enabled/disabled) for this context */ - bool aspm_enabled; - /* Timer for re-enabling ASPM if interrupt activity quietens down */ - struct timer_list aspm_timer; - /* Lock to serialize between intr, timer intr and user threads */ - spinlock_t aspm_lock; - /* Is ASPM processing enabled for this context (in intr context) */ - bool aspm_intr_enable; - /* Last interrupt timestamp */ - ktime_t aspm_ts_last_intr; - /* Last timestamp at which we scheduled a timer for this context */ - ktime_t aspm_ts_timer_sched; - + /* non-zero if ctxt is being shared. */ + u16 subctxt_id; + /* The version of the library which opened this ctxt */ + u32 userversion; /* - * The interrupt handler for a particular receive context can vary - * throughout it's lifetime. This is not a lock protected data member so - * it must be updated atomically and the prev and new value must always - * be valid. 
Worst case is we process an extra interrupt and up to 64 - * packets with the wrong interrupt handler. + * non-zero if ctxt can be shared, and defines the maximum number of + * sub-contexts for this device context. */ - int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); - - /* Indicates that this is vnic context */ - bool is_vnic; + u8 subctxt_cnt; - /* vnic queue index this context is mapped to */ - u8 vnic_q_idx; }; +/** + * rcvhdrq_size - return total size in bytes for header queue + * @rcd: the receive context + * + * rcvhdrqentsize is in DWs, so we have to convert to bytes + * + */ +static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd) +{ + return PAGE_ALIGN(rcd->rcvhdrq_cnt * + rcd->rcvhdrqentsize * sizeof(u32)); +} + /* * Represents a single packet at a high level. Put commonly computed things in * here so we do not have to keep doing them over and over. The rule of thumb is @@ -897,12 +910,11 @@ struct hfi1_pportdata { u64 vl_xmit_flit_cnt[C_VL_COUNT + 1]; }; -typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); - typedef void (*opcode_handler)(struct hfi1_packet *packet); typedef void (*hfi1_make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct rvt_swqe *wqe); +extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[]; /* return values for the RHF receive functions */ @@ -1046,8 +1058,6 @@ struct hfi1_devdata { dma_addr_t sdma_pad_phys; /* for deallocation */ size_t sdma_heads_size; - /* number from the chip */ - u32 chip_sdma_engines; /* num used */ u32 num_sdma; /* array of engines sized by num_sdma */ @@ -1102,8 +1112,6 @@ struct hfi1_devdata { /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; - u32 freezelen; /* max length of freezemsg */ - u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ spinlock_t uctxt_lock; /* protect rcd changes */ @@ -1130,25 +1138,6 @@ struct hfi1_devdata { /* Base GUID 
for device (network order) */ u64 base_guid; - /* these are the "32 bit" regs */ - - /* value we put in kr_rcvhdrsize */ - u32 rcvhdrsize; - /* number of receive contexts the chip supports */ - u32 chip_rcv_contexts; - /* number of receive array entries */ - u32 chip_rcv_array_count; - /* number of PIO send contexts the chip supports */ - u32 chip_send_contexts; - /* number of bytes in the PIO memory buffer */ - u32 chip_pio_mem_size; - /* number of bytes in the SDMA memory buffer */ - u32 chip_sdma_mem_size; - - /* size of each rcvegrbuffer */ - u32 rcvegrbufsize; - /* log2 of above */ - u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; u8 dc_shutdown; @@ -1221,9 +1210,6 @@ struct hfi1_devdata { u32 num_msix_entries; u32 first_dyn_msix_idx; - /* INTx information */ - u32 requested_intx_irq; /* did we request one? */ - /* general interrupt: mask of handled interrupts */ u64 gi_mask[CCE_NUM_INT_CSRS]; @@ -1289,8 +1275,6 @@ struct hfi1_devdata { u64 sw_cce_err_status_aggregate; /* Software counter that aggregates all bypass packet rcv errors */ u64 sw_rcv_bypass_packet_errors; - /* receive interrupt function */ - rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; /* Save the enabled LCB error bits */ u64 lcb_err_en; @@ -1329,10 +1313,7 @@ struct hfi1_devdata { /* seqlock for sc2vl */ seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; u64 sc2vl[4]; - /* receive interrupt functions */ - rhf_rcv_function_ptr *rhf_rcv_function_map; u64 __percpu *rcv_limit; - u16 rhf_offset; /* offset of RHF within receive header entry */ /* adding a new field here would make it part of this cacheline */ /* OUI comes from the HW. Used everywhere as 3 separate bytes. 
*/ @@ -1471,7 +1452,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, /* calculate the current RHF address */ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) { - return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset; + return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset; } int hfi1_reset_device(int); @@ -2021,12 +2002,6 @@ static inline void flush_wc(void) } void handle_eflags(struct hfi1_packet *packet); -int process_receive_ib(struct hfi1_packet *packet); -int process_receive_bypass(struct hfi1_packet *packet); -int process_receive_error(struct hfi1_packet *packet); -int kdeth_process_expected(struct hfi1_packet *packet); -int kdeth_process_eager(struct hfi1_packet *packet); -int process_receive_invalid(struct hfi1_packet *packet); void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd); /* global module parameter variables */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f110842b91f5..758d273c32cf 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -364,9 +364,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, hfi1_exp_tid_group_init(rcd); rcd->ppd = ppd; rcd->dd = dd; - __set_bit(0, rcd->in_use_ctxts); rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; + rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; mutex_init(&rcd->exp_mutex); @@ -404,6 +404,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->rcvhdrq_cnt = rcvhdrcnt; rcd->rcvhdrqentsize = hfi1_hdrq_entsize; + rcd->rhf_offset = + rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32); /* * Simple Eager buffer allocation: we have already pre-allocated * the number of RcvArray entry groups. 
Each ctxtdata structure @@ -853,24 +855,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) struct hfi1_ctxtdata *rcd; struct hfi1_pportdata *ppd; - /* Set up recv low level handlers */ - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EXPECTED] = - kdeth_process_expected; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EAGER] = - kdeth_process_eager; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_IB] = process_receive_ib; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_ERROR] = - process_receive_error; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_BYPASS] = - process_receive_bypass; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID5] = - process_receive_invalid; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID6] = - process_receive_invalid; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID7] = - process_receive_invalid; - dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions; - /* Set up send low level handlers */ dd->process_pio_send = hfi1_verbs_send_pio; dd->process_dma_send = hfi1_verbs_send_dma; @@ -936,7 +920,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) } /* Allocate enough memory for user event notification. 
*/ - len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * + len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS * sizeof(*dd->events)); dd->events = vmalloc_user(len); if (!dd->events) @@ -948,9 +932,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) dd->status = vmalloc_user(PAGE_SIZE); if (!dd->status) dd_dev_err(dd, "Failed to allocate dev status page\n"); - else - dd->freezelen = PAGE_SIZE - (sizeof(*dd->status) - - sizeof(dd->status->freezemsg)); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (dd->status) @@ -1144,7 +1125,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) return; if (rcd->rcvhdrq) { - dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, + dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd), rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; if (rcd->rcvhdrtail_kvaddr) { @@ -1855,12 +1836,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrq) { gfp_t gfp_flags; - /* - * rcvhdrqentsize is in DWs, so we have to convert to bytes - * (* sizeof(u32)). 
- */ - amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * - sizeof(u32)); + amt = rcvhdrq_size(rcd); if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic) gfp_flags = GFP_KERNEL; @@ -1885,8 +1861,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrtail_kvaddr) goto bail_free; } - - rcd->rcvhdrq_size = amt; } /* * These values are per-context: @@ -1902,7 +1876,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg); - reg = (dd->rcvhdrsize & RCV_HDR_SIZE_HDR_SIZE_MASK) + reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) << RCV_HDR_SIZE_HDR_SIZE_SHIFT; write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg); @@ -1938,9 +1912,9 @@ bail: int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) { struct hfi1_devdata *dd = rcd->dd; - u32 max_entries, egrtop, alloced_bytes = 0, idx = 0; + u32 max_entries, egrtop, alloced_bytes = 0; gfp_t gfp_flags; - u16 order; + u16 order, idx = 0; int ret = 0; u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 4d4371bf2c7c..de3ee606034c 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -157,6 +157,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) unsigned long len; resource_size_t addr; int ret = 0; + u32 rcv_array_count; addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); @@ -186,9 +187,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) goto nomem; } - dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); - dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); - dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; + rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); + dd_dev_info(dd, "RcvArray count: %u\n", 
rcv_array_count); + dd->base2_start = RCV_ARRAY + rcv_array_count * 8; dd->kregbase2 = ioremap_nocache( addr + dd->base2_start, @@ -214,13 +215,13 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) * to write an entire cacheline worth of entries in one shot. */ dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, - dd->chip_rcv_array_count * 8); + rcv_array_count * 8); if (!dd->rcvarray_wc) { dd_dev_err(dd, "WC mapping of receive array failed\n"); goto nomem; } dd_dev_info(dd, "WC RcvArray: %p for %x\n", - dd->rcvarray_wc, dd->chip_rcv_array_count * 8); + dd->rcvarray_wc, rcv_array_count * 8); dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; @@ -346,15 +347,13 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * Returns: * - actual number of interrupts allocated or - * - 0 if fell back to INTx. * - error */ int request_msix(struct hfi1_devdata *dd, u32 msireq) { int nvec; - nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, - PCI_IRQ_MSIX | PCI_IRQ_LEGACY); + nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX); if (nvec < 0) { dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); return nvec; @@ -362,10 +361,6 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) tune_pcie_caps(dd); - /* check for legacy IRQ */ - if (nvec == 1 && !dd->pcidev->msix_enabled) - return 0; - return nvec; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9cac15d10c4f..c2c1cba5b23b 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -226,7 +226,7 @@ static const char *sc_type_name(int index) int init_sc_pools_and_sizes(struct hfi1_devdata *dd) { struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; - int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1; + int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; int total_contexts = 0; int fixed_blocks; int pool_blocks; @@ -343,8 +343,8 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd) sc_type_name(i), count); return -EINVAL; } - if (total_contexts + count > dd->chip_send_contexts) - count = dd->chip_send_contexts - total_contexts; + if (total_contexts + count > chip_send_contexts(dd)) + count = chip_send_contexts(dd) - total_contexts; total_contexts += count; @@ -507,7 +507,7 @@ static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, if (sci->type == type && sci->allocated == 0) { sci->allocated = 1; /* use a 1:1 mapping, but make them non-equal */ - context = dd->chip_send_contexts - index - 1; + context = chip_send_contexts(dd) - index - 1; dd->hw_to_sw[context] = index; *sw_index = index; *hw_context = context; @@ -1618,11 +1618,11 @@ static void sc_piobufavail(struct send_context *sc) /* Wake up the most starved one first */ if (n) hfi1_qp_wakeup(qps[max_idx], - RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); for (i = 0; i < n; i++) if (i != max_idx) hfi1_qp_wakeup(qps[i], - RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1697d96151bd..9b1e84a6b1cc 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. @@ -273,7 +273,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PATH_MIG_STATE && attr->path_mig_state == IB_MIG_MIGRATED && qp->s_mig_state == IB_MIG_ARMED) { - qp->s_flags |= RVT_S_AHG_CLEAR; + qp->s_flags |= HFI1_S_AHG_CLEAR; priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); @@ -717,7 +717,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->remote_ah_attr = qp->alt_ah_attr; qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr); qp->s_pkey_index = qp->s_alt_pkey_index; - qp->s_flags |= RVT_S_AHG_CLEAR; + qp->s_flags |= HFI1_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); qp_set_16b(qp); diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index b2d4cba8d15b..078cff7560b6 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -1,7 +1,7 @@ #ifndef _QP_H #define _QP_H /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -70,6 +70,26 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) } /* + * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK + * + * HFI1_S_AHG_VALID - ahg header valid on chip + * HFI1_S_AHG_CLEAR - have send engine clear ahg state + * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain + * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1 + */ +#define HFI1_S_AHG_VALID 0x80000000 +#define HFI1_S_AHG_CLEAR 0x40000000 +#define HFI1_S_WAIT_PIO_DRAIN 0x20000000 +#define HFI1_S_MIN_BIT_MASK 0x01000000 + +/* + * overload wait defines + */ + +#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN) +#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) + +/* * free_ahg - clear ahg from QP */ static inline void clear_ahg(struct rvt_qp *qp) @@ -77,7 +97,7 @@ static inline void clear_ahg(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; priv->s_ahg->ahgcount = 0; - qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); + qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR); if (priv->s_sde && qp->s_ahgidx >= 0) sdma_ahg_free(priv->s_sde, qp->s_ahgidx); qp->s_ahgidx = -1; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1a1a47ac53c6..1d31bd2fa91f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -241,7 +241,7 @@ bail: smp_wmb(); qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING - | RVT_S_AHG_VALID); + | HFI1_S_AHG_VALID); return 0; } @@ -1024,7 +1024,7 @@ done: if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) qp->s_flags |= RVT_S_WAIT_PSN; - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; } /* diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index ef4c566e206f..5f56f3c1b4c4 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -194,7 +194,7 @@ static void ruc_loopback(struct rvt_qp *sqp) spin_lock_irqsave(&sqp->s_lock, flags); /* Return if we are already busy processing a work request. 
*/ - if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || + if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) || !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) goto unlock; @@ -533,9 +533,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) { struct hfi1_qp_priv *priv = qp->priv; - if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR)) + if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR)) clear_ahg(qp); - if (!(qp->s_flags & RVT_S_AHG_VALID)) { + if (!(qp->s_flags & HFI1_S_AHG_VALID)) { /* first middle that needs copy */ if (qp->s_ahgidx < 0) qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); @@ -544,7 +544,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; /* save to protect a change in another thread */ priv->s_ahg->ahgidx = qp->s_ahgidx; - qp->s_flags |= RVT_S_AHG_VALID; + qp->s_flags |= HFI1_S_AHG_VALID; } } else { /* subsequent middle after valid */ @@ -650,7 +650,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; bth0 |= pkey; bth0 |= extra_bytes << 20; @@ -727,7 +727,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; bth0 |= pkey; bth0 |= extra_bytes << 20; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 7fb350b87b49..88e326d6cc49 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1351,7 +1351,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) struct hfi1_pportdata *ppd = dd->pport + port; u32 per_sdma_credits; uint idle_cnt = sdma_idle_cnt; - size_t num_engines = dd->chip_sdma_engines; + size_t num_engines = chip_sdma_engines(dd); int ret = -ENOMEM; if (!HFI1_CAP_IS_KSET(SDMA)) { @@ -1360,18 +1360,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } if 
(mod_num_sdma && /* can't exceed chip support */ - mod_num_sdma <= dd->chip_sdma_engines && + mod_num_sdma <= chip_sdma_engines(dd) && /* count must be >= vls */ mod_num_sdma >= num_vls) num_engines = mod_num_sdma; dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma); - dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines); + dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd)); dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n", - dd->chip_sdma_mem_size); + chip_sdma_mem_size(dd)); per_sdma_credits = - dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE); + chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE); /* set up freeze waitqueue */ init_waitqueue_head(&dd->sdma_unfreeze_wq); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 08991874c0e2..13374c727b14 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1007,7 +1007,7 @@ static int pio_wait(struct rvt_qp *qp, int was_empty; dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); - dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); + dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN); qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, @@ -1376,7 +1376,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) return pio_wait(qp, ps->s_txreq->psc, ps, - RVT_S_WAIT_PIO_DRAIN); + HFI1_S_WAIT_PIO_DRAIN); return sr(qp, ps, 0); } @@ -1410,7 +1410,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; rdi->dparms.props.max_qp = hfi1_max_qps; rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; - rdi->dparms.props.max_sge = hfi1_max_sges; + rdi->dparms.props.max_send_sge = hfi1_max_sges; + rdi->dparms.props.max_recv_sge = hfi1_max_sges; rdi->dparms.props.max_sge_rd = hfi1_max_sges; rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; @@ -1497,15 
+1498,6 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); - /* - * sm_lid of 0xFFFF needs special handling so that it can - * be differentiated from a permissve LID of 0xFFFF. - * We set the grh_required flag here so the SA can program - * the DGID in the address handle appropriately - */ - if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)) - props->grh_required = true; - return 0; } @@ -1892,7 +1884,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->process_mad = hfi1_process_mad; ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; - strncpy(ibdev->node_desc, init_utsname()->nodename, + strlcpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); /* diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 5d65582fe4d9..ba160f99cf8e 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -120,8 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, uctxt->seq_cnt = 1; uctxt->is_vnic = true; - if (dd->num_msix_entries) - hfi1_set_vnic_msix_info(uctxt); + hfi1_set_vnic_msix_info(uctxt); hfi1_stats.sps_ctxts++; dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); @@ -136,8 +135,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); flush_wc(); - if (dd->num_msix_entries) - hfi1_reset_vnic_msix_info(uctxt); + hfi1_reset_vnic_msix_info(uctxt); /* * Disable receive context and interrupt available, reset all @@ -818,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->chip_sdma_engines, dd->num_vnic_contexts); + chip_sdma_engines(dd), dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = dd->chip_sdma_engines; + vinfo->num_tx_q = chip_sdma_engines(dd); vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d74928621559..14efa3b9adb2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -44,13 +44,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); + const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; - struct ib_gid_attr gid_attr; struct hns_roce_ah *ah; u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - union ib_gid sgid; - int ret; ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) @@ -59,18 +57,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, /* Get mac address */ 
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - /* Get source gid */ - ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &gid_attr); - if (ret) { - dev_err(dev, "get sgid failed! ret = %d\n", ret); - kfree(ah); - return ERR_PTR(ret); - } - - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - dev_put(gid_attr.ndev); + gid_attr = ah_attr->grh.sgid_attr; + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 319cb74aebaf..93d4b4ec002d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -382,15 +382,6 @@ #define ROCEE_VF_EQ_DB_CFG0_REG 0x238 #define ROCEE_VF_EQ_DB_CFG1_REG 0x23C -#define ROCEE_VF_SMAC_CFG0_REG 0x12000 -#define ROCEE_VF_SMAC_CFG1_REG 0x12004 - -#define ROCEE_VF_SGID_CFG0_REG 0x10000 -#define ROCEE_VF_SGID_CFG1_REG 0x10004 -#define ROCEE_VF_SGID_CFG2_REG 0x10008 -#define ROCEE_VF_SGID_CFG3_REG 0x1000c -#define ROCEE_VF_SGID_CFG4_REG 0x10010 - #define ROCEE_VF_ABN_INT_CFG_REG 0x13000 #define ROCEE_VF_ABN_INT_ST_REG 0x13004 #define ROCEE_VF_ABN_INT_EN_REG 0x13008 diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 31221d506d9a..a595e72f243e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -579,22 +579,22 @@ struct hns_roce_ceqe { }; struct hns_roce_aeqe { - u32 asyn; + __le32 asyn; union { struct { - u32 qp; + __le32 qp; u32 rsv0; u32 rsv1; } qp_event; struct { - u32 cq; + __le32 cq; u32 rsv0; u32 rsv1; } cq_event; struct { - u32 ceqe; + __le32 ceqe; u32 rsv0; u32 rsv1; } ce_event; @@ -720,6 +720,9 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 sl_num; + u32 tsq_buf_pg_sz; + 
u32 tpq_buf_pg_sz; u32 chunk_sz; /* chunk size in non multihop mode*/ u64 flags; }; @@ -736,7 +739,7 @@ struct hns_roce_hw { u16 token, int event); int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, - union ib_gid *gid, const struct ib_gid_attr *attr); + const union ib_gid *gid, const struct ib_gid_attr *attr); int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); @@ -864,7 +867,7 @@ static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) return container_of(hr_qp, struct hns_roce_sqp, hr_qp); } -static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest) +static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { __raw_writeq(*(u64 *) val, dest); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 8013d69c5ac4..783d28dd3ca4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -175,10 +175,10 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, UD_SEND_WQE_U32_36_FLOW_LABEL_M, UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0); roce_set_field(ud_sq_wqe->u32_36, - UD_SEND_WQE_U32_36_PRIORITY_M, - UD_SEND_WQE_U32_36_PRIORITY_S, - ah->av.sl_tclass_flowlabel >> - HNS_ROCE_SL_SHIFT); + UD_SEND_WQE_U32_36_PRIORITY_M, + UD_SEND_WQE_U32_36_PRIORITY_S, + le32_to_cpu(ah->av.sl_tclass_flowlabel) >> + HNS_ROCE_SL_SHIFT); roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_SGID_INDEX_M, UD_SEND_WQE_U32_36_SGID_INDEX_S, @@ -333,7 +333,7 @@ out: doorbell[0] = le32_to_cpu(sq_db.u32_4); doorbell[1] = le32_to_cpu(sq_db.u32_8); - hns_roce_write64_k(doorbell, qp->sq.db_reg_l); + hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l); qp->sq_next_wqe = ind; } @@ -349,7 +349,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct 
ib_recv_wr *wr, int nreq = 0; int ind = 0; int i = 0; - u32 reg_val = 0; + u32 reg_val; unsigned long flags = 0; struct hns_roce_rq_wqe_ctrl *ctrl = NULL; struct hns_roce_wqe_data_seg *scat = NULL; @@ -402,14 +402,18 @@ out: wmb(); if (ibqp->qp_type == IB_QPT_GSI) { + __le32 tmp; + /* SW update GSI rq header */ reg_val = roce_read(to_hr_dev(ibqp->device), ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port); - roce_set_field(reg_val, + tmp = cpu_to_le32(reg_val); + roce_set_field(tmp, ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, hr_qp->rq.head); + reg_val = le32_to_cpu(tmp); roce_write(to_hr_dev(ibqp->device), ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val); @@ -430,7 +434,8 @@ out: doorbell[0] = le32_to_cpu(rq_db.u32_4); doorbell[1] = le32_to_cpu(rq_db.u32_8); - hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); + hns_roce_write64_k((__le32 *)doorbell, + hr_qp->rq.db_reg_l); } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -441,51 +446,63 @@ out: static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, int sdb_mode, int odb_mode) { + __le32 tmp; u32 val; val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); - roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); + roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, u32 odb_mode) { + __le32 tmp; u32 val; /* Configure SDB/ODB extend mode */ val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); - roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); + roce_set_bit(tmp, 
ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept, u32 sdb_alful) { + __le32 tmp; u32 val; /* Configure SDB */ val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG); - roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful); - roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, + roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val); } static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, u32 odb_alful) { + __le32 tmp; u32 val; /* Configure ODB */ val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG); - roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful); - roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, + roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val); } @@ -496,6 +513,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; dma_addr_t sdb_dma_addr; + __le32 tmp; u32 val; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -511,7 +529,8 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, /* Configure extend SDB depth */ val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG); - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S, 
db->ext_db->esdb_dep); /* @@ -519,8 +538,9 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. */ - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); @@ -535,6 +555,7 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; dma_addr_t odb_dma_addr; + __le32 tmp; u32 val; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -550,12 +571,14 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, /* Configure extend ODB depth */ val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG); - roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S, db->ext_db->eodb_dep); - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S, db->ext_db->eodb_dep); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val); dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep); @@ -1161,9 +1184,10 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev) static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) { int ret; + u32 val; + __le32 tmp; int raq_shift = 0; dma_addr_t addr; - u32 val; struct hns_roce_v1_priv *priv; struct hns_roce_raq_table *raq; struct device *dev = &hr_dev->pdev->dev; @@ -1189,46 +1213,54 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) /* Configure raq_shift */ raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / 
HNS_ROCE_V1_RAQ_ENTRY); val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG); - roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift); /* * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. */ - roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S, raq->e_raq_buf->map >> 44); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val); dev_dbg(dev, "Configure raq_shift 0x%x.\n", val); /* Configure raq threshold */ val = roce_read(hr_dev, ROCEE_RAQ_WL_REG); - roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, ROCEE_RAQ_WL_ROCEE_RAQ_WL_S, HNS_ROCE_V1_EXT_RAQ_WF); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_RAQ_WL_REG, val); dev_dbg(dev, "Configure raq_wl 0x%x.\n", val); /* Enable extend raq */ val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG); - roce_set_field(val, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S, POL_TIME_INTERVAL_VAL); - roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); - roce_set_field(val, + roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); + roce_set_field(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S, 2); - roce_set_bit(val, + roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val); dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val); /* Enable raq drop */ val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - 
roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val); @@ -1255,20 +1287,25 @@ static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) { + __le32 tmp; u32 val; if (enable_flag) { val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); /* Open all ports */ - roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, ROCEE_GLB_CFG_ROCEE_PORT_ST_S, ALL_PORT_VAL_OPEN); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } else { val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); /* Close all ports */ - roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } } @@ -1498,13 +1535,11 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; - hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); - hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, - ROCEE_VENDOR_PART_ID_REG)); - hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_L_REG)) | - ((u64)le32_to_cpu(roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_H_REG)) << 32); + hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG); + hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG); + hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) | + ((u64)roce_read(hr_dev, + ROCEE_SYS_IMAGE_GUID_H_REG) << 32); hr_dev->hw_rev = HNS_ROCE_HW_VER1; caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; @@ -1557,8 +1592,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) 
caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM; caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM; - caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, - ROCEE_ACK_DELAY_REG)); + caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG); caps->max_mtu = IB_MTU_2048; return 0; @@ -1568,21 +1602,25 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) { int ret; u32 val; + __le32 tmp; struct device *dev = &hr_dev->pdev->dev; /* DMAE user config */ val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG); - roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf); - roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, + roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S, 1 << PAGES_SHIFT_16); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val); val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG); - roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf); - roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, + roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S, 1 << PAGES_SHIFT_16); @@ -1668,6 +1706,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG); unsigned long end; u32 val = 0; + __le32 tmp; end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; while (hns_roce_v1_cmd_pending(hr_dev)) { @@ -1679,15 +1718,17 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, cond_resched(); } - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, + tmp = 
cpu_to_le32(val); + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, op); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, + roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event); + roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M, ROCEE_MB6_ROCEE_MB_TOKEN_S, token); + val = le32_to_cpu(tmp); writeq(in_param, hcr + 0); writeq(out_param, hcr + 2); writel(in_modifier, hcr + 4); @@ -1717,7 +1758,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, return -ETIMEDOUT; } - status = le32_to_cpu((__force __be32) + status = le32_to_cpu((__force __le32) __raw_readl(hcr + HCR_STATUS_OFFSET)); if ((status & STATUS_MASK) != 0x1) { dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status); @@ -1728,7 +1769,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, } static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid, + int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { u32 *p = NULL; @@ -1760,6 +1801,7 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, { u32 reg_smac_l; u16 reg_smac_h; + __le32 tmp; u16 *p_h; u32 *p; u32 val; @@ -1784,10 +1826,12 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, val = roce_read(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); + tmp = cpu_to_le32(val); p_h = (u16 *)(&addr[4]); reg_smac_h = *p_h; - roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M, + roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M, ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); @@ -1797,12 +1841,15 @@ static int 
hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu) { + __le32 tmp; u32 val; val = roce_read(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); - roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); } @@ -1848,9 +1895,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M, MPT_BYTE_12_MW_BIND_COUNTER_S, 0); - mpt_entry->virt_addr_l = (u32)mr->iova; - mpt_entry->virt_addr_h = (u32)(mr->iova >> 32); - mpt_entry->length = (u32)mr->size; + mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova); + mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32)); + mpt_entry->length = cpu_to_le32((u32)mr->size); roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M, MPT_BYTE_28_PD_S, mr->pd); @@ -1885,64 +1932,59 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->mpt_byte_36, MPT_BYTE_36_PA0_H_M, MPT_BYTE_36_PA0_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + (u32)(pages[i] >> PAGES_SHIFT_32)); break; case 1: roce_set_field(mpt_entry->mpt_byte_36, MPT_BYTE_36_PA1_L_M, - MPT_BYTE_36_PA1_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_36_PA1_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_40, MPT_BYTE_40_PA1_H_M, MPT_BYTE_40_PA1_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + (u32)(pages[i] >> PAGES_SHIFT_24)); break; case 2: roce_set_field(mpt_entry->mpt_byte_40, MPT_BYTE_40_PA2_L_M, - MPT_BYTE_40_PA2_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_40_PA2_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_44, MPT_BYTE_44_PA2_H_M, MPT_BYTE_44_PA2_H_S, - cpu_to_le32((u32)(pages[i] >> 
PAGES_SHIFT_16))); + (u32)(pages[i] >> PAGES_SHIFT_16)); break; case 3: roce_set_field(mpt_entry->mpt_byte_44, MPT_BYTE_44_PA3_L_M, - MPT_BYTE_44_PA3_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_44_PA3_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_48, MPT_BYTE_48_PA3_H_M, MPT_BYTE_48_PA3_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8))); + (u32)(pages[i] >> PAGES_SHIFT_8)); break; case 4: mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_56, MPT_BYTE_56_PA4_H_M, MPT_BYTE_56_PA4_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + (u32)(pages[i] >> PAGES_SHIFT_32)); break; case 5: roce_set_field(mpt_entry->mpt_byte_56, MPT_BYTE_56_PA5_L_M, - MPT_BYTE_56_PA5_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_56_PA5_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_60, MPT_BYTE_60_PA5_H_M, MPT_BYTE_60_PA5_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + (u32)(pages[i] >> PAGES_SHIFT_24)); break; case 6: roce_set_field(mpt_entry->mpt_byte_60, MPT_BYTE_60_PA6_L_M, - MPT_BYTE_60_PA6_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_60_PA6_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_PA6_H_M, MPT_BYTE_64_PA6_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + (u32)(pages[i] >> PAGES_SHIFT_16)); break; default: break; @@ -1951,7 +1993,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, free_page((unsigned long) pages); - mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr); + mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr)); roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, MPT_BYTE_12_PBL_ADDR_H_S, @@ -1982,9 +2024,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { - u32 doorbell[2]; + __le32 doorbell[2]; - doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1); + doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth 
<< 1) - 1)); doorbell[1] = 0; roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, @@ -2081,10 +2123,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID); roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M, CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn); - cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4); - cq_context->cq_bt_l = (u32)dma_handle; - cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l); + cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M, @@ -2096,15 +2136,12 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, ilog2((unsigned int)nent)); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); - cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12); - cq_context->cur_cqe_ba0_l = (u32)(mtts[0]); - cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l); + cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M, - CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, - cpu_to_le32((mtts[0]) >> 32)); + CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32); /* Dedicated hardware, directly set 0 */ roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M, @@ -2118,9 +2155,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, tptr_dma_addr >> 44); - cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20); - cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12); + cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12)); roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, @@ -2138,7 +2174,6 
@@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); - cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32); } static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) @@ -2151,7 +2186,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, { struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; - u32 doorbell[2]; + __le32 doorbell[2]; notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; @@ -2159,7 +2194,8 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, * flags = 0; Notification Flag = 1, next * flags = 1; Notification Flag = 0, solocited */ - doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); + doorbell[0] = + cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1)); roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); @@ -2416,7 +2452,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; struct hns_roce_v1_priv *priv; unsigned long end = 0, flags = 0; - uint32_t bt_cmd_val[2] = {0}; + __le32 bt_cmd_val[2] = {0}; void __iomem *bt_cmd; u64 bt_ba = 0; @@ -2468,7 +2504,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, msleep(HW_SYNC_SLEEP_TIME_INTERVAL); } - bt_cmd_val[0] = (uint32_t)bt_ba; + bt_cmd_val[0] = (__le32)bt_ba; roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32); hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); @@ -2569,10 +2605,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_sqp_context *context; struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 
0; + u32 __iomem *addr; int rq_pa_start; + __le32 tmp; u32 reg_val; u64 *mtts; - u32 __iomem *addr; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -2598,7 +2635,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); - context->sq_rq_bt_l = (u32)(dma_handle); + context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, @@ -2610,7 +2647,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); + le32_to_cpu(hr_qp->sq_signal_bits)); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, 1); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, @@ -2624,7 +2661,8 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + context->cur_rq_wqe_ba_l = + cpu_to_le32((u32)(mtts[rq_pa_start])); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, @@ -2643,7 +2681,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn); - context->cur_sq_wqe_ba_l = (u32)mtts[0]; + context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, @@ -2658,23 +2696,25 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context)); - writel(context->qp1c_bytes_4, addr); - writel(context->sq_rq_bt_l, addr + 1); - writel(context->qp1c_bytes_12, addr + 2); - 
writel(context->qp1c_bytes_16, addr + 3); - writel(context->qp1c_bytes_20, addr + 4); - writel(context->cur_rq_wqe_ba_l, addr + 5); - writel(context->qp1c_bytes_28, addr + 6); - writel(context->qp1c_bytes_32, addr + 7); - writel(context->cur_sq_wqe_ba_l, addr + 8); - writel(context->qp1c_bytes_40, addr + 9); + writel(le32_to_cpu(context->qp1c_bytes_4), addr); + writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1); + writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2); + writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3); + writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4); + writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5); + writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6); + writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7); + writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8); + writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9); } /* Modify QP1C status */ reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context)); - roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, + tmp = cpu_to_le32(reg_val); + roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state); + reg_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context), reg_val); @@ -2712,7 +2752,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; - uint32_t doorbell[2] = {0}; + __le32 doorbell[2] = {0}; int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; @@ -2887,7 +2927,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dmac = (u8 *)attr->ah_attr.roce.dmac; - context->sq_rq_bt_l = (u32)(dma_handle); + context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, 
QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, @@ -2899,7 +2939,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S, attr->min_rnr_timer); - context->irrl_ba_l = (u32)(dma_handle_2); + context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2)); roce_set_field(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M, QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S, @@ -2913,7 +2953,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); + le32_to_cpu(hr_qp->sq_signal_bits)); port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; @@ -2991,7 +3031,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + context->cur_rq_wqe_ba_l = + cpu_to_le32((u32)(mtts[rq_pa_start])); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, @@ -3071,7 +3112,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; } - context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, @@ -3219,7 +3260,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); - context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, @@ -3386,16 +3427,16 @@ static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, addr = ROCEE_QP1C_CFG0_0_REG 
+ hr_qp->port * sizeof(struct hns_roce_sqp_context); - context.qp1c_bytes_4 = roce_read(hr_dev, addr); - context.sq_rq_bt_l = roce_read(hr_dev, addr + 1); - context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2); - context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3); - context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4); - context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5); - context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6); - context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7); - context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8); - context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9); + context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr)); + context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1)); + context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2)); + context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3)); + context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4)); + context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5)); + context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6)); + context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7)); + context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8)); + context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9)); hr_qp->state = roce_get_field(context.qp1c_bytes_4, QP1C_BYTES_4_QP_STATE_M, @@ -3557,7 +3598,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); - qp_attr->rnr_retry = context->rnr_retry; + qp_attr->rnr_retry = (u8)context->rnr_retry; done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -3595,42 +3636,47 @@ static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev, u32 *old_send, u32 *old_retry, u32 *tsp_st, u32 *success_flags) { + __le32 *old_send_tmp, *old_retry_tmp; u32 sdb_retry_cnt; u32 sdb_send_ptr; u32 cur_cnt, old_cnt; + __le32 tmp, 
tmp1; u32 send_ptr; sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - cur_cnt = roce_get_field(sdb_send_ptr, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + tmp = cpu_to_le32(sdb_send_ptr); + tmp1 = cpu_to_le32(sdb_retry_cnt); + cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + + old_send_tmp = (__le32 *)old_send; + old_retry_tmp = (__le32 *)old_retry; if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { - old_cnt = roce_get_field(*old_send, + old_cnt = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(*old_retry, + roce_get_field(*old_retry_tmp, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) *success_flags = 1; } else { - old_cnt = roce_get_field(*old_send, + old_cnt = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { *success_flags = 1; } else { - send_ptr = roce_get_field(*old_send, + send_ptr = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, + roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - roce_set_field(*old_send, + roce_set_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, send_ptr); @@ -3646,11 +3692,14 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, { struct device *dev = &hr_dev->pdev->dev; u32 sdb_send_ptr, old_send; + __le32 sdb_issue_ptr_tmp; + __le32 sdb_send_ptr_tmp; u32 success_flags = 0; unsigned long end; u32 old_retry; u32 inv_cnt; u32 tsp_st; + 
__le32 tmp; if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || *wait_stage < HNS_ROCE_V1_DB_STAGE1) { @@ -3679,10 +3728,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, ROCEE_SDB_SEND_PTR_REG); } - if (roce_get_field(sdb_issue_ptr, + sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr); + sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr); + if (roce_get_field(sdb_issue_ptr_tmp, ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == - roce_get_field(sdb_send_ptr, + roce_get_field(sdb_send_ptr_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); @@ -3690,7 +3741,8 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, do { tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); - if (roce_get_bit(tsp_st, + tmp = cpu_to_le32(tsp_st); + if (roce_get_bit(tmp, ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; return 0; @@ -3699,8 +3751,9 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, if (!time_before(jiffies, end)) { dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" "issue 0x%x send 0x%x.\n", - hr_qp->qpn, sdb_issue_ptr, - sdb_send_ptr); + hr_qp->qpn, + le32_to_cpu(sdb_issue_ptr_tmp), + le32_to_cpu(sdb_send_ptr_tmp)); return 0; } @@ -4102,9 +4155,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; u32 cqn; - cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, + cqn = roce_get_field(aeqe->event.cq_event.cq, HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)); + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -4340,6 +4393,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) u32 aeshift_val; u32 ceshift_val; u32 cemask_val; + __le32 tmp; int i; /* @@ -4348,30 +4402,34 @@ static irqreturn_t 
hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) * interrupt, mask irq, clear irq, cancel mask operation */ aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); + tmp = cpu_to_le32(aeshift_val); /* AEQE overflow */ - if (roce_get_bit(aeshift_val, + if (roce_get_bit(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) { dev_warn(dev, "AEQ overflow!\n"); /* Set mask */ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(caepaemask_val, - ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(caepaemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_ENABLE); + caepaemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); /* Clear int state(INT_WC : write 1 clear) */ caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG); - roce_set_bit(caepaest_val, - ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + tmp = cpu_to_le32(caepaest_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + caepaest_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val); /* Clear mask */ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(caepaemask_val, - ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(caepaemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_DISABLE); + caepaemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); } @@ -4379,8 +4437,9 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG + i * CEQ_REG_OFFSET); + tmp = cpu_to_le32(ceshift_val); - if (roce_get_bit(ceshift_val, + if (roce_get_bit(tmp, ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) { dev_warn(dev, "CEQ[%d] almost overflow!\n", i); int_work++; @@ -4389,9 +4448,11 @@ static irqreturn_t 
hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cemask_val = roce_read(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cemask_val, + tmp = cpu_to_le32(cemask_val); + roce_set_bit(tmp, ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_ENABLE); + cemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET, cemask_val); @@ -4399,9 +4460,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cealmovf_val = roce_read(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cealmovf_val, + tmp = cpu_to_le32(cealmovf_val); + roce_set_bit(tmp, ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S, 1); + cealmovf_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + i * CEQ_REG_OFFSET, cealmovf_val); @@ -4409,9 +4472,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cemask_val = roce_read(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cemask_val, + tmp = cpu_to_le32(cemask_val); + roce_set_bit(tmp, ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_DISABLE); + cemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET, cemask_val); } @@ -4435,13 +4500,16 @@ static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev) { u32 aemask_val; int masken = 0; + __le32 tmp; int i; /* AEQ INT */ aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(aemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, masken); - roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + aemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val); /* CEQ INT */ @@ -4473,20 +4541,24 @@ static void hns_roce_v1_enable_eq(struct 
hns_roce_dev *hr_dev, int eq_num, int enable_flag) { void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num]; + __le32 tmp; u32 val; val = readl(eqc); + tmp = cpu_to_le32(val); if (enable_flag) - roce_set_field(val, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_VALID); else - roce_set_field(val, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_INVALID); + + val = le32_to_cpu(tmp); writel(val, eqc); } @@ -4499,6 +4571,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, u32 eqconsindx_val = 0; u32 eqcuridx_val = 0; u32 eqshift_val = 0; + __le32 tmp2 = 0; + __le32 tmp1 = 0; + __le32 tmp = 0; int num_bas; int ret; int i; @@ -4530,14 +4605,13 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE); } eq->cons_index = 0; - roce_set_field(eqshift_val, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_INVALID); - roce_set_field(eqshift_val, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S, eq->log_entries); + eqshift_val = le32_to_cpu(tmp); writel(eqshift_val, eqc); /* Configure eq extended address 12~44bit */ @@ -4549,18 +4623,18 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. 
*/ - roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, + roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S, eq->buf_list[0].map >> 44); - roce_set_field(eqcuridx_val, - ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, + roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); + eqcuridx_val = le32_to_cpu(tmp1); writel(eqcuridx_val, eqc + 8); /* Configure eq consumer index */ - roce_set_field(eqconsindx_val, - ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, + roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); + eqconsindx_val = le32_to_cpu(tmp2); writel(eqconsindx_val, eqc + 0xc); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index e9a2717ea7cd..66440147d9eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -260,7 +260,7 @@ struct hns_roce_cqe { __le32 cqe_byte_4; union { __le32 r_key; - __be32 immediate_data; + __le32 immediate_data; }; __le32 byte_cnt; __le32 cqe_byte_16; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a6e11be0ea0f..951d839f1392 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -172,7 +172,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); - struct hns_roce_v2_wqe_data_seg *dseg; struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; unsigned int sge_ind = 0; @@ -485,7 +484,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } wqe += sizeof(struct hns_roce_v2_rc_send_wqe); - dseg = wqe; ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, 
wqe, &sge_ind, bad_wr); @@ -925,7 +923,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[2]; - struct hns_roce_pf_res *res; + struct hns_roce_pf_res_a *req_a; + struct hns_roce_pf_res_b *req_b; int ret; int i; @@ -943,21 +942,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) if (ret) return ret; - res = (struct hns_roce_pf_res *)desc[0].data; + req_a = (struct hns_roce_pf_res_a *)desc[0].data; + req_b = (struct hns_roce_pf_res_b *)desc[1].data; - hr_dev->caps.qpc_bt_num = roce_get_field(res->qpc_bt_idx_num, + hr_dev->caps.qpc_bt_num = roce_get_field(req_a->qpc_bt_idx_num, PF_RES_DATA_1_PF_QPC_BT_NUM_M, PF_RES_DATA_1_PF_QPC_BT_NUM_S); - hr_dev->caps.srqc_bt_num = roce_get_field(res->srqc_bt_idx_num, + hr_dev->caps.srqc_bt_num = roce_get_field(req_a->srqc_bt_idx_num, PF_RES_DATA_2_PF_SRQC_BT_NUM_M, PF_RES_DATA_2_PF_SRQC_BT_NUM_S); - hr_dev->caps.cqc_bt_num = roce_get_field(res->cqc_bt_idx_num, + hr_dev->caps.cqc_bt_num = roce_get_field(req_a->cqc_bt_idx_num, PF_RES_DATA_3_PF_CQC_BT_NUM_M, PF_RES_DATA_3_PF_CQC_BT_NUM_S); - hr_dev->caps.mpt_bt_num = roce_get_field(res->mpt_bt_idx_num, + hr_dev->caps.mpt_bt_num = roce_get_field(req_a->mpt_bt_idx_num, PF_RES_DATA_4_PF_MPT_BT_NUM_M, PF_RES_DATA_4_PF_MPT_BT_NUM_S); + hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num, + PF_RES_DATA_3_PF_SL_NUM_M, + PF_RES_DATA_3_PF_SL_NUM_S); + return 0; } @@ -1203,6 +1207,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->eqe_ba_pg_sz = 0; caps->eqe_buf_pg_sz = 0; caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; + caps->tsq_buf_pg_sz = 0; caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | @@ -1224,6 +1229,228 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } +static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, + enum hns_roce_link_table_type 
type) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cfg_llm_a *req_a = + (struct hns_roce_cfg_llm_a *)desc[0].data; + struct hns_roce_cfg_llm_b *req_b = + (struct hns_roce_cfg_llm_b *)desc[1].data; + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_link_table *link_tbl; + struct hns_roce_link_table_entry *entry; + enum hns_roce_opcode_type opcode; + u32 page_num; + int i; + + switch (type) { + case TSQ_LINK_TABLE: + link_tbl = &priv->tsq; + opcode = HNS_ROCE_OPC_CFG_EXT_LLM; + break; + case TPQ_LINK_TABLE: + link_tbl = &priv->tpq; + opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM; + break; + default: + return -EINVAL; + } + + page_num = link_tbl->npages; + entry = link_tbl->table.buf; + memset(req_a, 0, sizeof(*req_a)); + memset(req_b, 0, sizeof(*req_b)); + + for (i = 0; i < 2; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); + + if (i == 0) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + if (i == 0) { + req_a->base_addr_l = link_tbl->table.map & 0xffffffff; + req_a->base_addr_h = (link_tbl->table.map >> 32) & + 0xffffffff; + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_QUE_DEPTH_M, + CFG_LLM_QUE_DEPTH_S, + link_tbl->npages); + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_QUE_PGSZ_M, + CFG_LLM_QUE_PGSZ_S, + link_tbl->pg_sz); + req_a->head_ba_l = entry[0].blk_ba0; + req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr; + roce_set_field(req_a->head_ptr, + CFG_LLM_HEAD_PTR_M, + CFG_LLM_HEAD_PTR_S, 0); + } else { + req_b->tail_ba_l = entry[page_num - 1].blk_ba0; + roce_set_field(req_b->tail_ba_h, + CFG_LLM_TAIL_BA_H_M, + CFG_LLM_TAIL_BA_H_S, + entry[page_num - 1].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_BA1_M); + roce_set_field(req_b->tail_ptr, + CFG_LLM_TAIL_PTR_M, + CFG_LLM_TAIL_PTR_S, + (entry[page_num - 2].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> + HNS_ROCE_LINK_TABLE_NXT_PTR_S); + } + } + roce_set_field(req_a->depth_pgsz_init_en, + 
CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1); + + return hns_roce_cmq_send(hr_dev, desc, 2); +} + +static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, + enum hns_roce_link_table_type type) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_link_table *link_tbl; + struct hns_roce_link_table_entry *entry; + struct device *dev = hr_dev->dev; + u32 buf_chk_sz; + dma_addr_t t; + int func_num = 1; + int pg_num_a; + int pg_num_b; + int pg_num; + int size; + int i; + + switch (type) { + case TSQ_LINK_TABLE: + link_tbl = &priv->tsq; + buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT); + pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz; + pg_num_b = hr_dev->caps.sl_num * 4 + 2; + break; + case TPQ_LINK_TABLE: + link_tbl = &priv->tpq; + buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT); + pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz; + pg_num_b = 2 * 4 * func_num + 2; + break; + default: + return -EINVAL; + } + + pg_num = max(pg_num_a, pg_num_b); + size = pg_num * sizeof(struct hns_roce_link_table_entry); + + link_tbl->table.buf = dma_alloc_coherent(dev, size, + &link_tbl->table.map, + GFP_KERNEL); + if (!link_tbl->table.buf) + goto out; + + link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list), + GFP_KERNEL); + if (!link_tbl->pg_list) + goto err_kcalloc_failed; + + entry = link_tbl->table.buf; + for (i = 0; i < pg_num; ++i) { + link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz, + &t, GFP_KERNEL); + if (!link_tbl->pg_list[i].buf) + goto err_alloc_buf_failed; + + link_tbl->pg_list[i].map = t; + memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz); + + entry[i].blk_ba0 = (t >> 12) & 0xffffffff; + roce_set_field(entry[i].blk_ba1_nxt_ptr, + HNS_ROCE_LINK_TABLE_BA1_M, + HNS_ROCE_LINK_TABLE_BA1_S, + t >> 44); + + if (i < (pg_num - 1)) + roce_set_field(entry[i].blk_ba1_nxt_ptr, + HNS_ROCE_LINK_TABLE_NXT_PTR_M, + HNS_ROCE_LINK_TABLE_NXT_PTR_S, + i + 1); + } + link_tbl->npages = pg_num; + link_tbl->pg_sz = buf_chk_sz; 
+ + return hns_roce_config_link_table(hr_dev, type); + +err_alloc_buf_failed: + for (i -= 1; i >= 0; i--) + dma_free_coherent(dev, buf_chk_sz, + link_tbl->pg_list[i].buf, + link_tbl->pg_list[i].map); + kfree(link_tbl->pg_list); + +err_kcalloc_failed: + dma_free_coherent(dev, size, link_tbl->table.buf, + link_tbl->table.map); + +out: + return -ENOMEM; +} + +static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, + struct hns_roce_link_table *link_tbl) +{ + struct device *dev = hr_dev->dev; + int size; + int i; + + size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry); + + for (i = 0; i < link_tbl->npages; ++i) + if (link_tbl->pg_list[i].buf) + dma_free_coherent(dev, link_tbl->pg_sz, + link_tbl->pg_list[i].buf, + link_tbl->pg_list[i].map); + kfree(link_tbl->pg_list); + + dma_free_coherent(dev, size, link_tbl->table.buf, + link_tbl->table.map); +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + int ret; + + /* TSQ includes SQ doorbell and ack doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + return 0; + +err_tpq_init_failed: + hns_roce_free_link_table(hr_dev, &priv->tsq); + + return ret; +} + +static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + + hns_roce_free_link_table(hr_dev, &priv->tpq); + hns_roce_free_link_table(hr_dev, &priv->tsq); +} + static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) { u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); @@ -1307,13 +1534,45 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, + int 
gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_sgid_tb *sgid_tb = + (struct hns_roce_cfg_sgid_tb *)desc.data; + u32 *p; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); + + roce_set_field(sgid_tb->table_idx_rsv, + CFG_SGID_TB_TABLE_IDX_M, + CFG_SGID_TB_TABLE_IDX_S, gid_index); + roce_set_field(sgid_tb->vf_sgid_type_rsv, + CFG_SGID_TB_VF_SGID_TYPE_M, + CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); + + p = (u32 *)&gid->raw[0]; + sgid_tb->vf_sgid_l = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[4]; + sgid_tb->vf_sgid_ml = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[8]; + sgid_tb->vf_sgid_mh = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[0xc]; + sgid_tb->vf_sgid_h = cpu_to_le32(*p); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid, + int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; - u32 *p; - u32 val; + int ret; if (!gid || !attr) return -EINVAL; @@ -1328,49 +1587,37 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; } - p = (u32 *)&gid->raw[0]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG0_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[4]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG1_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[8]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG2_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[0xc]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG3_REG + - 0x20 * gid_index); - - val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index); - roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M, - ROCEE_VF_SGID_CFG4_SGID_TYPE_S, sgid_type); - - roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val); + ret = 
hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) + dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret); - return 0; + return ret; } static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) { + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_smac_tb *smac_tb = + (struct hns_roce_cfg_smac_tb *)desc.data; u16 reg_smac_h; u32 reg_smac_l; - u32 val; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false); reg_smac_l = *(u32 *)(&addr[0]); - roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_VF_SMAC_CFG0_REG + - 0x08 * phy_port); - val = roce_read(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port); + reg_smac_h = *(u16 *)(&addr[4]); - reg_smac_h = *(u16 *)(&addr[4]); - roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M, - ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h); - roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); + memset(smac_tb, 0, sizeof(*smac_tb)); + roce_set_field(smac_tb->tb_idx_rsv, + CFG_SMAC_TB_IDX_M, + CFG_SMAC_TB_IDX_S, phy_port); + roce_set_field(smac_tb->vf_smac_h_rsv, + CFG_SMAC_TB_VF_SMAC_H_M, + CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); + smac_tb->vf_smac_l = reg_smac_l; - return 0; + return hns_roce_cmq_send(hr_dev, &desc, 1); } static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, @@ -4052,15 +4299,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev, u32 bt_chk_sz; u32 mhop_num; int eqe_alloc; - int ba_num; int i = 0; int j = 0; mhop_num = hr_dev->caps.eqe_hop_num; buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); - ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) / - buf_chk_sz; /* hop_num = 0 */ if (mhop_num == HNS_ROCE_HOP_NUM_0) { @@ -4725,6 +4969,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, .hw_profile = hns_roce_v2_profile, + .hw_init = 
hns_roce_v2_init, + .hw_exit = hns_roce_v2_exit, .post_mbox = hns_roce_v2_post_mbox, .chk_mbox = hns_roce_v2_chk_mbox, .set_gid = hns_roce_v2_set_gid, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index d47675f365c7..df95b3515c94 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -203,6 +203,10 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, + HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, + HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, + HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, + HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, }; @@ -1061,6 +1065,40 @@ struct hns_roce_query_version { __le32 rsv[5]; }; +struct hns_roce_cfg_llm_a { + __le32 base_addr_l; + __le32 base_addr_h; + __le32 depth_pgsz_init_en; + __le32 head_ba_l; + __le32 head_ba_h_nxtptr; + __le32 head_ptr; +}; + +#define CFG_LLM_QUE_DEPTH_S 0 +#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0) + +#define CFG_LLM_QUE_PGSZ_S 16 +#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16) + +#define CFG_LLM_INIT_EN_S 20 +#define CFG_LLM_INIT_EN_M GENMASK(20, 20) + +#define CFG_LLM_HEAD_PTR_S 0 +#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0) + +struct hns_roce_cfg_llm_b { + __le32 tail_ba_l; + __le32 tail_ba_h; + __le32 tail_ptr; + __le32 rsv[3]; +}; + +#define CFG_LLM_TAIL_BA_H_S 0 +#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0) + +#define CFG_LLM_TAIL_PTR_S 0 +#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0) + struct hns_roce_cfg_global_param { __le32 time_cfg_udp_port; __le32 rsv[5]; @@ -1072,7 +1110,7 @@ struct hns_roce_cfg_global_param { #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16 #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16) -struct hns_roce_pf_res { +struct hns_roce_pf_res_a { __le32 rsv; __le32 qpc_bt_idx_num; __le32 srqc_bt_idx_num; @@ -1111,6 +1149,32 @@ struct hns_roce_pf_res { #define PF_RES_DATA_5_PF_EQC_BT_NUM_S 
16 #define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16) +struct hns_roce_pf_res_b { + __le32 rsv0; + __le32 smac_idx_num; + __le32 sgid_idx_num; + __le32 qid_idx_sl_num; + __le32 rsv[2]; +}; + +#define PF_RES_DATA_1_PF_SMAC_IDX_S 0 +#define PF_RES_DATA_1_PF_SMAC_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_1_PF_SMAC_NUM_S 8 +#define PF_RES_DATA_1_PF_SMAC_NUM_M GENMASK(16, 8) + +#define PF_RES_DATA_2_PF_SGID_IDX_S 0 +#define PF_RES_DATA_2_PF_SGID_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_2_PF_SGID_NUM_S 8 +#define PF_RES_DATA_2_PF_SGID_NUM_M GENMASK(16, 8) + +#define PF_RES_DATA_3_PF_QID_IDX_S 0 +#define PF_RES_DATA_3_PF_QID_IDX_M GENMASK(9, 0) + +#define PF_RES_DATA_3_PF_SL_NUM_S 16 +#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16) + struct hns_roce_vf_res_a { __le32 vf_id; __le32 vf_qpc_bt_idx_num; @@ -1179,13 +1243,6 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) -/* Reg field definition */ -#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0 -#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0) - -#define ROCEE_VF_SGID_CFG4_SGID_TYPE_S 0 -#define ROCEE_VF_SGID_CFG4_SGID_TYPE_M GENMASK(1, 0) - struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1230,6 +1287,32 @@ struct hns_roce_cfg_bt_attr { #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8 #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8) +struct hns_roce_cfg_sgid_tb { + __le32 table_idx_rsv; + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_rsv; +}; +#define CFG_SGID_TB_TABLE_IDX_S 0 +#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) + +#define CFG_SGID_TB_VF_SGID_TYPE_S 0 +#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) + +struct hns_roce_cfg_smac_tb { + __le32 tb_idx_rsv; + __le32 vf_smac_l; + __le32 vf_smac_h_rsv; + __le32 rsv[3]; +}; +#define CFG_SMAC_TB_IDX_S 0 +#define CFG_SMAC_TB_IDX_M GENMASK(7, 0) + +#define CFG_SMAC_TB_VF_SMAC_H_S 0 +#define 
CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) + struct hns_roce_cmq_desc { __le16 opcode; __le16 flag; @@ -1276,8 +1359,32 @@ struct hns_roce_v2_cmq { u16 last_status; }; +enum hns_roce_link_table_type { + TSQ_LINK_TABLE, + TPQ_LINK_TABLE, +}; + +struct hns_roce_link_table { + struct hns_roce_buf_list table; + struct hns_roce_buf_list *pg_list; + u32 npages; + u32 pg_sz; +}; + +struct hns_roce_link_table_entry { + u32 blk_ba0; + u32 blk_ba1_nxt_ptr; +}; +#define HNS_ROCE_LINK_TABLE_BA1_S 0 +#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0) + +#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20 +#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20) + struct hns_roce_v2_priv { struct hns_roce_v2_cmq cmq; + struct hns_roce_link_table tsq; + struct hns_roce_link_table tpq; }; struct hns_roce_eq_context { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 21b901cfa2d6..850032de8676 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -74,8 +74,7 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) return hr_dev->hw->set_mac(hr_dev, phy_port, addr); } -static int hns_roce_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) +static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); u8 port = attr->port_num - 1; @@ -87,8 +86,7 @@ static int hns_roce_add_gid(const union ib_gid *gid, spin_lock_irqsave(&hr_dev->iboe.lock, flags); - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, - (union ib_gid *)gid, attr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); @@ -208,7 +206,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_qp_wr = hr_dev->caps.max_wqes; props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_RC_RNR_NAK_GEN; - props->max_sge = 
max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg); + props->max_send_sge = hr_dev->caps.max_sq_sg; + props->max_recv_sge = hr_dev->caps.max_rq_sg; props->max_sge_rd = 1; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig index 2962979c06e9..d867ef1ac72a 100644 --- a/drivers/infiniband/hw/i40iw/Kconfig +++ b/drivers/infiniband/hw/i40iw/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_I40IW tristate "Intel(R) Ethernet X722 iWARP Driver" depends on INET && I40E + depends on IPV6 || !IPV6 depends on PCI select GENERIC_ALLOCATOR ---help--- diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 7b2655128b9f..423818a7d333 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -57,6 +57,7 @@ #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/ip_fib.h> +#include <net/secure_seq.h> #include <net/tcp.h> #include <asm/checksum.h> @@ -2164,7 +2165,6 @@ static struct i40iw_cm_node *i40iw_make_cm_node( struct i40iw_cm_listener *listener) { struct i40iw_cm_node *cm_node; - struct timespec ts; int oldarpindex; int arpindex; struct net_device *netdev = iwdev->netdev; @@ -2214,10 +2214,26 @@ static struct i40iw_cm_node *i40iw_make_cm_node( cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; - ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; - cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? 
(iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) : - (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6); + if (cm_node->ipv4) { + cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]), + htonl(cm_node->rem_addr[0]), + htons(cm_node->loc_port), + htons(cm_node->rem_port)); + cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4; + } else if (IS_ENABLED(CONFIG_IPV6)) { + __be32 loc[4] = { + htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]), + htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3]) + }; + __be32 rem[4] = { + htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]), + htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3]) + }; + cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem, + htons(cm_node->loc_port), + htons(cm_node->rem_port)); + cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6; + } cm_node->iwdev = iwdev; cm_node->dev = &iwdev->sc_dev; diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 2836c5420d60..55a1fbf0e670 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -435,45 +435,24 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) } /** - * i40iw_manage_apbvt - add or delete tcp port + * i40iw_cqp_manage_abvpt_cmd - send cqp command manage abpvt * @iwdev: iwarp device * @accel_local_port: port for apbvt * @add_port: add or delete port */ -int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port) +static enum i40iw_status_code +i40iw_cqp_manage_abvpt_cmd(struct i40iw_device *iwdev, + u16 accel_local_port, + bool add_port) { struct i40iw_apbvt_info *info; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; - unsigned long flags; - struct i40iw_cm_core *cm_core = &iwdev->cm_core; - enum i40iw_status_code status = 0; - bool in_use; - - /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to - * protect against race where add APBVT CQP can race 
ahead of the delete - * APBVT for same port. - */ - spin_lock_irqsave(&cm_core->apbvt_lock, flags); - - if (!add_port) { - in_use = i40iw_port_in_use(cm_core, accel_local_port); - if (in_use) - goto exit; - clear_bit(accel_local_port, cm_core->ports_in_use); - } else { - in_use = test_and_set_bit(accel_local_port, - cm_core->ports_in_use); - spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); - if (in_use) - return 0; - } + enum i40iw_status_code status; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port); - if (!cqp_request) { - status = -ENOMEM; - goto exit; - } + if (!cqp_request) + return I40IW_ERR_NO_MEMORY; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; @@ -489,14 +468,54 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad status = i40iw_handle_cqp_op(iwdev, cqp_request); if (status) i40iw_pr_err("CQP-OP Manage APBVT entry fail"); -exit: - if (!add_port) - spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return status; } /** + * i40iw_manage_apbvt - add or delete tcp port + * @iwdev: iwarp device + * @accel_local_port: port for apbvt + * @add_port: add or delete port + */ +enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev, + u16 accel_local_port, + bool add_port) +{ + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + enum i40iw_status_code status; + unsigned long flags; + bool in_use; + + /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to + * protect against race where add APBVT CQP can race ahead of the delete + * APBVT for same port. 
+ */ + if (add_port) { + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + in_use = __test_and_set_bit(accel_local_port, + cm_core->ports_in_use); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + if (in_use) + return 0; + return i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port, + true); + } else { + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + in_use = i40iw_port_in_use(cm_core, accel_local_port); + if (in_use) { + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return 0; + } + __clear_bit(accel_local_port, cm_core->ports_in_use); + status = i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port, + false); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return status; + } +} + +/** + * i40iw_manage_arp_cache - manage hw arp cache * @iwdev: iwarp device * @mac_addr: mac address ptr diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 68679ad4c6da..7d85414742ff 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -71,7 +71,8 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; props->max_qp = iwdev->max_qp - iwdev->used_qps; props->max_qp_wr = I40IW_MAX_QP_WRS; - props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; props->max_cq = iwdev->max_cq - iwdev->used_cqs; props->max_cqe = iwdev->max_cqe; props->max_mr = iwdev->max_mr - iwdev->used_mrs; @@ -1409,6 +1410,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) struct vm_area_struct *vma; struct hstate *h; + down_read(&current->mm->mmap_sem); vma = find_vma(current->mm, addr); if (vma && is_vm_hugetlb_page(vma)) { h = hstate_vma(vma); @@ -1417,6 +1419,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) iwmr->page_msk = huge_page_mask(h); } } + up_read(&current->mm->mmap_sem); } /** diff --git
a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 9345d5b546d1..e9e3a6f390db 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -82,12 +82,11 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct mlx4_ib_ah *ah) { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + const struct ib_gid_attr *gid_attr; struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; struct in6_addr in6; u16 vlan_tag = 0xffff; - union ib_gid sgid; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); int ret; @@ -96,25 +95,30 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, is_mcast = 1; memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); - ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &gid_attr); - if (ret) - return ERR_PTR(ret); eth_zero_addr(ah->av.eth.s_mac); - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); + + /* + * If sgid_attr is NULL we are being called by mlx4_ib_create_ah_slave + * and we are directly creating an AV for a slave's gid_index. 
+ */ + gid_attr = ah_attr->grh.sgid_attr; + if (gid_attr) { + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); + memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); + if (ret < 0) + return ERR_PTR(ret); + ah->av.eth.gid_index = ret; + } else { + /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */ + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + } + if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 24)); - ret = mlx4_ib_gid_index_to_real_index(ibdev, - rdma_ah_get_port_num(ah_attr), - grh->sgid_index); - if (ret < 0) - return ERR_PTR(ret); - ah->av.eth.gid_index = ret; ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = grh->hop_limit; if (rdma_ah_get_static_rate(ah_attr)) { @@ -173,6 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, return create_ib_ah(pd, ah_attr, ah); /* never fails */ } +/* AH's created via this call must be free'd by mlx4_ib_destroy_ah. 
*/ +struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, + u16 vlan_tag) +{ + struct rdma_ah_attr slave_attr = *ah_attr; + struct mlx4_ib_ah *mah; + struct ib_ah *ah; + + slave_attr.grh.sgid_attr = NULL; + slave_attr.grh.sgid_index = slave_sgid_index; + ah = mlx4_ib_create_ah(pd, &slave_attr, NULL); + if (IS_ERR(ah)) + return ah; + + ah->device = pd->device; + ah->pd = pd; + ah->type = ah_attr->type; + mah = to_mah(ah); + + /* get rid of force-loopback bit */ + mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); + + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) + memcpy(mah->av.eth.s_mac, s_mac, 6); + + if (vlan_tag < 0x1000) + vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; + mah->av.eth.vlan = cpu_to_be16(vlan_tag); + + return ah; +} + int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 90a3e2642c2e..8d730a69793d 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1367,13 +1367,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, struct mlx4_mad_snd_buf *sqp_mad; struct ib_ah *ah; struct ib_qp *send_qp = NULL; - struct ib_global_route *grh; unsigned wire_tx_ix = 0; int ret = 0; u16 wire_pkey_ix; int src_qpnum; - u8 sgid_index; - sqp_ctx = dev->sriov.sqps[port-1]; @@ -1394,16 +1391,11 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, send_qp = sqp->qp; /* create ah */ - grh = rdma_ah_retrieve_grh(attr); - sgid_index = grh->sgid_index; - grh->sgid_index = 0; - ah = rdma_create_ah(sqp_ctx->pd, attr); + ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, attr, + rdma_ah_retrieve_grh(attr)->sgid_index, + s_mac, vlan_id); if (IS_ERR(ah)) return -ENOMEM; - grh->sgid_index = sgid_index; - to_mah(ah)->av.ib.gid_index = sgid_index; - /* get rid of force-loopback bit */ - 
to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) @@ -1445,12 +1437,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.wr.num_sge = 1; wr.wr.opcode = IB_WR_SEND; wr.wr.send_flags = IB_SEND_SIGNALED; - if (s_mac) - memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); - if (vlan_id < 0x1000) - vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13; - to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id); - ret = ib_post_send(send_qp, &wr.wr, &bad_wr); if (!ret) @@ -1461,7 +1447,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - rdma_destroy_ah(ah); + mlx4_ib_destroy_ah(ah); return ret; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4ec519afc45b..ca0f1ee26091 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -246,9 +246,7 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return mlx4_ib_update_gids_v1(gids, ibdev, port_num); } -static int mlx4_ib_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) +static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) { struct mlx4_ib_dev *ibdev = to_mdev(attr->device); struct mlx4_ib_iboe *iboe = &ibdev->iboe; @@ -271,8 +269,9 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { - if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) && - (port_gid_table->gids[i].gid_type == attr->gid_type)) { + if (!memcmp(&port_gid_table->gids[i].gid, + &attr->gid, sizeof(attr->gid)) && + port_gid_table->gids[i].gid_type == attr->gid_type) { found = i; break; } @@ -289,7 +288,8 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, ret = -ENOMEM; } else { *context = 
port_gid_table->gids[free].ctx; - memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); + memcpy(&port_gid_table->gids[free].gid, + &attr->gid, sizeof(attr->gid)); port_gid_table->gids[free].gid_type = attr->gid_type; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; @@ -380,17 +380,15 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) } int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, - u8 port_num, int index) + const struct ib_gid_attr *attr) { struct mlx4_ib_iboe *iboe = &ibdev->iboe; struct gid_cache_context *ctx = NULL; - union ib_gid gid; struct mlx4_port_gid_table *port_gid_table; int real_index = -EINVAL; int i; - int ret; unsigned long flags; - struct ib_gid_attr attr; + u8 port_num = attr->port_num; if (port_num > MLX4_MAX_PORTS) return -EINVAL; @@ -399,21 +397,15 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, port_num = 1; if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) - return index; - - ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr); - if (ret) - return ret; - - if (attr.ndev) - dev_put(attr.ndev); + return attr->index; spin_lock_irqsave(&iboe->lock, flags); port_gid_table = &iboe->gids[port_num - 1]; for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) - if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) && - attr.gid_type == port_gid_table->gids[i].gid_type) { + if (!memcmp(&port_gid_table->gids[i].gid, + &attr->gid, sizeof(attr->gid)) && + attr->gid_type == port_gid_table->gids[i].gid_type) { ctx = port_gid_table->gids[i].ctx; break; } @@ -525,8 +517,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; - props->max_sge = min(dev->dev->caps.max_sq_sg, - dev->dev->caps.max_rq_sg); + props->max_send_sge = dev->dev->caps.max_sq_sg; + 
props->max_recv_sge = dev->dev->caps.max_rq_sg; props->max_sge_rd = MLX4_MAX_SGE_RD; props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; @@ -770,7 +762,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, IB_WIDTH_4X : IB_WIDTH_1X; props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? IB_SPEED_FDR : IB_SPEED_QDR; - props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->port_cap_flags = IB_PORT_CM_SUP; + props->ip_gids = true; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; @@ -2709,6 +2702,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; + ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq; + ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; ibdev->ib_dev.create_cq = mlx4_ib_create_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7b1429917aba..1a0fad30633b 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -760,6 +760,10 @@ void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); +struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, + u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); @@ -778,6 +782,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_destroy_qp(struct ib_qp *qp); +void mlx4_ib_drain_sq(struct ib_qp *qp); +void mlx4_ib_drain_rq(struct ib_qp *qp); 
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, @@ -900,7 +906,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, - u8 port_num, int index); + const struct ib_gid_attr *attr); void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, int port); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 3b8045fd23ed..408e720fd923 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1859,8 +1859,7 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah); int real_sgid_index = - mlx4_ib_gid_index_to_real_index(dev, port, - grh->sgid_index); + mlx4_ib_gid_index_to_real_index(dev, grh->sgid_attr); if (real_sgid_index < 0) return real_sgid_index; @@ -2176,6 +2175,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, { struct ib_uobject *ibuobject; struct ib_srq *ibsrq; + const struct ib_gid_attr *gid_attr = NULL; struct ib_rwq_ind_table *rwq_ind_tbl; enum ib_qp_type qp_type; struct mlx4_ib_dev *dev; @@ -2356,29 +2356,17 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (attr_mask & IB_QP_AV) { u8 port_num = mlx4_is_bonded(dev->dev) ? 1 : attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; - union ib_gid gid; - struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; - int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; if (is_eth) { - int index = - rdma_ah_read_grh(&attr->ah_attr)->sgid_index; - - status = ib_get_cached_gid(&dev->ib_dev, port_num, - index, &gid, &gid_attr); - if (!status) { - vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); - memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); - } + gid_attr = attr->ah_attr.grh.sgid_attr; + vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); + memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN); } - if (status) - goto out; if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, port_num, vlan, smac)) @@ -2389,7 +2377,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (is_eth && (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { - u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); + u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type); if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { err = -EINVAL; @@ -3181,10 +3169,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 
guid_cache[ah->av.ib.gid_index]; } else { - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + sqp->ud_header.grh.source_gid = + ah->ibah.sgid_attr->gid; } } memcpy(sqp->ud_header.grh.destination_gid.raw, @@ -3582,8 +3568,8 @@ static void add_zero_len_inline(void *wqe) inl->byte_count = cpu_to_be32(1 << 31); } -int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int _mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); void *wqe; @@ -3623,7 +3609,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR && + !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -3913,8 +3900,14 @@ out: return err; } -int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + return _mlx4_ib_post_send(ibqp, wr, bad_wr, false); +} + +static int _mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_wqe_data_seg *scat; @@ -3929,7 +3922,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR && + !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4000,6 +3994,12 @@ out: return err; } +int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false); +} + 
static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) { switch (mlx4_state) { @@ -4047,9 +4047,9 @@ static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev, u8 port_num = path->sched_queue & 0x40 ? 2 : 1; memset(ah_attr, 0, sizeof(*ah_attr)); - ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (port_num == 0 || port_num > dev->caps.num_ports) return; + ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) | @@ -4465,3 +4465,131 @@ int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) kfree(ib_rwq_ind_tbl); return 0; } + +struct mlx4_ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void mlx4_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx4_ib_drain_cqe *cqe = container_of(wc->wr_cqe, + struct mlx4_ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* This function returns only once the drained WR was completed */ +static void handle_drain_completion(struct ib_cq *cq, + struct mlx4_ib_drain_cqe *sdrain, + struct mlx4_ib_dev *dev) +{ + struct mlx4_dev *mdev = dev->dev; + + if (cq->poll_ctx == IB_POLL_DIRECT) { + while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0) + ib_process_cq_direct(cq, -1); + return; + } + + if (mdev->persist->state == MLX4_DEVICE_STATE_INTERNAL_ERROR) { + struct mlx4_ib_cq *mcq = to_mcq(cq); + bool triggered = false; + unsigned long flags; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + /* Make sure that the CQ handler won't run if wasn't run yet */ + if (!mcq->mcq.reset_notify_added) + mcq->mcq.reset_notify_added = 1; + else + triggered = true; + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + if (triggered) { + /* Wait for any scheduled/running task to be ended */ + switch (cq->poll_ctx) { + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + irq_poll_enable(&cq->iop); + break; + 
case IB_POLL_WORKQUEUE: + cancel_work_sync(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + } + + /* Run the CQ handler - this makes sure that the drain WR will + * be processed if wasn't processed yet. + */ + mcq->mcq.comp(&mcq->mcq); + } + + wait_for_completion(&sdrain->done); +} + +void mlx4_ib_drain_sq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->send_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx4_ib_drain_cqe sdrain; + struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; + int ret; + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_dev *mdev = dev->dev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + sdrain.cqe.done = mlx4_ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = _mlx4_ib_post_send(qp, &swr.wr, &bad_swr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &sdrain, dev); +} + +void mlx4_ib_drain_rq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->recv_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx4_ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}, *bad_rwr; + int ret; + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_dev *mdev = dev->dev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = mlx4_ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = _mlx4_ib_post_recv(qp, &rwr, &bad_rwr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &rdrain, dev); +} diff --git 
a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index d42b922bede8..577e4c418bae 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o +mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index e6bde32a83f3..ffd03bf1a71e 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -37,7 +37,6 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, struct rdma_ah_attr *ah_attr) { enum ib_gid_type gid_type; - int err; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -53,18 +52,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { - err = mlx5_get_roce_gid_type(dev, ah_attr->port_num, - ah_attr->grh.sgid_index, - &gid_type); - if (err) - return ERR_PTR(err); + gid_type = ah_attr->grh.sgid_attr->gid_type; memcpy(ah->av.rmac, ah_attr->roce.dmac, sizeof(ah_attr->roce.dmac)); ah->av.udp_sport = - mlx5_get_roce_udp_sport(dev, - rdma_ah_get_port_num(ah_attr), - rdma_ah_read_grh(ah_attr)->sgid_index); + mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr); ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) #define MLX5_ECN_ENABLED BIT(1) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ccc0b5d06a7d..c84fef9a8a08 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -185,3 +185,15 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, 
u64 length) return err; } + +int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) +{ + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, + 0, 0); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 98ea4648c655..88cbb1c41703 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -41,6 +41,7 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); +int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 985fa2637390..7e4e358a4fd8 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -359,9 +359,6 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count, int ret; char lbuf[11]; - if (*pos) - return 0; - ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var); if (ret) return ret; @@ -370,11 +367,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count, if (ret < 0) return ret; - if (copy_to_user(buf, lbuf, ret)) - return -EFAULT; - - *pos += ret; - return ret; + return simple_read_from_buffer(buf, count, pos, lbuf, ret); } static const struct file_operations dbg_cc_fops = { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c new file mode 100644 index 000000000000..7f9d73b03421 --- /dev/null +++ 
b/drivers/infiniband/hw/mlx5/devx.c @@ -0,0 +1,1107 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_umem.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) +struct devx_obj { + struct mlx5_core_dev *mdev; + u32 obj_id; + u32 dinlen; /* destroy inbox length */ + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; +}; + +struct devx_umem { + struct mlx5_core_dev *mdev; + struct ib_umem *umem; + u32 page_offset; + int page_shift; + int ncont; + u32 dinlen; + u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; +}; + +struct devx_umem_reg_cmd { + void *in; + u32 inlen; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; +}; + +static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) +{ + return to_mucontext(ib_uverbs_get_ucontext(file)); +} + +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +{ + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u64 general_obj_types; + void *hdr; + int err; + + hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); + + general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) || + !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) + return -EINVAL; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) 
+ return err; + + context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return 0; +} + +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid); + + mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + u32 obj_id; + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + break; + case MLX5_CMD_OP_QUERY_MKEY: + obj_id = MLX5_GET(query_mkey_in, in, mkey_index); + break; + case MLX5_CMD_OP_QUERY_CQ: + obj_id = MLX5_GET(query_cq_in, in, cqn); + break; + case MLX5_CMD_OP_MODIFY_CQ: + obj_id = MLX5_GET(modify_cq_in, in, cqn); + break; + case MLX5_CMD_OP_QUERY_SQ: + obj_id = MLX5_GET(query_sq_in, in, sqn); + break; + case MLX5_CMD_OP_MODIFY_SQ: + obj_id = MLX5_GET(modify_sq_in, in, sqn); + break; + case MLX5_CMD_OP_QUERY_RQ: + obj_id = MLX5_GET(query_rq_in, in, rqn); + break; + case MLX5_CMD_OP_MODIFY_RQ: + obj_id = MLX5_GET(modify_rq_in, in, rqn); + break; + case MLX5_CMD_OP_QUERY_RMP: + obj_id = MLX5_GET(query_rmp_in, in, rmpn); + break; + case MLX5_CMD_OP_MODIFY_RMP: + obj_id = MLX5_GET(modify_rmp_in, in, rmpn); + break; + case MLX5_CMD_OP_QUERY_RQT: + obj_id = MLX5_GET(query_rqt_in, in, rqtn); + break; + case MLX5_CMD_OP_MODIFY_RQT: + obj_id = MLX5_GET(modify_rqt_in, in, rqtn); + break; + case MLX5_CMD_OP_QUERY_TIR: + obj_id = MLX5_GET(query_tir_in, in, tirn); + break; + case MLX5_CMD_OP_MODIFY_TIR: + obj_id = MLX5_GET(modify_tir_in, in, 
tirn); + break; + case MLX5_CMD_OP_QUERY_TIS: + obj_id = MLX5_GET(query_tis_in, in, tisn); + break; + case MLX5_CMD_OP_MODIFY_TIS: + obj_id = MLX5_GET(modify_tis_in, in, tisn); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + obj_id = MLX5_GET(query_flow_table_in, in, table_id); + break; + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + obj_id = MLX5_GET(modify_flow_table_in, in, table_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + obj_id = MLX5_GET(query_flow_group_in, in, group_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + obj_id = MLX5_GET(query_fte_in, in, flow_index); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + obj_id = MLX5_GET(set_fte_in, in, flow_index); + break; + case MLX5_CMD_OP_QUERY_Q_COUNTER: + obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id); + break; + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + break; + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + obj_id = MLX5_GET(query_scheduling_element_in, in, + scheduling_element_id); + break; + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + obj_id = MLX5_GET(modify_scheduling_element_in, in, + scheduling_element_id); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + break; + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + break; + case MLX5_CMD_OP_QUERY_QP: + obj_id = MLX5_GET(query_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RST2INIT_QP: + obj_id = MLX5_GET(rst2init_qp_in, in, qpn); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + obj_id = MLX5_GET(init2rtr_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn); + break; + case 
MLX5_CMD_OP_RTS2RTS_QP: + obj_id = MLX5_GET(rts2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_SQERR2RTS_QP: + obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_2ERR_QP: + obj_id = MLX5_GET(qp_2err_in, in, qpn); + break; + case MLX5_CMD_OP_2RST_QP: + obj_id = MLX5_GET(qp_2rst_in, in, qpn); + break; + case MLX5_CMD_OP_QUERY_DCT: + obj_id = MLX5_GET(query_dct_in, in, dctn); + break; + case MLX5_CMD_OP_QUERY_XRQ: + obj_id = MLX5_GET(query_xrq_in, in, xrqn); + break; + case MLX5_CMD_OP_QUERY_XRC_SRQ: + obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn); + break; + case MLX5_CMD_OP_ARM_XRC_SRQ: + obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn); + break; + case MLX5_CMD_OP_QUERY_SRQ: + obj_id = MLX5_GET(query_srq_in, in, srqn); + break; + case MLX5_CMD_OP_ARM_RQ: + obj_id = MLX5_GET(arm_rq_in, in, srq_number); + break; + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + obj_id = MLX5_GET(drain_dct_in, in, dctn); + break; + case MLX5_CMD_OP_ARM_XRQ: + obj_id = MLX5_GET(arm_xrq_in, in, xrqn); + break; + default: + return false; + } + + if (obj_id == obj->obj_id) + return true; + + return false; +} + +static bool devx_is_obj_create_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case 
MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_CREATE_XRQ: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_ALLOC_XRCD: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + if (op_mod == 0) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_obj_modify_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_MODIFY_RQT: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_ARM_XRQ: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + + if (op_mod == 1) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_obj_query_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_QUERY_TIR: + case 
MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_QUERY_XRQ: + return true; + default: + return false; + } +} + +static bool devx_is_general_cmd(void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + return true; + default: + return false; + } +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + int user_vector; + int dev_eqn; + unsigned int irqn; + int err; + + if (uverbs_copy_from(&user_vector, attrs, + MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) + return -EFAULT; + + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + if (err < 0) + return err; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + &dev_eqn, sizeof(dev_eqn))) + return -EFAULT; + + return 0; +} + +/* + *Security note: + * The hardware protection mechanism works like this: Each device object that + * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in + * the 
device specification manual) upon its creation. Then upon doorbell, + * hardware fetches the object context for which the doorbell was rung, and + * validates that the UAR through which the DB was rung matches the UAR ID + * of the object. + * If there is no match, the doorbell is silently ignored by the hardware. Of + * course, the user cannot ring a doorbell on a UAR that was not mapped to it. + * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command + * mailboxes (except tagging them with UID), we expose to the user its UAR + * ID, so it can embed it in these objects in the expected specification + * format. So the only thing the user can do is hurt itself by creating a + * QP/SQ/CQ with a UAR ID other than its own, and then in this case other + * users may ring a doorbell on its objects. + * The consequence of that will be that another user can schedule a QP/SQ + * of the buggy user for execution (just insert it into the hardware schedule + * queue or arm its CQ for event generation); no further harm is expected. 
+ */ +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + u32 user_idx; + s32 dev_idx; + + if (uverbs_copy_from(&user_idx, attrs, + MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX)) + return -EFAULT; + + dev_idx = bfregn_to_uar_index(to_mdev(ib_dev), + &c->bfregi, user_idx, true); + if (dev_idx < 0) + return dev_idx; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + &dev_idx, sizeof(dev_idx))) + return -EFAULT; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + void *cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + /* Only white list of some general HCA commands are allowed for this method. 
*/ + if (!devx_is_general_cmd(cmd_in)) + return -EINVAL; + + cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); + if (!cmd_out) + return -ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto other_cmd_free; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, cmd_out_len); + +other_cmd_free: + kvfree(cmd_out); + return err; +} + +static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, + u32 *dinlen, + u32 *obj_id) +{ + u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); + + switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + break; + + case MLX5_CMD_OP_CREATE_MKEY: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + break; + case MLX5_CMD_OP_CREATE_CQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + break; + case MLX5_CMD_OP_ALLOC_PD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + break; + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + break; + case MLX5_CMD_OP_CREATE_RMP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + break; + case MLX5_CMD_OP_CREATE_SQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + break; + case MLX5_CMD_OP_CREATE_RQ: + MLX5_SET(general_obj_in_cmd_hdr, din, 
opcode, MLX5_CMD_OP_DESTROY_RQ); + break; + case MLX5_CMD_OP_CREATE_RQT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + break; + case MLX5_CMD_OP_CREATE_TIR: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + break; + case MLX5_CMD_OP_CREATE_TIS: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + break; + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + break; + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); + *obj_id = MLX5_GET(create_flow_table_out, out, table_id); + MLX5_SET(destroy_flow_table_in, din, other_vport, + MLX5_GET(create_flow_table_in, in, other_vport)); + MLX5_SET(destroy_flow_table_in, din, vport_number, + MLX5_GET(create_flow_table_in, in, vport_number)); + MLX5_SET(destroy_flow_table_in, din, table_type, + MLX5_GET(create_flow_table_in, in, table_type)); + MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + break; + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); + *obj_id = MLX5_GET(create_flow_group_out, out, group_id); + MLX5_SET(destroy_flow_group_in, din, other_vport, + MLX5_GET(create_flow_group_in, in, other_vport)); + MLX5_SET(destroy_flow_group_in, din, vport_number, + MLX5_GET(create_flow_group_in, in, vport_number)); + MLX5_SET(destroy_flow_group_in, din, table_type, + MLX5_GET(create_flow_group_in, in, table_type)); + MLX5_SET(destroy_flow_group_in, din, table_id, + MLX5_GET(create_flow_group_in, in, table_id)); + MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); + *obj_id = MLX5_GET(set_fte_in, in, flow_index); + MLX5_SET(delete_fte_in, 
din, other_vport, + MLX5_GET(set_fte_in, in, other_vport)); + MLX5_SET(delete_fte_in, din, vport_number, + MLX5_GET(set_fte_in, in, vport_number)); + MLX5_SET(delete_fte_in, din, table_type, + MLX5_GET(set_fte_in, in, table_type)); + MLX5_SET(delete_fte_in, din, table_id, + MLX5_GET(set_fte_in, in, table_id)); + MLX5_SET(delete_fte_in, din, flow_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + break; + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + break; + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + break; + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + break; + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); + *obj_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_hierarchy, + MLX5_GET(create_scheduling_element_in, in, + scheduling_hierarchy)); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_element_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); + *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); + *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + 
MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + break; + case MLX5_CMD_OP_CREATE_QP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + break; + case MLX5_CMD_OP_CREATE_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + break; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + break; + case MLX5_CMD_OP_CREATE_DCT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + break; + case MLX5_CMD_OP_CREATE_XRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + break; + case MLX5_CMD_OP_ATTACH_TO_MCG: + *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); + MLX5_SET(detach_from_mcg_in, din, qpn, + MLX5_GET(attach_to_mcg_in, in, qpn)); + memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), + MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), + MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + break; + case MLX5_CMD_OP_ALLOC_XRCD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + break; + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + break; + } +} + +static int devx_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct devx_obj *obj = uobject->object; + int ret; + + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + kfree(obj); + return ret; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); + int 
cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); + void *cmd_out; + struct ib_uobject *uobj; + struct devx_obj *obj; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_create_cmd(cmd_in)) + return -EINVAL; + + obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); + if (!cmd_out) { + err = -ENOMEM; + goto obj_free; + } + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto cmd_free; + + uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); + uobj->object = obj; + obj->mdev = dev->mdev; + devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id); + WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); + if (err) + goto cmd_free; + + kvfree(cmd_out); + return 0; + +cmd_free: + kvfree(cmd_out); +obj_free: + kfree(obj); + return err; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_modify_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(uobj->object, cmd_in)) + return -EINVAL; + + cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); + if (!cmd_out) + return 
-ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto other_cmd_free; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + cmd_out, cmd_out_len); + +other_cmd_free: + kvfree(cmd_out); + return err; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(uobj->object, cmd_in)) + return -EINVAL; + + cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); + if (!cmd_out) + return -ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto other_cmd_free; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, cmd_out, cmd_out_len); + +other_cmd_free: + kvfree(cmd_out); + return err; +} + +static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, + struct uverbs_attr_bundle *attrs, + struct devx_umem *obj) +{ + u64 addr; + size_t size; + int access; + int npages; + int err; + u32 page_mask; + + if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || + uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN) || + uverbs_copy_from(&access, attrs, 
MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS)) + return -EFAULT; + + err = ib_check_mr_access(access); + if (err) + return err; + + obj->umem = ib_umem_get(ucontext, addr, size, access, 0); + if (IS_ERR(obj->umem)) + return PTR_ERR(obj->umem); + + mlx5_ib_cont_pages(obj->umem, obj->umem->address, + MLX5_MKEY_PAGE_SHIFT_MASK, &npages, + &obj->page_shift, &obj->ncont, NULL); + + if (!npages) { + ib_umem_release(obj->umem); + return -EINVAL; + } + + page_mask = (1 << obj->page_shift) - 1; + obj->page_offset = obj->umem->address & page_mask; + + return 0; +} + +static int devx_umem_reg_cmd_alloc(struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + + (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + cmd->in = kvzalloc(cmd->inlen, GFP_KERNEL); + return cmd->in ? 0 : -ENOMEM; +} + +static void devx_umem_reg_cmd_free(struct devx_umem_reg_cmd *cmd) +{ + kvfree(cmd->in); +} + +static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, + struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + void *umem; + __be64 *mtt; + + umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); + mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); + + MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM); + MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); + MLX5_SET(umem, umem, log_page_size, obj->page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, obj->page_offset); + mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + (obj->umem->writable ? 
MLX5_IB_MTT_WRITE : 0) | + MLX5_IB_MTT_READ); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + struct devx_umem_reg_cmd cmd; + struct devx_umem *obj; + struct ib_uobject *uobj; + u32 obj_id; + int err; + + if (!c->devx_uid) + return -EPERM; + + uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE); + obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + err = devx_umem_get(dev, &c->ibucontext, attrs, obj); + if (err) + goto err_obj_free; + + err = devx_umem_reg_cmd_alloc(obj, &cmd); + if (err) + goto err_umem_release; + + devx_umem_reg_cmd_build(dev, obj, &cmd); + + MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, + sizeof(cmd.out)); + if (err) + goto err_umem_reg_cmd_free; + + obj->mdev = dev->mdev; + uobj->object = obj; + devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id); + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id)); + if (err) + goto err_umem_destroy; + + devx_umem_reg_cmd_free(&cmd); + + return 0; + +err_umem_destroy: + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out)); +err_umem_reg_cmd_free: + devx_umem_reg_cmd_free(&cmd); +err_umem_release: + ib_umem_release(obj->umem); +err_obj_free: + kfree(obj); + return err; +} + +static int devx_umem_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct devx_umem *obj = uobject->object; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + int err; + + err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (ib_is_destroy_retryable(err, why, uobject)) + return err; + + ib_umem_release(obj->umem); + kfree(obj); + return 0; +} + 
+DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_UMEM_REG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DEVX_UMEM_DEREG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_EQN, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_UAR, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OTHER, + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, 
+ UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DEVX_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_MODIFY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), + 
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + +DECLARE_UVERBS_OBJECT_TREE(devx_objects, + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); + +const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) +{ + return &devx_objects; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e52dd21519b4..d4d894e9f942 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size); @@ -510,12 +510,11 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, vlan_id, port_num); } -static int mlx5_ib_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, +static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, __always_unused void **context) { return set_roce_addr(to_mdev(attr->device), attr->port_num, - attr->index, gid, attr); + attr->index, &attr->gid, attr); } static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, @@ -525,41 +524,15 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, attr->index, NULL, NULL); } -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, - int index) +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr) { - struct ib_gid_attr attr; - union ib_gid gid; - - if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) - return 0; - - dev_put(attr.ndev); - - if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return 
0; return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); } -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, - int index, enum ib_gid_type *gid_type) -{ - struct ib_gid_attr attr; - union ib_gid gid; - int ret; - - ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr); - if (ret) - return ret; - - dev_put(attr.ndev); - - *gid_type = attr.gid_type; - - return 0; -} - static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) @@ -915,7 +888,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg); - props->max_sge = min(max_rq_sg, max_sq_sg); + props->max_send_sge = max_sq_sg; + props->max_recv_sge = max_rq_sg; props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; @@ -1246,7 +1220,6 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->qkey_viol_cntr = rep->qkey_violation_counter; props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; - props->grh_required = rep->grh_required; err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) @@ -1585,31 +1558,26 @@ error: return err; } -static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +static void deallocate_uars(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) { struct mlx5_bfreg_info *bfregi; - int err; int i; bfregi = &context->bfregi; - for (i = 0; i < bfregi->num_sys_pages; i++) { + for (i = 0; i < bfregi->num_sys_pages; i++) if (i < bfregi->num_static_sys_pages || - bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) { - err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); - if (err) { - mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err); - 
return err; - } - } - } - - return 0; + bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) + mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); } static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) { int err; + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return 0; + err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); if (err) return err; @@ -1631,6 +1599,9 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) { + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return; + mlx5_core_dealloc_transport_domain(dev->mdev, tdn); if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || @@ -1660,6 +1631,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int err; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); + u32 dump_fill_mkey; bool lib_uar_4k; if (!dev->ib_active) @@ -1676,8 +1648,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) return ERR_PTR(err); - if (req.flags) - return ERR_PTR(-EINVAL); + if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX) + return ERR_PTR(-EOPNOTSUPP); if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) return ERR_PTR(-EOPNOTSUPP); @@ -1755,10 +1727,26 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { - err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); + if (err) + goto out_uars; + + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { + /* Block DEVX on Infiniband as of SELinux */ + if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) { + err = -EPERM; + goto out_td; + } + + err = mlx5_ib_devx_create(dev, context); + if (err) + goto out_td; 
+ } + + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); if (err) - goto out_uars; + goto out_mdev; } INIT_LIST_HEAD(&context->vma_private_list); @@ -1819,9 +1807,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length += sizeof(resp.num_dyn_bfregs); } + if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) { + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + resp.dump_fill_mkey = dump_fill_mkey; + resp.comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; + } + resp.response_length += sizeof(resp.dump_fill_mkey); + } + err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - goto out_td; + goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; @@ -1831,9 +1828,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, return &context->ibucontext; +out_mdev: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) + mlx5_ib_devx_destroy(dev, context); out_td: - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); out_uars: deallocate_uars(dev, context); @@ -1856,9 +1855,11 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; + if (context->devx_uid) + mlx5_ib_devx_destroy(dev, context); + bfregi = &context->bfregi; - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); deallocate_uars(dev, context); kfree(bfregi->sys_pages); @@ -2040,7 +2041,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, struct mlx5_bfreg_info *bfregi = &context->bfregi; int err; unsigned long idx; - phys_addr_t pfn, pa; + phys_addr_t pfn; pgprot_t prot; u32 
bfreg_dyn_idx = 0; u32 uar_index; @@ -2131,8 +2132,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, goto err; } - pa = pfn << PAGE_SHIFT; - err = mlx5_ib_set_vma_data(vma, context); if (err) goto err; @@ -2699,7 +2698,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, IPPROTO_GRE); MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, - 0xffff); + ntohs(ib_spec->gre.mask.protocol)); MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, ntohs(ib_spec->gre.val.protocol)); @@ -4460,7 +4459,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) cancel_work_sync(&devr->ports[port].pkey_change_work); } -static u32 get_core_cap_flags(struct ib_device *ibdev) +static u32 get_core_cap_flags(struct ib_device *ibdev, + struct mlx5_hca_vport_context *rep) { struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); @@ -4469,11 +4469,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev) bool raw_support = !mlx5_core_mp_enabled(dev->mdev); u32 ret = 0; + if (rep->grh_required) + ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED; + if (ll == IB_LINK_LAYER_INFINIBAND) - return RDMA_CORE_PORT_IBA_IB; + return ret | RDMA_CORE_PORT_IBA_IB; if (raw_support) - ret = RDMA_CORE_PORT_RAW_PACKET; + ret |= RDMA_CORE_PORT_RAW_PACKET; if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) return ret; @@ -4496,17 +4499,23 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); + struct mlx5_hca_vport_context rep = {0}; int err; - immutable->core_cap_flags = get_core_cap_flags(ibdev); - err = ib_query_port(ibdev, port_num, &attr); if (err) return err; + if (ll == IB_LINK_LAYER_INFINIBAND) { + err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0, + &rep); + if (err) + return err; + } + immutable->pkey_tbl_len = attr.pkey_tbl_len; 
immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = get_core_cap_flags(ibdev); + immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep); if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) immutable->max_mad_size = IB_MGMT_MAD_SIZE; @@ -4604,7 +4613,7 @@ static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) } } -static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num) +static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; @@ -4683,12 +4692,21 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(req_cqe_flush_error), }; +#define INIT_EXT_PPCNT_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} + +static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { + INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), +}; + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { int i; for (i = 0; i < dev->num_ports; i++) { - if (dev->port[i].cnts.set_id) + if (dev->port[i].cnts.set_id_valid) mlx5_core_dealloc_q_counter(dev->mdev, dev->port[i].cnts.set_id); kfree(dev->port[i].cnts.names); @@ -4718,7 +4736,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); num_counters += ARRAY_SIZE(cong_cnts); } - + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); + num_counters += ARRAY_SIZE(ext_ppcnt_cnts); + } cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); if (!cnts->names) return -ENOMEM; @@ -4775,6 +4796,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, offsets[j] = cong_cnts[i].offset; } } + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { + names[j] = ext_ppcnt_cnts[i].name; + offsets[j] = ext_ppcnt_cnts[i].offset; + } + } } 
static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) @@ -4820,7 +4848,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, return rdma_alloc_hw_stats_struct(port->cnts.names, port->cnts.num_q_counters + - port->cnts.num_cong_counters, + port->cnts.num_cong_counters + + port->cnts.num_ext_ppcnt_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -4853,6 +4882,34 @@ free: return ret; } +static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_port *port, + struct rdma_hw_stats *stats) +{ + int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int ret, i; + void *out; + + out = kvzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); + if (ret) + goto free; + + for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) { + stats->value[i + offset] = + be64_to_cpup((__be64 *)(out + + port->cnts.offsets[i + offset])); + } + +free: + kvfree(out); + return ret; +} + static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) @@ -4866,13 +4923,21 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, if (!stats) return -EINVAL; - num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters; + num_counters = port->cnts.num_q_counters + + port->cnts.num_cong_counters + + port->cnts.num_ext_ppcnt_counters; /* q_counters are per IB device, query the master mdev */ ret = mlx5_ib_query_q_counters(dev->mdev, port, stats); if (ret) return ret; + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats); + if (ret) + return ret; + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); @@ -5257,22 +5322,26 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) 
mlx5_nic_vport_disable_roce(dev->mdev); } -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM, - UVERBS_METHOD_DM_ALLOC, - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - UVERBS_ATTR_TYPE(u16), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -#define NUM_TREES 2 +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_dm, + UVERBS_OBJECT_DM, + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY)); + +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_flow_action, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + +#define NUM_TREES 3 static int populate_specs_root(struct mlx5_ib_dev *dev) { const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { @@ -5287,15 +5356,20 @@ static int populate_specs_root(struct mlx5_ib_dev *dev) !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) default_root[num_trees++] = &mlx5_ib_dm; - dev->ib_dev.specs_root = + if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_UCTX && + !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) + default_root[num_trees++] = mlx5_ib_get_devx_tree(); + + dev->ib_dev.driver_specs_root = uverbs_alloc_spec_tree(num_trees, default_root); - return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root); + return PTR_ERR_OR_ZERO(dev->ib_dev.driver_specs_root); } static void depopulate_specs_root(struct mlx5_ib_dev *dev) { - 
uverbs_free_spec_tree(dev->ib_dev.specs_root); + uverbs_free_spec_tree(dev->ib_dev.driver_specs_root); } static int mlx5_ib_read_counters(struct ib_counters *counters, @@ -5546,6 +5620,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.modify_qp = mlx5_ib_modify_qp; dev->ib_dev.query_qp = mlx5_ib_query_qp; dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; + dev->ib_dev.drain_sq = mlx5_ib_drain_sq; + dev->ib_dev.drain_rq = mlx5_ib_drain_rq; dev->ib_dev.post_send = mlx5_ib_post_send; dev->ib_dev.post_recv = mlx5_ib_post_recv; dev->ib_dev.create_cq = mlx5_ib_create_cq; @@ -5643,9 +5719,9 @@ int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) return 0; } -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, - u8 port_num) +static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { + u8 port_num; int i; for (i = 0; i < dev->num_ports; i++) { @@ -5668,6 +5744,8 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + return mlx5_add_netdev_notifier(dev, port_num); } @@ -5684,14 +5762,12 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) enum rdma_link_layer ll; int port_type_cap; int err = 0; - u8 port_num; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) - err = mlx5_ib_stage_common_roce_init(dev, port_num); + err = mlx5_ib_stage_common_roce_init(dev); return err; } @@ -5706,19 +5782,17 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num; int err; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if 
(ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_ib_stage_common_roce_init(dev, port_num); + err = mlx5_ib_stage_common_roce_init(dev); if (err) return err; - err = mlx5_enable_eth(dev, port_num); + err = mlx5_enable_eth(dev); if (err) goto cleanup; } @@ -5735,9 +5809,7 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -5909,8 +5981,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device((struct ib_device *)dev); } -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); - void *__mlx5_ib_add(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile) { @@ -6040,7 +6110,7 @@ static const struct mlx5_ib_profile nic_rep_profile = { mlx5_ib_stage_rep_reg_cleanup), }; -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) { struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; @@ -6074,8 +6144,6 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) if (!bound) { list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n"); - } else { - mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1); } mutex_unlock(&mlx5_ib_multiport_mutex); @@ -6093,11 +6161,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) { - u8 port_num = mlx5_core_native_port_num(mdev) - 1; - - return mlx5_ib_add_slave_port(mdev, port_num); - } + if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) + return 
mlx5_ib_add_slave_port(mdev); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); if (!dev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d89c8fe626f6..04a5d82c9cf3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -78,12 +78,6 @@ enum { MLX5_REQ_SCAT_DATA64_CQE = 0x22, }; -enum mlx5_ib_latency_class { - MLX5_IB_LATENCY_CLASS_LOW, - MLX5_IB_LATENCY_CLASS_MEDIUM, - MLX5_IB_LATENCY_CLASS_HIGH, -}; - enum mlx5_ib_mad_ifc_flags { MLX5_MAD_IFC_IGNORE_MKEY = 1, MLX5_MAD_IFC_IGNORE_BKEY = 2, @@ -143,6 +137,7 @@ struct mlx5_ib_ucontext { u64 lib_caps; DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); + u16 devx_uid; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -665,6 +660,7 @@ struct mlx5_ib_counters { size_t *offsets; u32 num_q_counters; u32 num_cong_counters; + u32 num_ext_ppcnt_counters; u16 set_id; bool set_id_valid; }; @@ -1014,6 +1010,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx5_ib_destroy_qp(struct ib_qp *qp); +void mlx5_ib_drain_sq(struct ib_qp *qp); +void mlx5_ib_drain_rq(struct ib_qp *qp); int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, @@ -1183,10 +1181,8 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, - int index); -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, - int index, enum ib_gid_type *gid_type); +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); 
int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); @@ -1217,6 +1213,21 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context); +const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); +#else +static inline int +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; }; +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) {} +static inline const struct uverbs_object_tree_def * +mlx5_ib_get_devx_tree(void) { return NULL; } +#endif static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -1318,4 +1329,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, unsigned long mlx5_ib_get_xlt_emergency_page(void); void mlx5_ib_put_xlt_emergency_page(void); +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, u32 bfregn, + bool dyn_bfreg); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 90a9c461cedc..308456d28afb 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -271,16 +271,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf, { struct mlx5_cache_ent *ent = filp->private_data; struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20]; + char lbuf[20] = {0}; u32 var; int err; int c; - if (copy_from_user(lbuf, buf, sizeof(lbuf))) + count = min(count, sizeof(lbuf) - 1); + if (copy_from_user(lbuf, buf, count)) return -EFAULT; c = order2idx(dev, ent->order); - lbuf[sizeof(lbuf) - 1] = 0; if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -310,19 +310,11 @@ static 
ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - if (*pos) - return 0; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); if (err < 0) return err; - if (copy_to_user(buf, lbuf, err)) - return -EFAULT; - - *pos += err; - - return err; + return simple_read_from_buffer(buf, count, pos, lbuf, err); } static const struct file_operations size_fops = { @@ -337,16 +329,16 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, { struct mlx5_cache_ent *ent = filp->private_data; struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20]; + char lbuf[20] = {0}; u32 var; int err; int c; - if (copy_from_user(lbuf, buf, sizeof(lbuf))) + count = min(count, sizeof(lbuf) - 1); + if (copy_from_user(lbuf, buf, count)) return -EFAULT; c = order2idx(dev, ent->order); - lbuf[sizeof(lbuf) - 1] = 0; if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -372,19 +364,11 @@ static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - if (*pos) - return 0; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); if (err < 0) return err; - if (copy_to_user(buf, lbuf, err)) - return -EFAULT; - - *pos += err; - - return err; + return simple_read_from_buffer(buf, count, pos, lbuf, err); } static const struct file_operations limit_fops = { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a4f1f638509f..d4414015b64f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -563,32 +563,21 @@ static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev, } static int alloc_bfreg(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, - enum mlx5_ib_latency_class lat) + struct mlx5_bfreg_info *bfregi) { - int bfregn = -EINVAL; + int bfregn = -ENOMEM; mutex_lock(&bfregi->lock); - switch (lat) { - case MLX5_IB_LATENCY_CLASS_LOW: + if (bfregi->ver >= 2) { + bfregn = alloc_high_class_bfreg(dev, bfregi); + if (bfregn < 0) + bfregn = 
alloc_med_class_bfreg(dev, bfregi); + } + + if (bfregn < 0) { BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1); bfregn = 0; bfregi->count[bfregn]++; - break; - - case MLX5_IB_LATENCY_CLASS_MEDIUM: - if (bfregi->ver < 2) - bfregn = -ENOMEM; - else - bfregn = alloc_med_class_bfreg(dev, bfregi); - break; - - case MLX5_IB_LATENCY_CLASS_HIGH: - if (bfregi->ver < 2) - bfregn = -ENOMEM; - else - bfregn = alloc_high_class_bfreg(dev, bfregi); - break; } mutex_unlock(&bfregi->lock); @@ -641,13 +630,13 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq); -static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, int bfregn, - bool dyn_bfreg) +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, u32 bfregn, + bool dyn_bfreg) { - int bfregs_per_sys_page; - int index_of_sys_page; - int offset; + unsigned int bfregs_per_sys_page; + u32 index_of_sys_page; + u32 offset; bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * MLX5_NON_FP_BFREGS_PER_UAR; @@ -655,6 +644,10 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, if (dyn_bfreg) { index_of_sys_page += bfregi->num_static_sys_pages; + + if (index_of_sys_page >= bfregi->num_sys_pages) + return -EINVAL; + if (bfregn > bfregi->num_dyn_bfregs || bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) { mlx5_ib_dbg(dev, "Invalid dynamic uar index\n"); @@ -819,21 +812,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, bfregn = MLX5_CROSS_CHANNEL_BFREG; } else { - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH); - if (bfregn < 0) { - mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n"); - mlx5_ib_dbg(dev, "reverting to medium latency\n"); - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM); - if (bfregn < 0) { - mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n"); - 
mlx5_ib_dbg(dev, "reverting to high latency\n"); - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW); - if (bfregn < 0) { - mlx5_ib_warn(dev, "bfreg allocation failed\n"); - return bfregn; - } - } - } + bfregn = alloc_bfreg(dev, &context->bfregi); + if (bfregn < 0) + return bfregn; } mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index); @@ -2555,18 +2536,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) { if (!(ah_flags & IB_AH_GRH)) return -EINVAL; - err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index, - &gid_type); - if (err) - return err; + memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_XRC_INI || qp->ibqp.qp_type == IB_QPT_XRC_TGT) - path->udp_sport = mlx5_get_roce_udp_sport(dev, port, - grh->sgid_index); + path->udp_sport = + mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr); path->dci_cfi_prio_sl = (sl & 0x7) << 4; + gid_type = ah->grh.sgid_attr->gid_type; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f; } else { @@ -4360,9 +4339,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.w_list[idx].next = qp->sq.cur_post; } - -int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int _mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr, bool drain) { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4393,7 +4371,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4690,13 +4668,19 @@ out: return err; } +int mlx5_ib_post_send(struct 
ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + return _mlx5_ib_post_send(ibqp, wr, bad_wr, false); +} + static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) { sig->signature = calc_sig(sig, size); } -int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int _mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr, bool drain) { struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; @@ -4714,7 +4698,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4776,6 +4760,12 @@ out: return err; } +int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false); +} + static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) { switch (mlx5_state) { @@ -5697,3 +5687,131 @@ out: kvfree(in); return err; } + +struct mlx5_ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe, + struct mlx5_ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* This function returns only once the drained WR was completed */ +static void handle_drain_completion(struct ib_cq *cq, + struct mlx5_ib_drain_cqe *sdrain, + struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + if (cq->poll_ctx == IB_POLL_DIRECT) { + while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0) + ib_process_cq_direct(cq, -1); + return; + } + + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + struct mlx5_ib_cq *mcq = to_mcq(cq); + bool triggered = false; + unsigned long flags; + + 
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + /* Make sure that the CQ handler won't run if wasn't run yet */ + if (!mcq->mcq.reset_notify_added) + mcq->mcq.reset_notify_added = 1; + else + triggered = true; + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + if (triggered) { + /* Wait for any scheduled/running task to be ended */ + switch (cq->poll_ctx) { + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + irq_poll_enable(&cq->iop); + break; + case IB_POLL_WORKQUEUE: + cancel_work_sync(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + } + + /* Run the CQ handler - this makes sure that the drain WR will + * be processed if wasn't processed yet. + */ + mcq->mcq.comp(&mcq->mcq); + } + + wait_for_completion(&sdrain->done); +} + +void mlx5_ib_drain_sq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->send_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx5_ib_drain_cqe sdrain; + struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; + int ret; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_core_dev *mdev = dev->mdev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + sdrain.cqe.done = mlx5_ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &sdrain, dev); +} + +void mlx5_ib_drain_rq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->recv_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx5_ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}, *bad_rwr; + int ret; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_core_dev *mdev = dev->mdev; + + ret = ib_modify_qp(qp, 
&attr, IB_QP_STATE); + if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = mlx5_ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &rdrain, dev); +} diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index e7f6223e9c60..0823c0bc7e73 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -281,10 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, header->grh.flow_label = ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); header->grh.hop_limit = ah->av->hop_limit; - ib_get_cached_gid(&dev->ib_dev, - be32_to_cpu(ah->av->port_pd) >> 24, - ah->av->gid_index % dev->limits.gid_table_len, - &header->grh.source_gid, NULL); + header->grh.source_gid = ah->ibah.sgid_attr->gid; memcpy(header->grh.destination_gid.raw, ah->av->dgid, 16); } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 541f237965c7..20febafc1fdd 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -96,8 +96,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr props->page_size_cap = mdev->limits.page_size_cap; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; props->max_qp_wr = mdev->limits.max_wqes; - props->max_sge = mdev->limits.max_sg; - props->max_sge_rd = props->max_sge; + props->max_send_sge = mdev->limits.max_sg; + props->max_recv_sge = mdev->limits.max_sg; + props->max_sge_rd = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; props->max_cqe = mdev->limits.max_cqes; props->max_mr = 
mdev->limits.num_mpts - mdev->limits.reserved_mrws; diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 00c27291dc26..bedaa02749fb 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -159,7 +159,7 @@ do { \ #define NES_EVENT_TIMEOUT 1200000 #else -#define nes_debug(level, fmt, args...) +#define nes_debug(level, fmt, args...) no_printk(fmt, ##args) #define assert(expr) do {} while (0) #define NES_EVENT_TIMEOUT 100000 diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6cdfbf8c5674..2b67ace5b614 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -58,6 +58,7 @@ #include <net/neighbour.h> #include <net/route.h> #include <net/ip_fib.h> +#include <net/secure_seq.h> #include <net/tcp.h> #include <linux/fcntl.h> @@ -1445,7 +1446,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, struct nes_cm_listener *listener) { struct nes_cm_node *cm_node; - struct timespec ts; int oldarpindex = 0; int arpindex = 0; struct nes_device *nesdev; @@ -1496,8 +1496,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> NES_CM_DEFAULT_RCV_WND_SCALE; - ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); + cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr), + htonl(cm_node->rem_addr), + htons(cm_node->loc_port), + htons(cm_node->rem_port)); cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; cm_node->tcp_cntxt.rcv_nxt = 0; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 18a7de1c3923..bd0675d8f298 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -70,8 +70,7 @@ static void 
nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); static void nes_terminate_start_timer(struct nes_qp *nesqp); -#ifdef CONFIG_INFINIBAND_NES_DEBUG -static unsigned char *nes_iwarp_state_str[] = { +static const char *const nes_iwarp_state_str[] = { "Non-Existent", "Idle", "RTS", @@ -82,7 +81,7 @@ static unsigned char *nes_iwarp_state_str[] = { "RSVD2", }; -static unsigned char *nes_tcp_state_str[] = { +static const char *const nes_tcp_state_str[] = { "Non-Existent", "Closed", "Listen", @@ -100,7 +99,6 @@ static unsigned char *nes_tcp_state_str[] = { "RSVD3", "RSVD4", }; -#endif static inline void print_ip(struct nes_cm_node *cm_node) { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 32f26556c808..82b8f9630ee8 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -436,7 +436,8 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop props->max_mr_size = 0x80000000; props->max_qp = nesibdev->max_qp; props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; - props->max_sge = nesdev->nesadapter->max_sge; + props->max_send_sge = nesdev->nesadapter->max_sge; + props->max_recv_sge = nesdev->nesadapter->max_sge; props->max_cq = nesibdev->max_cq; props->max_cqe = nesdev->nesadapter->max_cqe; props->max_mr = nesibdev->max_mr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 3897b64532e1..8cc9459a9f9b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type) } static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, - struct rdma_ah_attr *attr, union ib_gid *sgid, + struct rdma_ah_attr *attr, const union ib_gid *sgid, int pdid, bool *isvlan, u16 vlan_tag) { int 
status; @@ -164,17 +164,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, struct ocrdma_ah *ah; bool isvlan = false; u16 vlan_tag = 0xffff; - struct ib_gid_attr sgid_attr; + const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - const struct ib_global_route *grh; - union ib_gid sgid; if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH)) return ERR_PTR(-EINVAL); - grh = rdma_ah_read_grh(attr); if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); @@ -186,20 +183,15 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, if (status) goto av_err; - status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid, - &sgid_attr); - if (status) { - pr_err("%s(): Failed to query sgid, status = %d\n", - __func__, status); - goto av_conf_err; - } - if (is_vlan_dev(sgid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); + sgid_attr = attr->grh.sgid_attr; + if (is_vlan_dev(sgid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev); + /* Get network header type for this GID */ - ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + ah->hdr_type = rdma_gid_attr_network_type(sgid_attr); - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); + status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id, + &isvlan, vlan_tag); if (status) goto av_conf_err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 6c136e5017fe..c6c87cba943b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2494,8 +2494,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, { int status; struct rdma_ah_attr *ah_attr = &attrs->ah_attr; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; + const struct ib_gid_attr *sgid_attr; u32 
vlan_id = 0xFFFF; u8 mac_addr[6], hdr_type; union { @@ -2525,25 +2524,23 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0], sizeof(cmd->params.dgid)); - status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, - &sgid, &sgid_attr); - if (!status) { - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); - memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(sgid_attr.ndev); - } + sgid_attr = ah_attr->grh.sgid_attr; + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); + memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN); qp->sgid_idx = grh->sgid_index; - memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); + memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0], + sizeof(cmd->params.sgid)); status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]); if (status) return status; + cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | (mac_addr[2] << 16) | (mac_addr[3] << 24); - hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + hdr_type = rdma_gid_attr_network_type(sgid_attr); if (hdr_type == RDMA_NETWORK_IPV4) { - rdma_gid2ip(&sgid_addr._sockaddr, &sgid); + rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid); rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid); memcpy(&cmd->params.dgid[0], &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 82e20fc32890..86b22f6b7271 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -89,7 +89,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_send_sge = dev->attr.max_send_sge; + attr->max_recv_sge = dev->attr.max_recv_sge; attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = 
dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; @@ -196,11 +197,10 @@ int ocrdma_query_port(struct ib_device *ibdev, props->sm_lid = 0; props->sm_sl = 0; props->state = port_state; - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | - IB_PORT_IP_BASED_GIDS; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP; + props->ip_gids = true; props->gid_tbl_len = OCRDMA_MAX_SGID; props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; @@ -1774,13 +1774,13 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) * protect against proessing in-flight CQEs for this QP. */ spin_lock_irqsave(&qp->sq_cq->cq_lock, flags); - if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) { spin_lock(&qp->rq_cq->cq_lock); - - ocrdma_del_qpn_map(dev, qp); - - if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + ocrdma_del_qpn_map(dev, qp); spin_unlock(&qp->rq_cq->cq_lock); + } else { + ocrdma_del_qpn_map(dev, qp); + } spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags); if (!pd->uctx) { diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 0f14e687bb91..2e1f352c037d 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -387,11 +387,10 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, bool has_vlan = false, has_grh_ipv6 = true; struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - union ib_gid sgid; + const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int send_size = 0; u16 vlan_id = 0; u16 ether_type; - struct ib_gid_attr sgid_attr; int rc; int ip_ver = 0; @@ -402,28 +401,16 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, for (i = 0; i < swr->num_sge; ++i) send_size += swr->sg_list[i].length; - rc = ib_get_cached_gid(qp->ibqp.device, 
rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &sgid_attr); - if (rc) { - DP_ERR(dev, - "gsi post send: failed to get cached GID (port=%d, ix=%d)\n", - rdma_ah_get_port_num(ah_attr), - grh->sgid_index); - return rc; - } - - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); if (vlan_id < VLAN_CFI_MASK) has_vlan = true; - dev_put(sgid_attr.ndev); - - has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); + has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); if (!has_udp) { /* RoCE v1 */ ether_type = ETH_P_IBOE; *roce_mode = ROCE_V1; - } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) { /* RoCE v2 IPv4 */ ip_ver = 4; ether_type = ETH_P_IP; @@ -471,7 +458,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, udh->grh.flow_label = grh->flow_label; udh->grh.hop_limit = grh->hop_limit; udh->grh.destination_gid = grh->dgid; - memcpy(&udh->grh.source_gid.raw, &sgid.raw, + memcpy(&udh->grh.source_gid.raw, sgid_attr->gid.raw, sizeof(udh->grh.source_gid.raw)); } else { /* IPv4 header */ @@ -482,7 +469,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, udh->ip4.frag_off = htons(IP_DF); udh->ip4.ttl = grh->hop_limit; - ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw); + ipv4_addr = qedr_get_ipv4_from_gid(sgid_attr->gid.raw); udh->ip4.saddr = ipv4_addr; ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); udh->ip4.daddr = ipv4_addr; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f7ac8fc9b531..b82c5d5fb0e3 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -112,7 +112,8 @@ int qedr_query_device(struct ib_device *ibdev, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = qattr->max_sge; + attr->max_send_sge = qattr->max_sge; + attr->max_recv_sge = qattr->max_sge; 
attr->max_sge_rd = qattr->max_sge; attr->max_cq = qattr->max_cq; attr->max_cqe = qattr->max_cqe; @@ -224,7 +225,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->lmc = 0; attr->sm_lid = 0; attr->sm_sl = 0; - attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; + attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { attr->gid_tbl_len = 1; attr->pkey_tbl_len = 1; @@ -1075,27 +1076,19 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, struct qed_rdma_modify_qp_in_params *qp_params) { + const struct ib_gid_attr *gid_attr; enum rdma_network_type nw_type; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); - union ib_gid gid; u32 ipv4_addr; - int rc = 0; int i; - rc = ib_get_cached_gid(ibqp->device, - rdma_ah_get_port_num(&attr->ah_attr), - grh->sgid_index, &gid, &gid_attr); - if (rc) - return rc; - - qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); + gid_attr = grh->sgid_attr; + qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev); - dev_put(gid_attr.ndev); - nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); + nw_type = rdma_gid_attr_network_type(gid_attr); switch (nw_type) { case RDMA_NETWORK_IPV6: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], &grh->dgid, @@ -1105,7 +1098,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); break; case RDMA_NETWORK_IB: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], &grh->dgid, @@ -1115,7 +1108,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, case RDMA_NETWORK_IPV4: memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); - ipv4_addr = 
qedr_get_ipv4_from_gid(gid.raw); + ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw); qp_params->sgid.ipv4_addr = ipv4_addr; ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); @@ -2302,7 +2295,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, if (!ah) return ERR_PTR(-ENOMEM); - ah->attr = *attr; + rdma_copy_ah_attr(&ah->attr, attr); return &ah->ibah; } @@ -2311,6 +2304,7 @@ int qedr_destroy_ah(struct ib_ah *ibah) { struct qedr_ah *ah = get_qedr_ah(ibah); + rdma_destroy_ah_attr(&ah->attr); kfree(ah); return 0; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 14b4057a2b8f..41babbc0db58 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1489,7 +1489,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_mr_size = ~0ULL; rdi->dparms.props.max_qp = ib_qib_max_qps; rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs; - rdi->dparms.props.max_sge = ib_qib_max_sges; + rdi->dparms.props.max_send_sge = ib_qib_max_sges; + rdi->dparms.props.max_recv_sge = ib_qib_max_sges; rdi->dparms.props.max_sge_rd = ib_qib_max_sges; rdi->dparms.props.max_cq = ib_qib_max_cqs; rdi->dparms.props.max_cqe = ib_qib_max_cqes; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index f9a46768a19a..e72562a8959a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -78,9 +78,6 @@ struct qib_verbs_txreq; #define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) -/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ -#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) /* Values for set/get portinfo VLCap OperationalVLs */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a688a5669168..9524524fade4 100644 --- 
a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -666,7 +666,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr) usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); - usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); + usnic_uiom_reg_release(mr->umem, ibmr->uobject->context); kfree(mr); return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 4381c0a9a873..9dd39daa602b 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -41,6 +41,7 @@ #include <linux/workqueue.h> #include <linux/list.h> #include <linux/pci.h> +#include <rdma/ib_verbs.h> #include "usnic_log.h" #include "usnic_uiom.h" @@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) for_each_sg(chunk->page_list, sg, chunk->nents, i) { page = sg_page(sg); pa = sg_phys(sg); - if (dirty) + if (!PageDirty(page) && dirty) set_page_dirty_lock(page); put_page(page); usnic_dbg("pa: %pa\n", &pa); @@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, dma_addr_t pa; unsigned int gup_flags; + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. 
+ */ + if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size)) + return -EINVAL; + + if (!size) + return -EINVAL; + if (!can_do_mlock()) return -EPERM; @@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, down_write(&current->mm->mmap_sem); - locked = npages + current->mm->locked_vm; + locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = 0; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), gup_flags, page_list, NULL); @@ -186,7 +197,7 @@ out: if (ret < 0) usnic_uiom_put_pages(chunk_list, 0); else - current->mm->locked_vm = locked; + current->mm->pinned_vm = locked; up_write(&current->mm->mmap_sem); free_page((unsigned long) page_list); @@ -420,18 +431,22 @@ out_free_uiomr: return ERR_PTR(err); } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext) { + struct task_struct *task; struct mm_struct *mm; unsigned long diff; __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); - mm = get_task_mm(current); - if (!mm) { - kfree(uiomr); - return; - } + task = get_pid_task(ucontext->tgid, PIDTYPE_PID); + if (!task) + goto out; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; @@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue.
*/ - if (closing) { + if (ucontext->closing) { if (!down_write_trylock(&mm->mmap_sem)) { INIT_WORK(&uiomr->work, usnic_uiom_reg_account); uiomr->mm = mm; @@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) } else down_write(&mm->mmap_sem); - current->mm->locked_vm -= diff; + mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); +out: kfree(uiomr); } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 431efe4143f4..8c096acff123 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -39,6 +39,8 @@ #include "usnic_uiom_interval_tree.h" +struct ib_ucontext; + #define USNIC_UIOM_READ (1) #define USNIC_UIOM_WRITE (2) @@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs); struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext); int usnic_uiom_init(char *drv_name); void usnic_uiom_fini(void); #endif /* USNIC_UIOM_H_ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 44cb1cfba417..42b8685c997e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -378,11 +378,6 @@ static inline enum ib_port_speed pvrdma_port_speed_to_ib( return (enum ib_port_speed)speed; } -static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask) -{ - return attr_mask; -} - static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask) { return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 0be33a81bbe6..912933549dfb 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ 
b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -62,9 +62,7 @@ static DEFINE_MUTEX(pvrdma_device_list_lock); static LIST_HEAD(pvrdma_device_list); static struct workqueue_struct *event_wq; -static int pvrdma_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context); +static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context); static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t show_hca(struct device *device, struct device_attribute *attr, @@ -650,13 +648,11 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, return 0; } -static int pvrdma_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) +static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context) { struct pvrdma_dev *dev = to_vdev(attr->device); - return pvrdma_add_gid_at_index(dev, gid, + return pvrdma_add_gid_at_index(dev, &attr->gid, ib_gid_type_to_pvrdma(attr->gid_type), attr->index); } @@ -699,8 +695,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) } static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, + struct net_device *ndev, unsigned long event) { + struct pci_dev *pdev_net; + unsigned int slot; + switch (event) { case NETDEV_REBOOT: case NETDEV_DOWN: @@ -718,6 +718,24 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, else pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; + case NETDEV_UNREGISTER: + dev_put(dev->netdev); + dev->netdev = NULL; + break; + case NETDEV_REGISTER: + /* vmxnet3 will have same bus, slot. 
But func will be 0 */ + slot = PCI_SLOT(dev->pdev->devfn); + pdev_net = pci_get_slot(dev->pdev->bus, + PCI_DEVFN(slot, 0)); + if ((dev->netdev == NULL) && + (pci_get_drvdata(pdev_net) == ndev)) { + /* this is our netdev */ + dev->netdev = ndev; + dev_hold(ndev); + } + pci_dev_put(pdev_net); + break; + default: dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", event, dev->ib_dev.name); @@ -734,8 +752,11 @@ static void pvrdma_netdevice_event_work(struct work_struct *work) mutex_lock(&pvrdma_device_list_lock); list_for_each_entry(dev, &pvrdma_device_list, device_link) { - if (dev->netdev == netdev_work->event_netdev) { - pvrdma_netdevice_event_handle(dev, netdev_work->event); + if ((netdev_work->event == NETDEV_REGISTER) || + (dev->netdev == netdev_work->event_netdev)) { + pvrdma_netdevice_event_handle(dev, + netdev_work->event_netdev, + netdev_work->event); break; } } @@ -968,6 +989,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, ret = -ENODEV; goto err_free_cq_ring; } + dev_hold(dev->netdev); dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); @@ -1040,6 +1062,10 @@ err_free_intrs: pvrdma_free_irq(dev); pci_free_irq_vectors(pdev); err_free_cq_ring: + if (dev->netdev) { + dev_put(dev->netdev); + dev->netdev = NULL; + } pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); err_free_async_ring: pvrdma_page_dir_cleanup(dev, &dev->async_pdir); @@ -1079,6 +1105,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) flush_workqueue(event_wq); + if (dev->netdev) { + dev_put(dev->netdev); + dev->netdev = NULL; + } + /* Unregister ib device */ ib_unregister_device(&dev->ib_dev); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index a51463cd2f37..b65d10b0a875 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -82,7 +82,8 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_qp = dev->dsr->caps.max_qp; 
props->max_qp_wr = dev->dsr->caps.max_qp_wr; props->device_cap_flags = dev->dsr->caps.device_cap_flags; - props->max_sge = dev->dsr->caps.max_sge; + props->max_send_sge = dev->dsr->caps.max_sge; + props->max_recv_sge = dev->dsr->caps.max_sge; props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, dev->dsr->caps.max_sge_rd); props->max_srq = dev->dsr->caps.max_srq; @@ -154,7 +155,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port, props->gid_tbl_len = resp->attrs.gid_tbl_len; props->port_cap_flags = pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); - props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; props->max_msg_sz = resp->attrs.max_msg_sz; props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ba3639a0d77c..89ec0f64abfc 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -120,7 +120,8 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, dev->n_ahs_allocated++; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - ah->attr = *ah_attr; + rdma_copy_ah_attr(&ah->attr, ah_attr); + atomic_set(&ah->refcount, 0); if (dev->driver_f.notify_new_ah) @@ -148,6 +149,7 @@ int rvt_destroy_ah(struct ib_ah *ibah) dev->n_ahs_allocated--; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + rdma_destroy_ah_attr(&ah->attr); kfree(ah); return 0; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 41183bd665ca..d29e3c943399 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -780,14 +780,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!rdi) return ERR_PTR(-EINVAL); - if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || init_attr->cap.max_send_wr > 
rdi->dparms.props.max_qp_wr || init_attr->create_flags) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { - if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || + if (init_attr->cap.max_recv_sge > + rdi->dparms.props.max_recv_sge || init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); @@ -1336,13 +1337,13 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->qp_access_flags = attr->qp_access_flags; if (attr_mask & IB_QP_AV) { - qp->remote_ah_attr = attr->ah_attr; + rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr); qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr); qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); } if (attr_mask & IB_QP_ALT_PATH) { - qp->alt_ah_attr = attr->alt_ah_attr; + rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr); qp->s_alt_pkey_index = attr->alt_pkey_index; } @@ -1459,6 +1460,8 @@ int rvt_destroy_qp(struct ib_qp *ibqp) vfree(qp->s_wq); rdi->driver_f.qp_priv_free(rdi, qp); kfree(qp->s_ack_queue); + rdma_destroy_ah_attr(&qp->remote_ah_attr); + rdma_destroy_ah_attr(&qp->alt_ah_attr); kfree(qp); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 7121e1b1eb89..10999fa69281 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -91,7 +91,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; - rxe->attr.max_sge = RXE_MAX_SGE; + rxe->attr.max_send_sge = RXE_MAX_SGE; + rxe->attr.max_recv_sge = RXE_MAX_SGE; rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; rxe->attr.max_cq = RXE_MAX_CQ; rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 7f1ae364088a..26fe8d7dbc55 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ 
b/drivers/infiniband/sw/rxe/rxe_av.c @@ -55,29 +55,41 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr) { + const struct ib_global_route *grh = rdma_ah_read_grh(attr); + memset(av, 0, sizeof(*av)); - memcpy(&av->grh, rdma_ah_read_grh(attr), - sizeof(*rdma_ah_read_grh(attr))); + memcpy(av->grh.dgid.raw, grh->dgid.raw, sizeof(grh->dgid.raw)); + av->grh.flow_label = grh->flow_label; + av->grh.sgid_index = grh->sgid_index; + av->grh.hop_limit = grh->hop_limit; + av->grh.traffic_class = grh->traffic_class; av->port_num = port_num; } void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr) { + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + attr->type = RDMA_AH_ATTR_TYPE_ROCE; - memcpy(rdma_ah_retrieve_grh(attr), &av->grh, sizeof(av->grh)); + + memcpy(grh->dgid.raw, av->grh.dgid.raw, sizeof(av->grh.dgid.raw)); + grh->flow_label = av->grh.flow_label; + grh->sgid_index = av->grh.sgid_index; + grh->hop_limit = av->grh.hop_limit; + grh->traffic_class = av->grh.traffic_class; + rdma_ah_set_ah_flags(attr, IB_AH_GRH); rdma_ah_set_port_num(attr, av->port_num); } -void rxe_av_fill_ip_info(struct rxe_av *av, - struct rdma_ah_attr *attr, - struct ib_gid_attr *sgid_attr, - union ib_gid *sgid) +void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) { - rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid); + const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr; + + rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &rdma_ah_read_grh(attr)->dgid); - av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); + av->network_type = rdma_gid_attr_network_type(sgid_attr); } struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index a51ece596c43..87d14f7ef21b 100644 --- 
a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -43,10 +43,7 @@ void rxe_av_from_attr(u8 port_num, struct rxe_av *av, void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); -void rxe_av_fill_ip_info(struct rxe_av *av, - struct rdma_ah_attr *attr, - struct ib_gid_attr *sgid_attr, - union ib_gid *sgid); +void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 59ec6d918ed4..8094cbaa54a9 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -182,39 +182,19 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, #endif -/* - * Derive the net_device from the av. - * For physical devices, this will just return rxe->ndev. - * But for VLAN devices, it will return the vlan dev. - * Caller should dev_put() the returned net_device. - */ -static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe, - int port_num, - struct rxe_av *av) -{ - union ib_gid gid; - struct ib_gid_attr attr; - struct net_device *ndev = rxe->ndev; - - if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index, - &gid, &attr) == 0 && - attr.ndev && attr.ndev != ndev) - ndev = attr.ndev; - else - /* Only to ensure that caller may call dev_put() */ - dev_hold(ndev); - - return ndev; -} - static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_av *av) { + const struct ib_gid_attr *attr; struct dst_entry *dst = NULL; struct net_device *ndev; - ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av); + attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num, + av->grh.sgid_index); + if (IS_ERR(attr)) + return NULL; + ndev = attr->ndev; if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); @@ -243,9 +223,13 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, rt6_get_cookie((struct rt6_info 
*)dst); #endif } - } - dev_put(ndev); + if (dst && (qp_type(qp) == IB_QPT_RC)) { + dst_hold(dst); + sk_dst_set(qp->sk->sk, dst); + } + } + rdma_put_gid_attr(attr); return dst; } @@ -418,11 +402,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, av->grh.traffic_class, av->grh.hop_limit, df, xnet); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_set(qp->sk->sk, dst); - else - dst_release(dst); - + dst_release(dst); return 0; } @@ -450,11 +430,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, av->grh.traffic_class, av->grh.hop_limit); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_set(qp->sk->sk, dst); - else - dst_release(dst); - + dst_release(dst); return 0; } @@ -536,9 +512,13 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, unsigned int hdr_len; struct sk_buff *skb; struct net_device *ndev; + const struct ib_gid_attr *attr; const int port_num = 1; - ndev = rxe_netdev_from_av(rxe, port_num, av); + attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index); + if (IS_ERR(attr)) + return NULL; + ndev = attr->ndev; if (av->network_type == RDMA_NETWORK_IPV4) hdr_len = ETH_HLEN + sizeof(struct udphdr) + @@ -550,10 +530,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), GFP_ATOMIC); - if (unlikely(!skb)) { - dev_put(ndev); - return NULL; - } + if (unlikely(!skb)) + goto out; skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); @@ -568,7 +546,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, pkt->hdr = skb_put_zero(skb, paylen); pkt->mask |= RXE_GRH_MASK; - dev_put(ndev); +out: + rdma_put_gid_attr(attr); return skb; } diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 1b596fbbe251..4555510d86c4 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ 
b/drivers/infiniband/sw/rxe/rxe_param.h @@ -83,7 +83,7 @@ enum rxe_device_param { RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, RXE_MAX_LOG_CQE = 15, - RXE_MAX_MR = 2 * 1024, + RXE_MAX_MR = 256 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, RXE_MAX_EE_RD_ATOM = 0, diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b9f7aa1114b2..c58452daffc7 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -49,9 +49,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, goto err1; } - if (cap->max_send_sge > rxe->attr.max_sge) { + if (cap->max_send_sge > rxe->attr.max_send_sge) { pr_warn("invalid send sge = %d > %d\n", - cap->max_send_sge, rxe->attr.max_sge); + cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } @@ -62,9 +62,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, goto err1; } - if (cap->max_recv_sge > rxe->attr.max_sge) { + if (cap->max_recv_sge > rxe->attr.max_recv_sge) { pr_warn("invalid recv sge = %d > %d\n", - cap->max_recv_sge, rxe->attr.max_sge); + cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } @@ -580,9 +580,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata) { int err; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - union ib_gid sgid; - struct ib_gid_attr sgid_attr; if (mask & IB_QP_MAX_QP_RD_ATOMIC) { int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); @@ -623,30 +620,14 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.qkey = attr->qkey; if (mask & IB_QP_AV) { - ib_get_cached_gid(&rxe->ib_dev, 1, - rdma_ah_read_grh(&attr->ah_attr)->sgid_index, - &sgid, &sgid_attr); rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr); - rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr, - &sgid_attr, &sgid); - if (sgid_attr.ndev) - dev_put(sgid_attr.ndev); + rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr); } if (mask & 
IB_QP_ALT_PATH) { - u8 sgid_index = - rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index; - - ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index, - &sgid, &sgid_attr); - rxe_av_from_attr(attr->alt_port_num, &qp->alt_av, &attr->alt_ah_attr); - rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr, - &sgid_attr, &sgid); - if (sgid_attr.ndev) - dev_put(sgid_attr.ndev); - + rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr); qp->attr.alt_port_num = attr->alt_port_num; qp->attr.alt_pkey_index = attr->alt_pkey_index; qp->attr.alt_timeout = attr->alt_timeout; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index dfba44a40f0b..cc5cfd156758 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -256,8 +256,7 @@ static int hdr_check(struct rxe_pkt_info *pkt) return 0; err2: - if (qp) - rxe_drop_ref(qp); + rxe_drop_ref(qp); err1: return -EINVAL; } @@ -328,6 +327,7 @@ err1: static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; @@ -339,9 +339,14 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; } - return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, - IB_GID_TYPE_ROCE_UDP_ENCAP, - 1, skb->dev, NULL); + gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid, + IB_GID_TYPE_ROCE_UDP_ENCAP, + 1, skb->dev); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + + rdma_put_gid_attr(gid_attr); + return 0; } /* rxe_rcv is called from the interface driver */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5b57de30dee4..aa5833318372 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -884,6 +884,11 @@ static enum resp_states do_complete(struct rxe_qp *qp, else wc->network_hdr_type = RDMA_NETWORK_IPV6; + if (is_vlan_dev(skb->dev)) { + wc->wc_flags |= IB_WC_WITH_VLAN; 
+ wc->vlan_id = vlan_dev_vlan_id(skb->dev); + } + if (pkt->mask & RXE_IMMDT_MASK) { wc->wc_flags |= IB_WC_WITH_IMM; wc->ex.imm_data = immdt_imm(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9deafc3aa6af..1188e163204d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -222,25 +222,11 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd) return 0; } -static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, - struct rxe_av *av) +static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, + struct rxe_av *av) { - int err; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; - - err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr), - rdma_ah_read_grh(attr)->sgid_index, &sgid, - &sgid_attr); - if (err) { - pr_err("Failed to query sgid. err = %d\n", err); - return err; - } - rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr); - rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid); - dev_put(sgid_attr.ndev); - return 0; + rxe_av_fill_ip_info(av, attr); } static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, @@ -255,28 +241,17 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, err = rxe_av_chk_attr(rxe, attr); if (err) - goto err1; + return ERR_PTR(err); ah = rxe_alloc(&rxe->ah_pool); - if (!ah) { - err = -ENOMEM; - goto err1; - } + if (!ah) + return ERR_PTR(-ENOMEM); rxe_add_ref(pd); ah->pd = pd; - err = rxe_init_av(rxe, attr, &ah->av); - if (err) - goto err2; - + rxe_init_av(rxe, attr, &ah->av); return &ah->ibah; - -err2: - rxe_drop_ref(pd); - rxe_drop_ref(ah); -err1: - return ERR_PTR(err); } static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) @@ -289,10 +264,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) if (err) return err; - err = rxe_init_av(rxe, attr, &ah->av); - if (err) - return err; - + rxe_init_av(rxe, attr, &ah->av); return 0; } diff --git 
a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a50b062ed13e..e255a7e5a4c3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -252,11 +252,11 @@ struct ipoib_cm_tx { struct ipoib_neigh *neigh; struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; + unsigned int tx_head; + unsigned int tx_tail; unsigned long flags; u32 mtu; - unsigned max_send_sge; + unsigned int max_send_sge; }; struct ipoib_cm_rx_buf { @@ -373,8 +373,8 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; + unsigned int tx_head; + unsigned int tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_ud_wr tx_wr; struct ib_wc send_wc[MAX_SEND_CQE]; @@ -404,7 +404,7 @@ struct ipoib_dev_priv { #endif u64 hca_caps; struct ipoib_ethtool_st ethtool; - unsigned max_send_sge; + unsigned int max_send_sge; bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; }; @@ -414,7 +414,7 @@ struct ipoib_ah { struct ib_ah *ah; struct list_head list; struct kref ref; - unsigned last_send; + unsigned int last_send; int valid; }; @@ -729,7 +729,7 @@ void ipoib_cm_dev_stop(struct net_device *dev) static inline int ipoib_cm_dev_init(struct net_device *dev) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6535d9beb24d..582f199887b0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -275,7 +275,7 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, static int ipoib_cm_modify_rx_qp(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, - unsigned psn) + unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_attr qp_attr; @@ -363,7 +363,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, 
struct ib_cm_id *cm_i if (!rx->rx_ring) return -ENOMEM; - t = kmalloc(sizeof *t, GFP_KERNEL); + t = kmalloc(sizeof(*t), GFP_KERNEL); if (!t) { ret = -ENOMEM; goto err_free_1; @@ -422,7 +422,7 @@ err_free_1: static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, struct ib_cm_req_event_param *req, - unsigned psn) + unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_data data = {}; @@ -432,7 +432,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); rep.private_data = &data; - rep.private_data_len = sizeof data; + rep.private_data_len = sizeof(data); rep.flow_control = 0; rep.rnr_retry_count = req->rnr_retry_count; rep.srq = ipoib_cm_has_srq(dev); @@ -446,11 +446,11 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even struct net_device *dev = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_rx *p; - unsigned psn; + unsigned int psn; int ret; ipoib_dbg(priv, "REQ arrived\n"); - p = kzalloc(sizeof *p, GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; p->dev = dev; @@ -547,7 +547,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, 0, PAGE_SIZE); --skb_shinfo(skb)->nr_frags; } else { - size = min(length, (unsigned) PAGE_SIZE); + size = min_t(unsigned int, length, PAGE_SIZE); skb_frag_size_set(frag, size); skb->data_len += size; @@ -641,8 +641,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } } - frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, - (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; + frags = PAGE_ALIGN(wc->byte_len - + min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) / + PAGE_SIZE; newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping, GFP_ATOMIC); @@ -657,7 +658,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } ipoib_cm_dma_unmap_rx(priv, frags, 
rx_ring[wr_id].mapping); - memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); + memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping)); ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); @@ -712,7 +713,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_tx_buf *tx_req; int rc; - unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb); + unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb); if (unlikely(skb->len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -1068,8 +1069,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) - attr.cap.max_send_sge = - min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); + attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, + MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); tx->max_send_sge = attr.cap.max_send_sge; @@ -1094,7 +1095,7 @@ static int ipoib_cm_send_req(struct net_device *dev, req.qp_num = qp->qp_num; req.qp_type = qp->qp_type; req.private_data = &data; - req.private_data_len = sizeof data; + req.private_data_len = sizeof(data); req.flow_control = 0; req.starting_psn = 0; /* FIXME */ @@ -1152,7 +1153,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, ret = -ENOMEM; goto err_tx; } - memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring)); p->qp = ipoib_cm_create_tx_qp(p->dev, p); memalloc_noio_restore(noio_flag); @@ -1305,7 +1306,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_tx *tx; - tx = kzalloc(sizeof *tx, GFP_ATOMIC); + tx = kzalloc(sizeof(*tx), GFP_ATOMIC); if (!tx) return NULL; @@ -1370,7 +1371,7 @@ static 
void ipoib_cm_tx_start(struct work_struct *work) neigh->daddr + QPN_AND_OPTIONS_OFFSET); goto free_neigh; } - memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); + memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec)); spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); @@ -1428,7 +1429,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) struct net_device *dev = priv->dev; struct sk_buff *skb; unsigned long flags; - unsigned mtu = priv->mcast_mtu; + unsigned int mtu = priv->mcast_mtu; netif_tx_lock_bh(dev); spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index ea302b054601..178488028734 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -262,15 +262,15 @@ static const struct file_operations ipoib_path_fops = { void ipoib_create_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - char name[IFNAMSIZ + sizeof "_path"]; + char name[IFNAMSIZ + sizeof("_path")]; - snprintf(name, sizeof name, "%s_mcg", dev->name); + snprintf(name, sizeof(name), "%s_mcg", dev->name); priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_mcg_fops); if (!priv->mcg_dentry) ipoib_warn(priv, "failed to create mcg debug file\n"); - snprintf(name, sizeof name, "%s_path", dev->name); + snprintf(name, sizeof(name), "%s_path", dev->name); priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_path_fops); if (!priv->path_dentry) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f47f9ace1f48..5f5d42bad2ea 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -40,6 +40,7 @@ #include <linux/ip.h> #include <linux/tcp.h> +#include <rdma/ib_cache.h> #include "ipoib.h" @@ -57,7 +58,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct 
ipoib_ah *ah; struct ib_ah *vah; - ah = kmalloc(sizeof *ah, GFP_KERNEL); + ah = kmalloc(sizeof(*ah), GFP_KERNEL); if (!ah) return ERR_PTR(-ENOMEM); @@ -202,7 +203,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } memcpy(mapping, priv->rx_ring[wr_id].mapping, - IPOIB_UD_RX_SG * sizeof *mapping); + IPOIB_UD_RX_SG * sizeof(*mapping)); /* * If we can't allocate a new RX buffer, dump @@ -568,7 +569,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; int hlen, rc; void *phead; - unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb); + unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb); if (skb_is_gso(skb)) { hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); @@ -1069,7 +1070,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) bool ret = false; netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); - if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) + if (rdma_query_gid(priv->ca, priv->port, 0, &gid0)) return false; netif_addr_lock_bh(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 26cde95bc0f3..012c9e3970ac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -634,7 +634,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) { struct ipoib_path_iter *iter; - iter = kmalloc(sizeof *iter, GFP_KERNEL); + iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return NULL; @@ -770,8 +770,10 @@ static void path_rec_completion(int status, struct rdma_ah_attr av; if (!ib_init_ah_attr_from_path(priv->ca, priv->port, - pathrec, &av)) + pathrec, &av, NULL)) { ah = ipoib_create_ah(dev, priv->pd, &av); + rdma_destroy_ah_attr(&av); + } } spin_lock_irqsave(&priv->lock, flags); @@ -883,7 +885,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) if (!priv->broadcast) return NULL; - path = 
kzalloc(sizeof *path, GFP_ATOMIC); + path = kzalloc(sizeof(*path), GFP_ATOMIC); if (!path) return NULL; @@ -1199,11 +1201,13 @@ static void ipoib_timeout(struct net_device *dev) static int ipoib_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - const void *daddr, const void *saddr, unsigned len) + const void *daddr, + const void *saddr, + unsigned int len) { struct ipoib_header *header; - header = skb_push(skb, sizeof *header); + header = skb_push(skb, sizeof(*header)); header->proto = htons(type); header->reserved = 0; @@ -1371,7 +1375,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, { struct ipoib_neigh *neigh; - neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); + neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC); if (!neigh) return NULL; @@ -1526,7 +1530,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) return -ENOMEM; set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); size = roundup_pow_of_two(arp_tbl.gc_thresh3); - buckets = kcalloc(size, sizeof(*buckets), GFP_KERNEL); + buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL); if (!buckets) { kfree(htbl); return -ENOMEM; @@ -1554,7 +1558,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct ipoib_neigh __rcu **buckets = htbl->buckets; struct ipoib_neigh_table *ntbl = htbl->ntbl; - kfree(buckets); + kvfree(buckets); kfree(htbl); complete(&ntbl->deleted); } @@ -2287,9 +2291,9 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; - result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); + result = rdma_query_gid(hca, port, 0, &priv->local_gid); if (result) { - pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n", + pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } @@ -2362,7 +2366,7 @@ static void ipoib_add_one(struct ib_device *device) int p; int count = 0; - dev_list = kmalloc(sizeof *dev_list, 
GFP_KERNEL); + dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL); if (!dev_list) return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 6709328d90f8..f696ea49c97a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -140,7 +140,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, { struct ipoib_mcast *mcast; - mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); + mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC); if (!mcast) return NULL; @@ -917,7 +917,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast)) continue; - memcpy(mgid.raw, ha->addr + 4, sizeof mgid); + memcpy(mgid.raw, ha->addr + 4, sizeof(mgid)); mcast = __ipoib_mcast_find(dev, &mgid); if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -997,7 +997,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) { struct ipoib_mcast_iter *iter; - iter = kmalloc(sizeof *iter, GFP_KERNEL); + iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 984a88096f39..9f36ca786df8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -52,7 +52,7 @@ int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca, if (set_qkey) { ret = -ENOMEM; - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); if (!qp_attr) goto out; @@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .cap = { .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, - .max_send_sge = min_t(u32, priv->ca->attrs.max_sge, + .max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, MAX_SKB_FRAGS + 1), 
.max_recv_sge = IPOIB_UD_RX_SG }, @@ -168,8 +168,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) else size += ipoib_recvq_size * ipoib_max_conn_qp; } else - if (ret != -ENOSYS) - return -ENODEV; + if (ret != -EOPNOTSUPP) + return ret; req_vec = (priv->port - 1) * 2; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 55a9b71ed05a..b067ad5e4c7e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -130,7 +130,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) return -EPERM; - snprintf(intf_name, sizeof intf_name, "%s.%04x", + snprintf(intf_name, sizeof(intf_name), "%s.%04x", ppriv->dev->name, pkey); if (!mutex_trylock(&ppriv->sysfs_mutex)) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9a6434c31db2..3fecd87c9f2b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -610,12 +610,10 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, uint32_t initial_cmdsn) { struct iscsi_cls_session *cls_session; - struct iscsi_session *session; struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; struct ib_conn *ib_conn; u32 max_fr_sectors; - u16 max_cmds; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) @@ -633,8 +631,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, */ if (ep) { iser_conn = ep->dd_data; - max_cmds = iser_conn->max_cmds; shost->sg_tablesize = iser_conn->scsi_sg_tablesize; + shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds); mutex_lock(&iser_conn->state_mutex); if (iser_conn->state != ISER_CONN_UP) { @@ -660,7 +658,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } mutex_unlock(&iser_conn->state_mutex); } else { - max_cmds = ISER_DEF_XMIT_CMDS_MAX; + shost->can_queue = min_t(u16, cmds_max, 
ISER_DEF_XMIT_CMDS_MAX); if (iscsi_host_add(shost, NULL)) goto free_host; } @@ -676,21 +674,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, iser_warn("max_sectors was reduced from %u to %u\n", iser_max_sectors, shost->max_sectors); - if (cmds_max > max_cmds) { - iser_info("cmds_max changed from %u to %u\n", - cmds_max, max_cmds); - cmds_max = max_cmds; - } - cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, - cmds_max, 0, + shost->can_queue, 0, sizeof(struct iscsi_iser_task), initial_cmdsn, 0); if (!cls_session) goto remove_host; - session = cls_session->dd_data; - shost->can_queue = session->scsi_cmds_max; return cls_session; remove_host: diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index cccbcf0eb035..7e056f3c82a0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -136,7 +136,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; - attr.cap.max_send_sge = device->ib_device->attrs.max_sge; + attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; @@ -299,7 +299,8 @@ isert_create_device_ib_res(struct isert_device *device) struct ib_device *ib_dev = device->ib_device; int ret; - isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge); + isert_dbg("devattr->max_send_sge: %d devattr->max_recv_sge %d\n", + ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge); isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); ret = isert_alloc_comps(device); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9786b24b956f..117dc1082e58 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -57,13 +57,10 @@ #define DRV_NAME 
"ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "2.0" -#define DRV_RELDATE "July 26, 2015" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_INFO(release_date, DRV_RELDATE); #if !defined(CONFIG_DYNAMIC_DEBUG) #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) @@ -3843,7 +3840,7 @@ static ssize_t srp_create_target(struct device *dev, INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); spin_lock_init(&target->lock); - ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); + ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) goto out; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 3081c629a7f7..8bd7373cb828 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -575,8 +575,7 @@ static int srpt_refresh_port(struct srpt_port *sport) sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; - ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid, - NULL); + ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); if (ret) goto err_query_port; @@ -720,7 +719,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) && ioctx_size != sizeof(struct srpt_send_ioctx)); - ring = kmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); + ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); if (!ring) goto out; for (i = 0; i < ring_size; ++i) { @@ -734,7 +733,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, err: while (--i >= 0) srpt_free_ioctx(sdev, ring[i], dma_size, dir); - kfree(ring); + kvfree(ring); ring = NULL; out: return ring; @@ -759,7 +758,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, for (i = 0; i < ring_size; ++i) srpt_free_ioctx(sdev, 
ioctx_ring[i], dma_size, dir); - kfree(ioctx_ring); + kvfree(ioctx_ring); } /** @@ -1754,13 +1753,15 @@ retry: */ qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr); qp_init->cap.max_rdma_ctxs = sq_size / 2; - qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); + qp_init->cap.max_send_sge = min(attrs->max_send_sge, + SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; if (sdev->use_srq) { qp_init->srq = sdev->srq; } else { qp_init->cap.max_recv_wr = ch->rq_size; - qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; + qp_init->cap.max_recv_sge = min(attrs->max_recv_sge, + SRPT_MAX_SG_PER_WQE); } if (ch->using_rdma_cm) { @@ -1833,8 +1834,7 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch) int ret; if (!srpt_set_ch_state(ch, CH_DRAINING)) { - pr_debug("%s-%d: already closed\n", ch->sess_name, - ch->qp->qp_num); + pr_debug("%s: already closed\n", ch->sess_name); return false; } @@ -1940,8 +1940,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport) list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s-%d because target %s_%d has been disabled\n", - ch->sess_name, ch->qp->qp_num, + pr_info("Closing channel %s because target %s_%d has been disabled\n", + ch->sess_name, sport->sdev->device->name, sport->port); srpt_close_ch(ch); } @@ -2087,7 +2087,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, struct rdma_conn_param rdma_cm; struct ib_cm_rep_param ib_cm; } *rep_param = NULL; - struct srpt_rdma_ch *ch; + struct srpt_rdma_ch *ch = NULL; char i_port_id[36]; u32 it_iu_len; int i, ret; @@ -2234,13 +2234,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); if (IS_ERR_OR_NULL(ch->sess)) { + WARN_ON_ONCE(ch->sess == NULL); ret = PTR_ERR(ch->sess); + ch->sess = NULL; pr_info("Rejected login for initiator %s: ret = %d.\n", ch->sess_name, ret); 
rej->reason = cpu_to_be32(ret == -ENOMEM ? SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); - goto reject; + goto destroy_ib; } mutex_lock(&sport->mutex); @@ -2279,7 +2281,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n", ret); - goto destroy_ib; + goto reject; } pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess, @@ -2358,8 +2360,11 @@ free_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, ch->max_rsp_size, DMA_TO_DEVICE); + free_ch: - if (ib_cm_id) + if (rdma_cm_id) + rdma_cm_id->context = NULL; + else ib_cm_id->context = NULL; kfree(ch); ch = NULL; @@ -2379,6 +2384,15 @@ reject: ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, rej, sizeof(*rej)); + if (ch && ch->sess) { + srpt_close_ch(ch); + /* + * Tell the caller not to free cm_id since + * srpt_release_channel_work() will do that. + */ + ret = 0; + } + out: kfree(rep_param); kfree(rsp); @@ -2969,7 +2983,8 @@ static void srpt_add_one(struct ib_device *device) pr_debug("device = %p\n", device); - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt), + GFP_KERNEL); if (!sdev) goto err; @@ -3023,8 +3038,6 @@ static void srpt_add_one(struct ib_device *device) srpt_event_handler); ib_register_event_handler(&sdev->event_handler); - WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); - for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; INIT_LIST_HEAD(&sport->nexus_list); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 2361483476a0..444dfd7281b5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -396,9 +396,9 @@ struct srpt_port { * @sdev_mutex: Serializes use_srq changes. 
* @use_srq: Whether or not to use SRQ. * @ioctx_ring: Per-HCA SRQ. - * @port: Information about the ports owned by this HCA. * @event_handler: Per-HCA asynchronous IB event handler. * @list: Node in srpt_dev_list. + * @port: Information about the ports owned by this HCA. */ struct srpt_device { struct ib_device *device; @@ -410,9 +410,9 @@ struct srpt_device { struct mutex sdev_mutex; bool use_srq; struct srpt_recv_ioctx **ioctx_ring; - struct srpt_port port[2]; struct ib_event_handler event_handler; struct list_head list; + struct srpt_port port[]; }; #endif /* IB_SRPT_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 041c18faea46..381dbfa6a68e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -433,6 +433,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_FPGA_QUERY_QP: case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -612,6 +614,9 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ARM_XRQ); MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); default: return "unknown command opcode"; } } diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 52e0c5d579a7..0d7f3d603f1d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -874,7 +874,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_send_wr = queue->send_queue_size + 1; qp_attr.cap.max_rdma_ctxs = queue->send_queue_size; qp_attr.cap.max_send_sge = 
max(ndev->device->attrs.max_sge_rd, - ndev->device->attrs.max_sge); + ndev->device->attrs.max_send_sge); if (ndev->srq) { qp_attr.srq = ndev->srq; diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index e459c97151b3..c5a1cddd8856 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1661,9 +1661,16 @@ static struct smbd_connection *_smbd_get_connection( info->max_receive_size = smbd_max_receive_size; info->keep_alive_interval = smbd_keep_alive_interval; - if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) { - log_rdma_event(ERR, "warning: device max_sge = %d too small\n", - info->id->device->attrs.max_sge); + if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) { + log_rdma_event(ERR, + "warning: device max_send_sge = %d too small\n", + info->id->device->attrs.max_send_sge); + log_rdma_event(ERR, "Queue Pair creation may fail\n"); + } + if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) { + log_rdma_event(ERR, + "warning: device max_recv_sge = %d too small\n", + info->id->device->attrs.max_recv_sge); log_rdma_event(ERR, "Queue Pair creation may fail\n"); } diff --git a/include/linux/idr.h b/include/linux/idr.h index e856f4e0ab35..3e8215b2c371 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -98,6 +98,17 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) * period). 
*/ +#define idr_lock(idr) xa_lock(&(idr)->idr_rt) +#define idr_unlock(idr) xa_unlock(&(idr)->idr_rt) +#define idr_lock_bh(idr) xa_lock_bh(&(idr)->idr_rt) +#define idr_unlock_bh(idr) xa_unlock_bh(&(idr)->idr_rt) +#define idr_lock_irq(idr) xa_lock_irq(&(idr)->idr_rt) +#define idr_unlock_irq(idr) xa_unlock_irq(&(idr)->idr_rt) +#define idr_lock_irqsave(idr, flags) \ + xa_lock_irqsave(&(idr)->idr_rt, flags) +#define idr_unlock_irqrestore(idr, flags) \ + xa_unlock_irqrestore(&(idr)->idr_rt, flags) + void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c14b81559505..5e04e2053fd7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -82,6 +82,7 @@ enum { enum { MLX5_OBJ_TYPE_UCTX = 0x0004, + MLX5_OBJ_TYPE_UMEM = 0x0005, }; enum { @@ -246,12 +247,15 @@ enum { MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, + MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, MLX5_CMD_OP_FPGA_CREATE_QP = 0x960, MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961, MLX5_CMD_OP_FPGA_QUERY_QP = 0x962, MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963, MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964, MLX5_CMD_OP_CREATE_GENERAL_OBJECT = 0xa00, + MLX5_CMD_OP_MODIFY_GENERAL_OBJECT = 0xa01, + MLX5_CMD_OP_QUERY_GENERAL_OBJECT = 0xa02, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT = 0xa03, MLX5_CMD_OP_MAX }; diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index c2c8b1fdeead..715394f6d18a 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -58,6 +58,7 @@ * @bound_dev_if: An optional device interface index. * @transport: The transport type used. * @net: Network namespace containing the bound_dev_if net_dev. 
+ * @sgid_attr: GID attribute to use for identified SGID */ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; @@ -67,6 +68,7 @@ struct rdma_dev_addr { int bound_dev_if; enum rdma_transport_type transport; struct net *net; + const struct ib_gid_attr *sgid_attr; enum rdma_network_type network; int hoplimit; }; diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index a5f249828115..1108d4220276 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -37,45 +37,23 @@ #include <rdma/ib_verbs.h> -/** - * ib_get_cached_gid - Returns a cached GID table entry - * @device: The device to query. - * @port_num: The port number of the device to query. - * @index: The index into the cached GID table to query. - * @gid: The GID value found at the specified index. - * @attr: The GID attribute found at the specified index (only in RoCE). - * NULL means ignore (output parameter). - * - * ib_get_cached_gid() fetches the specified GID table entry stored in - * the local software cache. 
- */ -int ib_get_cached_gid(struct ib_device *device, - u8 port_num, - int index, - union ib_gid *gid, - struct ib_gid_attr *attr); - -int ib_find_cached_gid(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, - u8 *port_num, - u16 *index); - -int ib_find_cached_gid_by_port(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - u8 port_num, - struct net_device *ndev, - u16 *index); +int rdma_query_gid(struct ib_device *device, u8 port_num, int index, + union ib_gid *gid); +const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, + const union ib_gid *gid, + enum ib_gid_type gid_type, + struct net_device *ndev); +const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + enum ib_gid_type gid_type, + u8 port, + struct net_device *ndev); +const struct ib_gid_attr *rdma_find_gid_by_filter( + struct ib_device *device, const union ib_gid *gid, u8 port_num, + bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, + void *), + void *context); -int ib_find_gid_by_filter(struct ib_device *device, - const union ib_gid *gid, - u8 port_num, - bool (*filter)(const union ib_gid *gid, - const struct ib_gid_attr *, - void *), - void *context, u16 *index); /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. 
@@ -150,4 +128,8 @@ int ib_get_cached_port_state(struct ib_device *device, enum ib_port_state *port_active); bool rdma_is_zero_gid(const union ib_gid *gid); +const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device, + u8 port_num, int index); +void rdma_put_gid_attr(const struct ib_gid_attr *attr); +void rdma_hold_gid_attr(const struct ib_gid_attr *attr); #endif /* _IB_CACHE_H */ diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 7979cb04f529..c98d603c0b63 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -246,6 +246,7 @@ struct ib_cm_sidr_rep_event_param { u32 qkey; u32 qpn; void *info; + const struct ib_gid_attr *sgid_attr; u8 info_len; }; @@ -365,6 +366,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, struct ib_cm_req_param { struct sa_path_rec *primary_path; struct sa_path_rec *alternate_path; + const struct ib_gid_attr *ppath_sgid_attr; __be64 service_id; u32 qp_num; enum ib_qp_type qp_type; @@ -566,6 +568,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, struct ib_cm_sidr_req_param { struct sa_path_rec *path; + const struct ib_gid_attr *sgid_attr; __be64 service_id; int timeout_ms; const void *private_data; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 2f4f1768ded4..f6ba366051c7 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -262,6 +262,39 @@ struct ib_class_port_info { __be32 trap_qkey; }; +/* PortInfo CapabilityMask */ +enum ib_port_capability_mask_bits { + IB_PORT_SM = 1 << 1, + IB_PORT_NOTICE_SUP = 1 << 2, + IB_PORT_TRAP_SUP = 1 << 3, + IB_PORT_OPT_IPD_SUP = 1 << 4, + IB_PORT_AUTO_MIGR_SUP = 1 << 5, + IB_PORT_SL_MAP_SUP = 1 << 6, + IB_PORT_MKEY_NVRAM = 1 << 7, + IB_PORT_PKEY_NVRAM = 1 << 8, + IB_PORT_LED_INFO_SUP = 1 << 9, + IB_PORT_SM_DISABLED = 1 << 10, + IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, + IB_PORT_CM_SUP = 1 << 16, + IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, + 
IB_PORT_REINIT_SUP = 1 << 18, + IB_PORT_DEVICE_MGMT_SUP = 1 << 19, + IB_PORT_VENDOR_CLASS_SUP = 1 << 20, + IB_PORT_DR_NOTICE_SUP = 1 << 21, + IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_PORT_BOOT_MGMT_SUP = 1 << 23, + IB_PORT_LINK_LATENCY_SUP = 1 << 24, + IB_PORT_CLIENT_REG_SUP = 1 << 25, + IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26, + IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27, + IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28, + IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29, + IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30, + IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31, +}; + #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) struct opa_class_port_info { diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index bacb144f7780..b6ddf2a1b9d8 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -172,12 +172,7 @@ struct sa_path_rec_ib { */ struct sa_path_rec_roce { bool route_resolved; - u8 dmac[ETH_ALEN]; - /* ignored in IB */ - int ifindex; - /* ignored in IB */ - struct net *net; - + u8 dmac[ETH_ALEN]; }; struct sa_path_rec_opa { @@ -556,13 +551,10 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr); -/** - * ib_init_ah_attr_from_path - Initialize address handle attributes based on - * an SA path record. 
- */ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *sgid_attr); /** * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec @@ -667,45 +659,10 @@ static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec) eth_zero_addr(rec->roce.dmac); } -static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex) -{ - if (sa_path_is_roce(rec)) - rec->roce.ifindex = ifindex; -} - -static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net) -{ - if (sa_path_is_roce(rec)) - rec->roce.net = net; -} - static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec) { if (sa_path_is_roce(rec)) return rec->roce.dmac; return NULL; } - -static inline int sa_path_get_ifindex(struct sa_path_rec *rec) -{ - if (sa_path_is_roce(rec)) - return rec->roce.ifindex; - return 0; -} - -static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec) -{ - if (sa_path_is_roce(rec)) - return rec->roce.net; - return NULL; -} - -static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec) -{ - return sa_path_get_ndev(rec) ? 
- dev_get_by_index(sa_path_get_ndev(rec), - sa_path_get_ifindex(rec)) - : NULL; -} - #endif /* IB_SA_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4c6241bc2039..08348e53082c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -94,6 +94,7 @@ enum ib_gid_type { struct ib_gid_attr { struct net_device *ndev; struct ib_device *device; + union ib_gid gid; enum ib_gid_type gid_type; u16 index; u8 port_num; @@ -148,13 +149,13 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net return IB_GID_TYPE_IB; } -static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type, - union ib_gid *gid) +static inline enum rdma_network_type +rdma_gid_attr_network_type(const struct ib_gid_attr *attr) { - if (gid_type == IB_GID_TYPE_IB) + if (attr->gid_type == IB_GID_TYPE_IB) return RDMA_NETWORK_IB; - if (ipv6_addr_v4mapped((struct in6_addr *)gid)) + if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid)) return RDMA_NETWORK_IPV4; else return RDMA_NETWORK_IPV6; @@ -344,7 +345,8 @@ struct ib_device_attr { int max_qp; int max_qp_wr; u64 device_cap_flags; - int max_sge; + int max_send_sge; + int max_recv_sge; int max_sge_rd; int max_cq; int max_cqe; @@ -430,33 +432,6 @@ enum ib_port_state { IB_PORT_ACTIVE_DEFER = 5 }; -enum ib_port_cap_flags { - IB_PORT_SM = 1 << 1, - IB_PORT_NOTICE_SUP = 1 << 2, - IB_PORT_TRAP_SUP = 1 << 3, - IB_PORT_OPT_IPD_SUP = 1 << 4, - IB_PORT_AUTO_MIGR_SUP = 1 << 5, - IB_PORT_SL_MAP_SUP = 1 << 6, - IB_PORT_MKEY_NVRAM = 1 << 7, - IB_PORT_PKEY_NVRAM = 1 << 8, - IB_PORT_LED_INFO_SUP = 1 << 9, - IB_PORT_SM_DISABLED = 1 << 10, - IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, - IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, - IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, - IB_PORT_CM_SUP = 1 << 16, - IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, - IB_PORT_REINIT_SUP = 1 << 18, - IB_PORT_DEVICE_MGMT_SUP = 1 << 19, - IB_PORT_VENDOR_CLASS_SUP = 1 << 20, - IB_PORT_DR_NOTICE_SUP = 1 << 21, - 
IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, - IB_PORT_BOOT_MGMT_SUP = 1 << 23, - IB_PORT_LINK_LATENCY_SUP = 1 << 24, - IB_PORT_CLIENT_REG_SUP = 1 << 25, - IB_PORT_IP_BASED_GIDS = 1 << 26, -}; - enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_4X = 2, @@ -554,6 +529,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( #define RDMA_CORE_CAP_AF_IB 0x00001000 #define RDMA_CORE_CAP_ETH_AH 0x00002000 #define RDMA_CORE_CAP_OPA_AH 0x00004000 +#define RDMA_CORE_CAP_IB_GRH_REQUIRED 0x00008000 /* Protocol 0xFFF00000 */ #define RDMA_CORE_CAP_PROT_IB 0x00100000 @@ -563,6 +539,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( #define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000 #define RDMA_CORE_CAP_PROT_USNIC 0x02000000 +#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \ + | RDMA_CORE_CAP_PROT_ROCE \ + | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP) + #define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ | RDMA_CORE_CAP_IB_MAD \ | RDMA_CORE_CAP_IB_SMI \ @@ -595,6 +575,8 @@ struct ib_port_attr { enum ib_mtu max_mtu; enum ib_mtu active_mtu; int gid_tbl_len; + unsigned int ip_gids:1; + /* This is the value from PortInfo CapabilityMask, defined by IBA */ u32 port_cap_flags; u32 max_msg_sz; u32 bad_pkey_cntr; @@ -610,7 +592,6 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; - bool grh_required; }; enum ib_device_modify_flags { @@ -689,6 +670,7 @@ struct ib_event_handler { } while (0) struct ib_global_route { + const struct ib_gid_attr *sgid_attr; union ib_gid dgid; u32 flow_label; u8 sgid_index; @@ -1443,14 +1425,16 @@ struct ib_recv_wr { }; enum ib_access_flags { - IB_ACCESS_LOCAL_WRITE = 1, - IB_ACCESS_REMOTE_WRITE = (1<<1), - IB_ACCESS_REMOTE_READ = (1<<2), - IB_ACCESS_REMOTE_ATOMIC = (1<<3), - IB_ACCESS_MW_BIND = (1<<4), - IB_ZERO_BASED = (1<<5), - IB_ACCESS_ON_DEMAND = (1<<6), - IB_ACCESS_HUGETLB = (1<<7), + IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE, + IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE, 
+ IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ, + IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC, + IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND, + IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED, + IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, + IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, + + IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1) }; /* @@ -1473,7 +1457,10 @@ struct ib_fmr_attr { struct ib_umem; enum rdma_remove_reason { - /* Userspace requested uobject deletion. Call could fail */ + /* + * Userspace requested uobject deletion or initial try + * to remove uobject via cleanup. Call could fail + */ RDMA_REMOVE_DESTROY, /* Context deletion. This call should delete the actual object itself */ RDMA_REMOVE_CLOSE, @@ -1494,12 +1481,7 @@ struct ib_ucontext { struct ib_uverbs_file *ufile; int closing; - /* locking the uobjects_list */ - struct mutex uobjects_lock; - struct list_head uobjects; - /* protects cleanup process from other actions */ - struct rw_semaphore cleanup_rwsem; - enum rdma_remove_reason cleanup_reason; + bool cleanup_retryable; struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING @@ -1524,6 +1506,9 @@ struct ib_ucontext { struct ib_uobject { u64 user_handle; /* handle given to us by userspace */ + /* ufile & ucontext owning this object */ + struct ib_uverbs_file *ufile; + /* FIXME, save memory: ufile->context == context */ struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ @@ -1536,12 +1521,6 @@ struct ib_uobject { const struct uverbs_obj_type *type; }; -struct ib_uobject_file { - struct ib_uobject uobj; - /* ufile contains the lock between context release and file close */ - struct ib_uverbs_file *ufile; -}; - struct ib_udata { const void __user *inbuf; void __user *outbuf; @@ -1578,6 +1557,7 @@ struct ib_ah { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; + const struct ib_gid_attr 
*sgid_attr; enum rdma_ah_attr_type type; }; @@ -1776,6 +1756,9 @@ struct ib_qp { struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; + /* sgid_attrs associated with the AV's */ + const struct ib_gid_attr *av_sgid_attr; + const struct ib_gid_attr *alt_path_sgid_attr; u32 qp_num; u32 max_write_sge; u32 max_read_sge; @@ -2242,11 +2225,6 @@ struct ib_counters { atomic_t usecnt; }; -enum ib_read_counters_flags { - /* prefer read values from driver cache */ - IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0, -}; - struct ib_counters_read_attr { u64 *counters_buff; u32 ncounters; @@ -2341,8 +2319,7 @@ struct ib_device { * concurrently for different ports. This function is only called when * roce_gid_table is used. */ - int (*add_gid)(const union ib_gid *gid, - const struct ib_gid_attr *attr, + int (*add_gid)(const struct ib_gid_attr *attr, void **context); /* When calling del_gid, the HW vendor's driver should delete the * gid of device @device at gid index gid_index of port port_num @@ -2592,7 +2569,7 @@ struct ib_device { const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); - struct uverbs_root_spec *specs_root; + struct uverbs_root_spec *driver_specs_root; enum rdma_driver_id driver_id; }; @@ -2679,6 +2656,46 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, } /** + * ib_is_destroy_retryable - Check whether the uobject destruction + * is retryable. + * @ret: The initial destruction return code + * @why: remove reason + * @uobj: The uobject that is destroyed + * + * This function is a helper function that IB layer and low-level drivers + * can use to consider whether the destruction of the given uobject is + * retry-able. + * It checks the original return code, if it wasn't success the destruction + * is retryable according to the ucontext state (i.e. cleanup_retryable) and + * the remove reason. (i.e. why). + * Must be called with the object locked for destroy. 
+ */ +static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, + struct ib_uobject *uobj) +{ + return ret && (why == RDMA_REMOVE_DESTROY || + uobj->context->cleanup_retryable); +} + +/** + * ib_destroy_usecnt - Called during destruction to check the usecnt + * @usecnt: The usecnt atomic + * @why: remove reason + * @uobj: The uobject that is destroyed + * + * Non-zero usecnts will block destruction unless destruction was triggered by + * a ucontext cleanup. + */ +static inline int ib_destroy_usecnt(atomic_t *usecnt, + enum rdma_remove_reason why, + struct ib_uobject *uobj) +{ + if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) + return -EBUSY; + return 0; +} + +/** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. @@ -2755,6 +2772,13 @@ static inline int rdma_is_port_valid(const struct ib_device *device, port <= rdma_end_port(device)); } +static inline bool rdma_is_grh_required(const struct ib_device *device, + u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & + RDMA_CORE_PORT_IB_GRH_REQUIRED; +} + static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB; @@ -3046,10 +3070,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) return rdma_protocol_iwarp(dev, port_num); } -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid, - struct ib_gid_attr *attr); - int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, @@ -3148,6 +3168,13 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); * ignored unless the work completion indicates that the GRH is valid. 
* @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. + * When ib_init_ah_attr_from_wc() returns success, + * (a) for IB link layer it optionally contains a reference to SGID attribute + * when GRH is present for IB link layer. + * (b) for RoCE link layer it contains a reference to SGID attribute. + * User must invoke rdma_cleanup_ah_attr_gid_attr() to release reference to SGID + * attributes which are initialized using ib_init_ah_attr_from_wc(). + * */ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, @@ -3798,10 +3825,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); */ int ib_dealloc_xrcd(struct ib_xrcd *xrcd); -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, int domain); -int ib_destroy_flow(struct ib_flow *flow_id); - static inline int ib_check_mr_access(int flags) { /* @@ -4030,8 +4053,19 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr, grh->sgid_index = sgid_index; grh->hop_limit = hop_limit; grh->traffic_class = traffic_class; + grh->sgid_attr = NULL; } +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr); +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, + u32 flow_label, u8 hop_limit, u8 traffic_class, + const struct ib_gid_attr *sgid_attr); +void rdma_copy_ah_attr(struct rdma_ah_attr *dest, + const struct rdma_ah_attr *src); +void rdma_replace_ah_attr(struct rdma_ah_attr *old, + const struct rdma_ah_attr *new); +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src); + /** * rdma_ah_find_type - Return address handle type. 
* @@ -4107,4 +4141,9 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) */ void rdma_roce_rescan_device(struct ib_device *ibdev); +struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile); + +int uverbs_destroy_def_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs); #endif /* IB_VERBS_H */ diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 2bbb7a67e643..66d4393d339c 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -120,7 +120,7 @@ static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr) if (attr->type == RDMA_AH_ATTR_TYPE_IB) { if (!rdma_ah_get_dlid(attr) || rdma_ah_get_dlid(attr) >= - be32_to_cpu(IB_MULTICAST_LID_BASE)) + be16_to_cpu(IB_MULTICAST_LID_BASE)) return false; } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) { if (!rdma_ah_get_dlid(attr) || diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1145a4c154b2..927f6d5b6d0f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2016, 2017 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -91,6 +91,7 @@ * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK * RVT_S_ECN - a BECN was queued to the send engine + * RVT_S_MAX_BIT_MASK - The max bit that can be used by rdmavt */ #define RVT_S_SIGNAL_REQ_WR 0x0001 #define RVT_S_BUSY 0x0002 @@ -103,23 +104,26 @@ #define RVT_S_WAIT_SSN_CREDIT 0x0100 #define RVT_S_WAIT_DMA 0x0200 #define RVT_S_WAIT_PIO 0x0400 -#define RVT_S_WAIT_PIO_DRAIN 0x0800 -#define RVT_S_WAIT_TX 0x1000 -#define RVT_S_WAIT_DMA_DESC 0x2000 -#define RVT_S_WAIT_KMEM 0x4000 -#define RVT_S_WAIT_PSN 0x8000 -#define RVT_S_WAIT_ACK 0x10000 -#define RVT_S_SEND_ONE 0x20000 -#define RVT_S_UNLIMITED_CREDIT 0x40000 -#define RVT_S_AHG_VALID 0x80000 -#define RVT_S_AHG_CLEAR 0x100000 -#define RVT_S_ECN 0x200000 +#define RVT_S_WAIT_TX 0x0800 +#define RVT_S_WAIT_DMA_DESC 0x1000 +#define RVT_S_WAIT_KMEM 0x2000 +#define RVT_S_WAIT_PSN 0x4000 +#define RVT_S_WAIT_ACK 0x8000 +#define RVT_S_SEND_ONE 0x10000 +#define RVT_S_UNLIMITED_CREDIT 0x20000 +#define RVT_S_ECN 0x40000 +#define RVT_S_MAX_BIT_MASK 0x800000 + +/* + * Drivers should use s_flags starting with bit 31 down to the bit next to + * RVT_S_MAX_BIT_MASK + */ /* * Wait flags that would prevent any packet type from being sent. 
*/ #define RVT_S_ANY_WAIT_IO \ - (RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \ + (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) /* diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index bd6bba3a6e04..017ccf75890c 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -61,54 +61,57 @@ enum uverbs_obj_access { UVERBS_ACCESS_DESTROY }; -enum { - UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0, - /* Support extending attributes by length, validate all unknown size == zero */ - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1, -}; - /* Specification of a single attribute inside the ioctl message */ +/* good size 16 */ struct uverbs_attr_spec { + u8 type; + + /* + * Support extending attributes by length. Allow the user to provide + * more bytes than ptr.len, but check that everything after is zero'd + * by the user. + */ + u8 zero_trailing:1; + /* + * Valid only for PTR_IN. Allocate and copy the data inside + * the parser + */ + u8 alloc_and_copy:1; + u8 mandatory:1; + union { - /* Header shared by all following union members - to reduce space. */ - struct { - enum uverbs_attr_type type; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; - }; struct { - enum uverbs_attr_type type; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; /* Current known size to kernel */ - u16 len; + u16 len; /* User isn't allowed to provide something < min_len */ - u16 min_len; + u16 min_len; } ptr; + struct { - enum uverbs_attr_type type; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; /* * higher bits mean the namespace and lower bits mean * the type id within the namespace. 
*/ - u16 obj_type; - u8 access; + u16 obj_type; + u8 access; } obj; + + struct { + u8 num_elems; + } enum_def; + } u; + + /* This weird split of the enum lets us remove some padding */ + union { struct { - enum uverbs_attr_type type; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; - u8 num_elems; /* * The enum attribute can select one of the attributes * contained in the ids array. Currently only PTR_IN * attributes are supported in the ids array. */ - const struct uverbs_attr_spec *ids; + const struct uverbs_attr_spec *ids; } enum_def; - }; + } u2; }; struct uverbs_attr_spec_hash { @@ -192,130 +195,112 @@ struct uverbs_object_tree_def { const struct uverbs_object_def * const (*objects)[]; }; -#define UA_FLAGS(_flags) .flags = _flags -#define __UVERBS_ATTR0(_id, _type, _fld, _attr, ...) \ - ((const struct uverbs_attr_def) \ - {.id = _id, .attr = {{._fld = {.type = _type, _attr, .flags = 0, } }, } }) -#define __UVERBS_ATTR1(_id, _type, _fld, _attr, _extra1, ...) \ - ((const struct uverbs_attr_def) \ - {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1 } },} }) -#define __UVERBS_ATTR2(_id, _type, _fld, _attr, _extra1, _extra2) \ - ((const struct uverbs_attr_def) \ - {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1, _extra2 } },} }) -#define __UVERBS_ATTR(_id, _type, _fld, _attr, _extra1, _extra2, _n, ...) \ - __UVERBS_ATTR##_n(_id, _type, _fld, _attr, _extra1, _extra2) +/* + * ======================================= + * Attribute Specifications + * ======================================= + */ -#define UVERBS_ATTR_TYPE(_type) \ - .min_len = sizeof(_type), .len = sizeof(_type) -#define UVERBS_ATTR_STRUCT(_type, _last) \ - .min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type) #define UVERBS_ATTR_SIZE(_min_len, _len) \ - .min_len = _min_len, .len = _len + .u.ptr.min_len = _min_len, .u.ptr.len = _len + +/* + * Specifies a uapi structure that cannot be extended. 
The user must always + * supply the whole structure and nothing more. The structure must be declared + * in a header under include/uapi/rdma. + */ +#define UVERBS_ATTR_TYPE(_type) \ + .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type) +/* + * Specifies a uapi structure where the user must provide at least up to + * member 'last'. Anything after last and up until the end of the structure + * can be non-zero, anything longer than the end of the structure must be + * zero. The structure must be declared in a header under include/uapi/rdma. + */ +#define UVERBS_ATTR_STRUCT(_type, _last) \ + .zero_trailing = 1, \ + UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)), \ + sizeof(_type)) +/* + * Specifies at least min_len bytes must be passed in, but the amount can be + * larger, up to the protocol maximum size. No check for zeroing is done. + */ +#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) + +/* Must be used in the '...' of any UVERBS_ATTR */ +#define UA_ALLOC_AND_COPY .alloc_and_copy = 1 +#define UA_MANDATORY .mandatory = 1 +#define UA_OPTIONAL .mandatory = 0 + +#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_IDR, \ + .u.obj.obj_type = _idr_type, \ + .u.obj.access = _access, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = (_attr_id) + \ + BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \ + (_access) != UVERBS_ACCESS_READ), \ + .attr = { .type = UVERBS_ATTR_TYPE_FD, \ + .u.obj.obj_type = _fd_type, \ + .u.obj.access = _access, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \ + _type, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) 
\ + (&(const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \ + _type, \ + __VA_ARGS__ } }) + +/* _enum_arr should be a 'static const struct uverbs_attr_spec[]' */ +#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \ + .u2.enum_def.ids = _enum_arr, \ + .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \ + __VA_ARGS__ }, \ + }) /* - * In new compiler, UVERBS_ATTR could be simplified by declaring it as - * [_id] = {.type = _type, .len = _len, ##__VA_ARGS__} - * But since we support older compilers too, we need the more complex code. + * This spec is used in order to pass information to the hardware driver in a + * legacy way. Every verb that could get driver specific data should get this + * spec. */ -#define UVERBS_ATTR(_id, _type, _fld, _attr, ...) \ - __UVERBS_ATTR(_id, _type, _fld, _attr, ##__VA_ARGS__, 2, 1, 0) -#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...) \ - UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_IN, ptr, _len, ##__VA_ARGS__) -/* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */ -#define UVERBS_ATTR_PTR_IN(_id, _type, ...) \ - UVERBS_ATTR_PTR_IN_SZ(_id, _type, ##__VA_ARGS__) -#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...) \ - UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_OUT, ptr, _len, ##__VA_ARGS__) -#define UVERBS_ATTR_PTR_OUT(_id, _type, ...) \ - UVERBS_ATTR_PTR_OUT_SZ(_id, _type, ##__VA_ARGS__) -#define UVERBS_ATTR_ENUM_IN(_id, _enum_arr, ...) 
\ - UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_ENUM_IN, enum_def, \ - .ids = (_enum_arr), \ - .num_elems = ARRAY_SIZE(_enum_arr), ##__VA_ARGS__) +#define UVERBS_ATTR_UHW() \ + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ + UVERBS_ATTR_MIN_SIZE(0), \ + UA_OPTIONAL), \ + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ + UVERBS_ATTR_MIN_SIZE(0), \ + UA_OPTIONAL) /* - * In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring - * it as - * {.id = _id, \ - * .attr {.type = __obj_class, \ - * .obj = {.obj_type = _idr_type, \ - * .access = _access \ - * }, ##__VA_ARGS__ } } - * But since we support older compilers too, we need the more complex code. + * ======================================= + * Declaration helpers + * ======================================= */ -#define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\ - ((const struct uverbs_attr_def) \ - {.id = _id, \ - .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \ - .access = _access, .flags = 0 } }, } }) -#define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\ - ((const struct uverbs_attr_def) \ - {.id = _id, \ - .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \ - .access = _access, _flags} }, } }) -#define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \ - _n, ...) \ - ___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags) -#define __UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, ...) \ - ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, \ - ##__VA_ARGS__, 1, 0) -#define UVERBS_ATTR_IDR(_id, _idr_type, _access, ...) \ - __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_IDR, _idr_type, _access,\ - ##__VA_ARGS__) -#define UVERBS_ATTR_FD(_id, _fd_type, _access, ...) \ - __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_FD, _fd_type, \ - (_access) + BUILD_BUG_ON_ZERO( \ - (_access) != UVERBS_ACCESS_NEW && \ - (_access) != UVERBS_ACCESS_READ), \ - ##__VA_ARGS__) -#define DECLARE_UVERBS_ATTR_SPEC(_name, ...) 
\ - const struct uverbs_attr_def _name = __VA_ARGS__ - -#define DECLARE_UVERBS_ENUM(_name, ...) \ - const struct uverbs_enum_spec _name = { \ - .len = ARRAY_SIZE(((struct uverbs_attr_spec[]){__VA_ARGS__})),\ - .ids = {__VA_ARGS__}, \ + +#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \ + static const struct uverbs_object_def *const _name##_ptr[] = { \ + __VA_ARGS__, \ + }; \ + static const struct uverbs_object_tree_def _name = { \ + .num_objects = ARRAY_SIZE(_name##_ptr), \ + .objects = &_name##_ptr, \ } -#define _UVERBS_METHOD_ATTRS_SZ(...) \ - (sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\ - sizeof(const struct uverbs_attr_def *)) -#define _UVERBS_METHOD(_id, _handler, _flags, ...) \ - ((const struct uverbs_method_def) { \ - .id = _id, \ - .flags = _flags, \ - .handler = _handler, \ - .num_attrs = _UVERBS_METHOD_ATTRS_SZ(__VA_ARGS__), \ - .attrs = &(const struct uverbs_attr_def * const []){__VA_ARGS__} }) -#define DECLARE_UVERBS_METHOD(_name, _id, _handler, ...) \ - const struct uverbs_method_def _name = \ - _UVERBS_METHOD(_id, _handler, 0, ##__VA_ARGS__) -#define DECLARE_UVERBS_CTX_METHOD(_name, _id, _handler, _flags, ...) \ - const struct uverbs_method_def _name = \ - _UVERBS_METHOD(_id, _handler, \ - UVERBS_ACTION_FLAG_CREATE_ROOT, \ - ##__VA_ARGS__) -#define _UVERBS_OBJECT_METHODS_SZ(...) \ - (sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \ - sizeof(const struct uverbs_method_def *)) -#define _UVERBS_OBJECT(_id, _type_attrs, ...) \ - ((const struct uverbs_object_def) { \ - .id = _id, \ - .type_attrs = _type_attrs, \ - .num_methods = _UVERBS_OBJECT_METHODS_SZ(__VA_ARGS__), \ - .methods = &(const struct uverbs_method_def * const []){__VA_ARGS__} }) -#define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \ - const struct uverbs_object_def _name = \ - _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__) -#define _UVERBS_TREE_OBJECTS_SZ(...) 
\ - (sizeof((const struct uverbs_object_def * const []){__VA_ARGS__}) / \ - sizeof(const struct uverbs_object_def *)) -#define _UVERBS_OBJECT_TREE(...) \ - ((const struct uverbs_object_tree_def) { \ - .num_objects = _UVERBS_TREE_OBJECTS_SZ(__VA_ARGS__), \ - .objects = &(const struct uverbs_object_def * const []){__VA_ARGS__} }) -#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \ - const struct uverbs_object_tree_def _name = \ - _UVERBS_OBJECT_TREE(__VA_ARGS__) /* ================================================= * Parsing infrastructure @@ -323,7 +308,14 @@ struct uverbs_object_tree_def { */ struct uverbs_ptr_attr { - u64 data; + /* + * If the attribute's spec has alloc_and_copy set then the 'ptr' is + * used. + */ + union { + void *ptr; + u64 data; + }; u16 len; /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ u16 flags; @@ -331,11 +323,7 @@ struct uverbs_ptr_attr { }; struct uverbs_obj_attr { - /* pointer to the kernel descriptor -> type, access, etc */ - const struct uverbs_obj_type *type; struct ib_uobject *uobject; - /* fd or id in idr of this object */ - int id; }; struct uverbs_attr { @@ -431,6 +419,17 @@ static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_att return attr->obj_attr.uobject; } +static inline int +uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + return attr->ptr_attr.len; +} + static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size) { @@ -457,6 +456,18 @@ static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); } +static inline void *uverbs_attr_get_alloced_ptr( + const struct uverbs_attr_bundle *attrs_bundle, u16 idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return (void 
*)attr; + + return uverbs_attr_ptr_is_inline(attr) ? (void *)&attr->ptr_attr.data : + attr->ptr_attr.ptr; +} + static inline int _uverbs_copy_from(void *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index c5bb4ebdb0b0..2eb1767042af 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -43,48 +43,84 @@ #define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y) #define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id) #define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) +#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id) -#define DECLARE_UVERBS_NAMED_METHOD(id, ...) \ - DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__) +/* These are static so they do not need to be qualified */ +#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id +#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id -#define DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(id, handler, ...) \ - DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__) +#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .handler = UVERBS_HANDLER(_method_id), \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + } -#define DECLARE_UVERBS_NAMED_METHOD_NO_OVERRIDE(id, handler, ...) \ - DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, NULL, ##__VA_ARGS__) - -#define DECLARE_UVERBS_NAMED_OBJECT(id, ...) \ - DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__) - -#define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z) +/* Create a standard destroy method using the default handler. 
The handle_attr + * argument must be the attribute specifying the handle to destroy, the + * default handler does not support any other attributes. + */ +#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr) \ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { _handle_attr }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .handler = uverbs_destroy_def_handler, \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + } -#define UVERBS_NO_OVERRIDE NULL +#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) \ + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { __VA_ARGS__ }; \ + const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + .id = _object_id, \ + .type_attrs = &_type_attrs, \ + .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + } -/* This declares a parsing tree with one object and one method. This is usually - * used for merging driver attributes to the common attributes. The driver has - * a chance to override the handler and type attrs of the original object. - * The __VA_ARGS__ just contains a list of attributes. +/* + * Declare global methods. These still have a unique object_id because we + * identify all uapi methods with a (object,method) tuple. However, they have + * no type pointer. */ -#define ADD_UVERBS_ATTRIBUTES(_name, _object, _method, _type_attrs, _handler, ...) 
\ -static DECLARE_UVERBS_METHOD(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ - _method_, _name), \ - _method, _handler, ##__VA_ARGS__); \ - \ -static DECLARE_UVERBS_OBJECT(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ - _object_, _name), \ - _object, _type_attrs, \ - &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ - _method_, _name)); \ - \ -static DECLARE_UVERBS_OBJECT_TREE(_name, \ - &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ - _object_, _name)) +#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \ + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { __VA_ARGS__ }; \ + const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + .id = _object_id, \ + .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + } -/* A very common use case is that the driver doesn't override the handler and - * type_attrs. Therefore, we provide a simplified macro for this common case. +/* Used by drivers to declare a complete parsing tree for a single method that + * differs only in having additional driver specific attributes. */ -#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object, _method, ...) \ - ADD_UVERBS_ATTRIBUTES(_name, _object, _method, UVERBS_NO_OVERRIDE, \ - UVERBS_NO_OVERRIDE, ##__VA_ARGS__) +#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) 
\ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + }; \ + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { &UVERBS_METHOD(_method_id) }; \ + static const struct uverbs_object_def _name##_struct = { \ + .id = _object_id, \ + .num_methods = 1, \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + }; \ + static const struct uverbs_object_def *const _name##_ptrs[] = { \ + &_name##_struct, \ + }; \ + static const struct uverbs_object_tree_def _name = { \ + .num_objects = 1, \ + .objects = &_name##_ptrs, \ + } #endif diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 9d56cdb84655..3e3f108f0912 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -37,8 +37,6 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/ib_user_ioctl_verbs.h> -#define UVERBS_OBJECT(id) uverbs_object_##id - #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) const struct uverbs_object_tree_def *uverbs_default_get_objects(void); #else @@ -50,28 +48,37 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, bool write, - struct ib_ucontext *ucontext, + struct ib_uverbs_file *ufile, int id) { - return rdma_lookup_get_uobject(type, ucontext, id, write); + return rdma_lookup_get_uobject(type, ufile, id, write); } #define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs -#define uobj_get_read(_type, _id, _ucontext) \ - __uobj_get(uobj_get_type(_type), false, _ucontext, _id) +#define uobj_get_read(_type, _id, _ufile) \ + __uobj_get(uobj_get_type(_type), false, _ufile, _id) + +static inline void *_uobj_get_obj_read(const struct 
uverbs_obj_type *type, + int id, struct ib_uverbs_file *ufile) +{ + struct ib_uobject *uobj = __uobj_get(type, false, ufile, id); + + if (IS_ERR(uobj)) + return NULL; + return uobj->object; +} +#define uobj_get_obj_read(_object, _type, _id, _ufile) \ + ((struct ib_##_object *)_uobj_get_obj_read(uobj_get_type(_type), _id, \ + _ufile)) -#define uobj_get_obj_read(_object, _type, _id, _ucontext) \ -({ \ - struct ib_uobject *__uobj = \ - __uobj_get(uobj_get_type(_type), \ - false, _ucontext, _id); \ - \ - (struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\ -}) +#define uobj_get_write(_type, _id, _ufile) \ + __uobj_get(uobj_get_type(_type), true, _ufile, _id) -#define uobj_get_write(_type, _id, _ucontext) \ - __uobj_get(uobj_get_type(_type), true, _ucontext, _id) +int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, + struct ib_uverbs_file *ufile, int success_res); +#define uobj_perform_destroy(_type, _id, _ufile, _success_res) \ + __uobj_perform_destroy(uobj_get_type(_type), _id, _ufile, _success_res) static inline void uobj_put_read(struct ib_uobject *uobj) { @@ -102,13 +109,12 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj) } static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) + struct ib_uverbs_file *ufile) { - return rdma_alloc_begin_uobject(type, ucontext); + return rdma_alloc_begin_uobject(type, ufile); } -#define uobj_alloc(_type, ucontext) \ - __uobj_alloc(uobj_get_type(_type), ucontext) +#define uobj_alloc(_type, _ufile) __uobj_alloc(uobj_get_type(_type), _ufile) #endif diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index cc04ec65588d..e2fc9db466d3 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -72,12 +72,12 @@ struct uverbs_obj_type_class { * reset flow). 
*/ struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext); + struct ib_uverbs_file *ufile); void (*alloc_commit)(struct ib_uobject *uobj); void (*alloc_abort)(struct ib_uobject *uobj); struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, int id, + struct ib_uverbs_file *ufile, int id, bool exclusive); void (*lookup_put)(struct ib_uobject *uobj, bool exclusive); /* @@ -93,7 +93,6 @@ struct uverbs_obj_type_class { struct uverbs_obj_type { const struct uverbs_obj_type_class * const type_class; size_t obj_size; - unsigned int destroy_order; }; /* @@ -121,11 +120,11 @@ struct uverbs_obj_idr_type { }; struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, + struct ib_uverbs_file *ufile, int id, bool exclusive); void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext); + struct ib_uverbs_file *ufile); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); int rdma_alloc_commit_uobject(struct ib_uobject *uobj); @@ -140,7 +139,7 @@ struct uverbs_obj_fd_type { * the driver is removed or the process terminated. 
*/ struct uverbs_obj_type type; - int (*context_closed)(struct ib_uobject_file *uobj_file, + int (*context_closed)(struct ib_uobject *uobj, enum rdma_remove_reason why); const struct file_operations *fops; const char *name; @@ -152,30 +151,29 @@ extern const struct uverbs_obj_type_class uverbs_fd_class; #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) -#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _context_closed, _fops, _name, _flags)\ +#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\ ((&((const struct uverbs_obj_fd_type) \ {.type = { \ - .destroy_order = _order, \ .type_class = &uverbs_fd_class, \ .obj_size = (_obj_size) + \ - UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \ + UVERBS_BUILD_BUG_ON((_obj_size) < \ + sizeof(struct ib_uobject)), \ }, \ .context_closed = _context_closed, \ .fops = _fops, \ .name = _name, \ .flags = _flags}))->type) -#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _destroy_object) \ +#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object) \ ((&((const struct uverbs_obj_idr_type) \ {.type = { \ - .destroy_order = _order, \ .type_class = &uverbs_idr_class, \ .obj_size = (_size) + \ UVERBS_BUILD_BUG_ON((_size) < \ sizeof(struct ib_uobject)) \ }, \ .destroy_object = _destroy_object,}))->type) -#define UVERBS_TYPE_ALLOC_IDR(_order, _destroy_object) \ - UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order, \ +#define UVERBS_TYPE_ALLOC_IDR(_destroy_object) \ + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), \ _destroy_object) #endif diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h index a159ba8dcf8f..65c9eacd3ffb 100644 --- a/include/uapi/rdma/cxgb4-abi.h +++ b/include/uapi/rdma/cxgb4-abi.h @@ -44,6 +44,16 @@ * In particular do not use pointer types -- pass pointers in __aligned_u64 * instead. 
*/ + +enum { + C4IW_64B_CQE = (1 << 0) +}; + +struct c4iw_create_cq { + __u32 flags; + __u32 reserved; +}; + struct c4iw_create_cq_resp { __aligned_u64 key; __aligned_u64 gts_key; @@ -51,7 +61,7 @@ struct c4iw_create_cq_resp { __u32 cqid; __u32 size; __u32 qid_mask; - __u32 reserved; /* explicit padding (optional for i386) */ + __u32 flags; }; enum { diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 888ac5975a6c..2c881aaf05c2 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -79,7 +79,7 @@ enum uverbs_attrs_destroy_cq_cmd_attr_ids { }; enum uverbs_attrs_create_flow_action_esp { - UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, + UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, @@ -87,6 +87,11 @@ enum uverbs_attrs_create_flow_action_esp { UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, }; +enum uverbs_attrs_modify_flow_action_esp { + UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE = + UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, +}; + enum uverbs_attrs_destroy_flow_action_esp { UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, }; diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 625545d862d7..6cdf192070a2 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -40,6 +40,59 @@ #define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name #endif +enum ib_uverbs_access_flags { + IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0, + IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1, + IB_UVERBS_ACCESS_REMOTE_READ = 1 << 2, + IB_UVERBS_ACCESS_REMOTE_ATOMIC = 1 << 3, + IB_UVERBS_ACCESS_MW_BIND = 1 << 4, + IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5, + IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6, + IB_UVERBS_ACCESS_HUGETLB = 1 << 7, +}; + +enum ib_uverbs_query_port_cap_flags { + IB_UVERBS_PCF_SM = 1 << 1, + IB_UVERBS_PCF_NOTICE_SUP = 1 << 2, + IB_UVERBS_PCF_TRAP_SUP = 1 
<< 3, + IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4, + IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5, + IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6, + IB_UVERBS_PCF_MKEY_NVRAM = 1 << 7, + IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8, + IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9, + IB_UVERBS_PCF_SM_DISABLED = 1 << 10, + IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14, + IB_UVERBS_PCF_CM_SUP = 1 << 16, + IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17, + IB_UVERBS_PCF_REINIT_SUP = 1 << 18, + IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19, + IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20, + IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21, + IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23, + IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24, + IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25, + /* + * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and + * is inaccessible + */ + IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27, + IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28, + IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29, + IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30, + IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31, + + /* NOTE this is an internal flag, not an IBA flag */ + IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26, +}; + +enum ib_uverbs_query_port_flags { + IB_UVERBS_QPF_GRH_REQUIRED = 1 << 0, +}; + enum ib_uverbs_flow_action_esp_keymat { IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM, }; @@ -99,4 +152,9 @@ struct ib_uverbs_flow_action_esp { __aligned_u64 hard_limit_pkts; }; +enum ib_uverbs_read_counters_flags { + /* prefer read values from driver cache */ + IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0, +}; + #endif diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 4f9991de8e3a..25a16760de2a 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -279,7 +279,7 @@ struct ib_uverbs_query_port { }; struct ib_uverbs_query_port_resp { - 
__u32 port_cap_flags; + __u32 port_cap_flags; /* see ib_uverbs_query_port_cap_flags */ __u32 max_msg_sz; __u32 bad_pkey_cntr; __u32 qkey_viol_cntr; @@ -299,7 +299,8 @@ struct ib_uverbs_query_port_resp { __u8 active_speed; __u8 phys_state; __u8 link_layer; - __u8 reserved[2]; + __u8 flags; /* see ib_uverbs_query_port_flags */ + __u8 reserved; }; struct ib_uverbs_alloc_pd { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 8daec1fa49cf..addbb9c4529e 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -76,6 +76,9 @@ enum mlx5_lib_caps { MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0, }; +enum mlx5_ib_alloc_uctx_v2_flags { + MLX5_IB_ALLOC_UCTX_DEVX = 1 << 0, +}; struct mlx5_ib_alloc_ucontext_req_v2 { __u32 total_num_bfregs; __u32 num_low_latency_bfregs; @@ -90,6 +93,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 { enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY = 1UL << 1, }; enum mlx5_user_cmds_supp_uhw { @@ -138,7 +142,7 @@ struct mlx5_ib_alloc_ucontext_resp { __u32 log_uar_size; __u32 num_uars_per_page; __u32 num_dyn_bfregs; - __u32 reserved3; + __u32 dump_fill_mkey; }; struct mlx5_ib_alloc_pd_resp { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index f7d685ef2d1f..1a05bb4b0b34 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -45,4 +45,77 @@ enum mlx5_ib_alloc_dm_attrs { MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, }; +enum mlx5_ib_devx_methods { + MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_DEVX_QUERY_UAR, + MLX5_IB_METHOD_DEVX_QUERY_EQN, +}; + +enum mlx5_ib_devx_other_attrs { + MLX5_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, +}; + +enum mlx5_ib_devx_obj_create_attrs { + MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + 
MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, +}; + +enum mlx5_ib_devx_query_uar_attrs { + MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, +}; + +enum mlx5_ib_devx_obj_destroy_attrs { + MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_devx_obj_modify_attrs { + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, +}; + +enum mlx5_ib_devx_obj_query_attrs { + MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, +}; + +enum mlx5_ib_devx_query_eqn_attrs { + MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, +}; + +enum mlx5_ib_devx_obj_methods { + MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_DEVX_OBJ_DESTROY, + MLX5_IB_METHOD_DEVX_OBJ_MODIFY, + MLX5_IB_METHOD_DEVX_OBJ_QUERY, +}; + +enum mlx5_ib_devx_umem_reg_attrs { + MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, + MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, +}; + +enum mlx5_ib_devx_umem_dereg_attrs { + MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_devx_umem_methods { + MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_DEVX_UMEM_DEREG, +}; + +enum mlx5_ib_devx_objects { + MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_OBJECT_DEVX_OBJ, + MLX5_IB_OBJECT_DEVX_UMEM, +}; + #endif diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 7232274de334..af6ad467ed61 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -140,6 +140,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, &net_secret); return seq_scale(hash); } 
+EXPORT_SYMBOL_GPL(secure_tcp_seq); u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { diff --git a/net/rds/ib.c b/net/rds/ib.c index b6ad38e48f62..683b55d4e2b0 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -143,7 +143,7 @@ static void rds_ib_add_one(struct ib_device *device) INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); rds_ibdev->max_wrs = device->attrs.max_qp_wr; - rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); + rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); has_fr = (device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index add82b0266f3..d99a75f75e42 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -16,6 +16,7 @@ #include <net/tcp.h> #include <net/sock.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h> #include "smc.h" #include "smc_clc.h" @@ -450,8 +451,7 @@ out: static int smc_link_determine_gid(struct smc_link_group *lgr) { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - struct ib_gid_attr gattr; - union ib_gid gid; + const struct ib_gid_attr *gattr; int i; if (!lgr->vlan_id) { @@ -461,18 +461,18 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; i++) { - if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, - &gattr)) + gattr = rdma_get_gid_attr(lnk->smcibdev->ibdev, lnk->ibport, i); + if (IS_ERR(gattr)) continue; - if (gattr.ndev) { - if (is_vlan_dev(gattr.ndev) && - vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { - lnk->gid = gid; - dev_put(gattr.ndev); + if (gattr->ndev) { + if (is_vlan_dev(gattr->ndev) && + vlan_dev_vlan_id(gattr->ndev) == lgr->vlan_id) { + lnk->gid = gattr->gid; + rdma_put_gid_attr(gattr); return 0; } - dev_put(gattr.ndev); } + rdma_put_gid_attr(gattr); } return -ENODEV; } diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0eed7ab9f28b..74f29f814ec1 100644 --- a/net/smc/smc_ib.c +++ 
b/net/smc/smc_ib.c @@ -16,6 +16,7 @@ #include <linux/workqueue.h> #include <linux/scatterlist.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h> #include "smc_pnet.h" #include "smc_ib.h" @@ -372,17 +373,21 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) { - struct ib_gid_attr gattr; - int rc; - - rc = ib_query_gid(smcibdev->ibdev, ibport, 0, - &smcibdev->gid[ibport - 1], &gattr); - if (rc || !gattr.ndev) - return -ENODEV; + const struct ib_gid_attr *gattr; + int rc = 0; - memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); - dev_put(gattr.ndev); - return 0; + gattr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); + if (IS_ERR(gattr)) + return PTR_ERR(gattr); + if (!gattr->ndev) { + rc = -ENODEV; + goto done; + } + smcibdev->gid[ibport - 1] = gattr->gid; + memcpy(smcibdev->mac[ibport - 1], gattr->ndev->dev_addr, ETH_ALEN); +done: + rdma_put_gid_attr(gattr); + return rc; } /* Create an identifier unique for this instance of SMC-R. 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e9535a66bab0..547b2cdf1427 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -476,7 +476,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Qualify the transport resource defaults with the * capabilities of this particular device */ - newxprt->sc_max_send_sges = dev->attrs.max_sge; + newxprt->sc_max_send_sges = dev->attrs.max_send_sge; /* transport hdr, head iovec, one page list entry, tail iovec */ if (newxprt->sc_max_send_sges < 4) { pr_err("svcrdma: too few Send SGEs available (%d)\n", diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 16161a36dc73..112a15abc4a4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -508,7 +508,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, unsigned int max_sge; int rc; - max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, + max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge, RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); |